 Lakshmi KrishnaKumaar -4 years ago 91
Python Question

# Using chaos game representation for gene sequences [Python program]

I need to represent many gene sequences using chaos game representation I got this python code from Boštjan Cigan's blog (https://bostjan-cigan.com/chaos-game-representation-of-gene-structure-in-python/)

# https://bostjan-cigan.com

``````    import collections
from collections import OrderedDict
from matplotlib import pyplot as plt
from matplotlib import cm

import pylab
import math

f = open("ensemblSeq.fa")
s1 = f.read()
data = "".join(s1.split("\n")[1:])

def count_kmers(sequence, k):
d = collections.defaultdict(int)
for i in xrange(len(data)-(k-1)):
d[sequence[i:i+k]] +=1
for key in d.keys():
if "N" in key:
del d[key]
return d

def probabilities(kmer_count, k):
probabilities = collections.defaultdict(float)
N = len(data)
for key, value in kmer_count.items():
probabilities[key] = float(value) / (N - k + 1)
return probabilities

def chaos_game_representation(probabilities, k):
array_size = int(math.sqrt(4**k))
chaos = []
for i in range(array_size):
chaos.append(*array_size)

maxx = array_size
maxy = array_size
posx = 1
posy = 1
for key, value in probabilities.items():
for char in key:
if char == "T":
posx += maxx / 2
elif char == "C":
posy += maxy / 2
elif char == "G":
posx += maxx / 2
posy += maxy / 2
maxx = maxx / 2
maxy /= 2
chaos[posy-1][posx-1] = value
maxx = array_size
maxy = array_size
posx = 1
posy = 1

return chaos

f3 = count_kmers(data, 3)
f4 = count_kmers(data, 4)

f3_prob = probabilities(f3, 3)
f4_prob = probabilities(f4, 4)

chaos_k3 = chaos_game_representation(f3_prob, 3)
pylab.title('Chaos game representation for 3-mers')
pylab.imshow(chaos_k3, interpolation='nearest', cmap=cm.gray_r)
pylab.show()

chaos_k4 = chaos_game_representation(f4_prob, 4)
pylab.title('Chaos game representation for 4-mers')
pylab.imshow(chaos_k4, interpolation='nearest', cmap=cm.gray_r)
pylab.show()
``````

This code works fine but I have many sequence files I need to iterate through each fasta file in the folder and get individual plots stored in a folder with the name of the image file corresponding to the name of the fasta file how can I modify the code according to my need

I am new to python as well as StackOverflow if any mistake is there kindly ignore

Thanks in advance Franz
Answer Source

So, if you want to apply your code on every file in your directory a very simple way to do this is calling all files inside of a `for`-loop. I suggest the following:

``````import collections
import os
from collections import OrderedDict
from matplotlib import pyplot as plt
from matplotlib import cm

import pylab
import math

def count_kmers(sequence, k):
d = collections.defaultdict(int)
for i in xrange(len(data)-(k-1)):
d[sequence[i:i+k]] +=1
for key in d.keys():
if "N" in key:
del d[key]
return d

def probabilities(kmer_count, k):
probabilities = collections.defaultdict(float)
N = len(data)
for key, value in kmer_count.items():
probabilities[key] = float(value) / (N - k + 1)
return probabilities

def chaos_game_representation(probabilities, k):
array_size = int(math.sqrt(4**k))
chaos = []
for i in range(array_size):
chaos.append(*array_size)

maxx = array_size
maxy = array_size
posx = 1
posy = 1
for key, value in probabilities.items():
for char in key:
if char == "T":
posx += maxx / 2
elif char == "C":
posy += maxy / 2
elif char == "G":
posx += maxx / 2
posy += maxy / 2
maxx = maxx / 2
maxy /= 2
chaos[posy-1][posx-1] = value
maxx = array_size
maxy = array_size
posx = 1
posy = 1
return chaos

if __name__ == "__main__":
PATH = os.getcwd()
filelist = sorted([os.path.join(PATH, f) for f in os.listdir(PATH) if f.endswith('.fa')])
for file in filelist:
f = open(file)
s1 = f.read()
data = "".join(s1.split("\n")[1:])
f3 = count_kmers(data, 3)
f4 = count_kmers(data, 4)

f3_prob = probabilities(f3, 3)
f4_prob = probabilities(f4, 4)

chaos_k3 = chaos_game_representation(f3_prob, 3)
pylab.title('Chaos game representation for 3-mers')
pylab.imshow(chaos_k3, interpolation='nearest', cmap=cm.gray_r)
pylab.savefig(os.path.splitext(file)+'chaos3.png')
pylab.show()

chaos_k4 = chaos_game_representation(f4_prob, 4)
pylab.title('Chaos game representation for 4-mers')
pylab.imshow(chaos_k4, interpolation='nearest', cmap=cm.gray_r)
pylab.savefig(os.path.splitext(file)+'chaos4.png')
pylab.show()
``````

I just wrapped a loop around and added a `pylab.savefig()` call. Furthermore I used `os` to get the filenames from your directory. It should work now.

Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download
Latest added