biogeek biogeek - 2 months ago 8
Python Question

How to correct this for loop over tuple function in Python?

Here is a program in which each record is split into (letter, value) pairs stored as tuples, so that every letter has a corresponding number, e.g. A:6, B:6, C:35, etc. If a value is less than 10, the corresponding letter is replaced by a placeholder letter (described here as N; the code below uses "Z"). The following is the code. I find that my code does not loop over all the sequences in the last part, where the tuples are built: it takes in only a single sequence and does not loop over the others.

tutorial = open('c:/test/z.txt','r')
## Input file looks like
>qrst
ABCDE-- 6 6 35 25 10
>qqqq
ABBDE-- 7 7 28 29 2

# Parse the input file: lines starting with ">" are headers and are collected
# (without the trailing newline) in `org`; all other lines are concatenated
# into the current record string, and finished records are collected in
# `seqlist`.
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the nesting described in these comments is inferred -- confirm.
org = []
seqlist = []
seqstring = ""
for line in tutorial:
if line.startswith(">"):
# A new header closes the record accumulated so far (if there is one).
if seqstring!= "":
seqlist.append(seqstring)
seqstring = ""
org.append(line.rstrip("\n"))
else:
seqstring += line.rstrip("\n")
# Store the record that was still being accumulated when the file ended.
seqlist.append(seqstring)
# `l` is a second name for the same list; the loops below read the records
# through it.
l = seqlist
#print l

# Split every record into its letters (`sequence`) and its numbers
# (`qualities`), then add a "0" quality for each "-" in the letters.
# `sequence` and `qualities` are rebound on every iteration, so once this loop
# has finished they hold the values of the LAST record only.
# `j` is not used anywhere in the code shown here.
j = []
ll = len(seqlist)
for i in range(0,ll):
sq = l[i]
sequence = sq.split(" ")[0] ## Stores only the alphabets
qualities = sq.split(" ")[1:] ## Stores only the numeric
# filter(None, ...) drops the empty strings produced by consecutive spaces.
# The insert() call below needs a list, i.e. it relies on Python 2, where
# filter returns a list (the print statements further down are Python 2 too).
qualities = filter(None, qualities)
for sub in sequence:
if sub == "-": ## If sequences have "-", it inserts a "0" in that position in corresponding number
# index() returns the position of the FIRST "-" every time, so each "0" is
# inserted at that same position regardless of which "-" is being visited.
idx = list(sequence).index(sub)
qualities.insert(idx,"0")

# Error in the steps below
# Build, for every record, a list of (letter, quality) tuples in which a
# letter is replaced by "Z" when its quality is below 10.
# The outer loop walks over the records in `l`, but its body never derives
# `sequence` or `qualities` from `sub`: it reuses whatever those two names
# were last bound to by the earlier loop, so every pass builds its tuples from
# the same record. This matches the output quoted at the end of this block.
# NOTE(review): indentation was lost in the paste; nesting is inferred.
pairs = []
for sub in l:
print sub
new_list = []
for x in range(len(sequence)):
print x
new_tuple = (sequence[x], qualities[x]) #Printing this step, notice that only one of the sequences is printed twice. ERROR
print new_tuple
# Qualities are strings, hence the int() conversion before comparing.
if int(qualities[x]) < 10:
new_tuple = ("Z", qualities[x])
new_list.append(new_tuple)
pairs.append(new_list)
print pairs
# When I print pairs it looks like this: [[('Z', '7'), ('Z', '7'), ('B', '28'), ('D', '29'), ('Z', '2'), ('Z', '0'), ('Z', '0')], [('Z', '7'), ('Z', '7'), ('B', '28'), ('D', '29'), ('Z', '2'), ('Z', '0'), ('Z', '0')]]
# Sequence#2 is printed twice over. The first one is not taken in

Answer
# Collect one (sequence, qualities) pair per input record, so that the second
# loop below can visit every record instead of only the last one parsed.
# `l` is the list of record strings built earlier in the script.
all_inputs = []
for sq in l:
    fields = sq.split(" ")
    sequence = fields[0]  # the letters, e.g. "ABCDE--"
    # Drop the empty strings that runs of spaces produce. A list
    # comprehension always yields a list, which insert() below requires;
    # filter() only returns a list on Python 2.
    qualities = [q for q in fields[1:] if q]
    # Give every gap character "-" a quality of "0" at its own position.
    # enumerate() supplies the real index of each "-"; list.index() would
    # return the first "-" every time and misplace the zeros whenever the
    # gaps are not adjacent.
    for idx, sub in enumerate(sequence):
        if sub == "-":
            qualities.insert(idx, "0")
    all_inputs.append((sequence, qualities))

# Pair each letter with its quality; letters whose quality is below 10 are
# replaced by "Z". Raises IndexError if a record has fewer qualities than
# letters, and ValueError if a quality is not an integer string.
pairs = []
for sequence, qualities in all_inputs:
    # Debug output: show the record being processed (previously this printed
    # a stale loop variable left over from the loop above).
    print(sequence)
    new_list = []
    for x, letter in enumerate(sequence):
        print(x)
        new_tuple = (letter, qualities[x])
        print(new_tuple)
        if int(qualities[x]) < 10:
            new_tuple = ("Z", qualities[x])
        new_list.append(new_tuple)
    pairs.append(new_list)
# Single-argument parenthesized prints behave the same on Python 2 and 3.
print(pairs)

gives:

[[('Z', '6'), ('Z', '6'), ('C', '35'), ('D', '25'), ('E', '10'), ('Z', '0'), ('Z', '0')], [('Z', '7'), ('Z', '7'), ('B', '28'), ('D', '29'), ('Z', '2'), ('Z', '0'), ('Z', '0')]]