-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
98 lines (76 loc) · 2.94 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
This script is designed for a user to analyze a DNA sequence for creating primers for PCR/sequencing.
Resulting output (in terminal):
GC content (%)
Sequence length
Reverse complementary sequence (5' -> 3')
Note: If there is a non-DNA letter in the sequence, the script will give an error.
Example: $ python main.py
"""
# Bases accepted as DNA once the input has been upper-cased.
VALID_BASES = frozenset("ACGT")

# Translation table mapping every base to its Watson-Crick partner.
COMPLEMENT_TABLE = str.maketrans("ACGT", "TGCA")

# Recommended primer length window (inclusive on both ends), in base pairs.
MIN_PRIMER_LENGTH = 18
MAX_PRIMER_LENGTH = 22

# Layout of the result report; the placeholders are filled in by format_report().
REPORT_TEMPLATE = (
    "\nHere are results --- \n"
    " \n"
    "Length: {length} bp\n"
    "The GC content is: {gc}%\n"
    " \n"
    "The reverse complement is: "
    " \n"
    "\n 5' {seq} 3' "
    "\n 5' {revcomp} 3'"
    "\n"
)


def normalize_sequence(raw):
    """Return *raw* upper-cased with all whitespace removed.

    Pasted sequences frequently carry stray spaces or a trailing newline;
    dropping them keeps the length and GC calculations honest.
    """
    return "".join(raw.split()).upper()


def is_valid_dna(seq):
    """Return True when *seq* is non-empty and made only of A, C, G and T.

    *seq* is expected to be already normalized (see normalize_sequence).
    An empty sequence is rejected so that the GC calculation can never
    divide by zero.
    """
    return bool(seq) and set(seq) <= VALID_BASES


def gc_content(seq):
    """Return the GC content of *seq* as a percentage rounded to 2 decimals.

    Raises:
        ValueError: if *seq* is empty (the percentage is undefined).
    """
    if not seq:
        raise ValueError("cannot compute the GC content of an empty sequence")
    gc_count = seq.count("G") + seq.count("C")
    # 100.0 forces float arithmetic regardless of interpreter division rules.
    return round(100.0 * gc_count / len(seq), 2)


def reverse_complement(seq):
    """Return the reverse complement of *seq*, read 5' -> 3'.

    The complement alone would read 3' -> 5'; reversing it yields the
    strand in the conventional 5' -> 3' orientation that the report prints.
    """
    return seq.translate(COMPLEMENT_TABLE)[::-1]


def gc_note(gc):
    """Return the advisory message for a GC percentage *gc*."""
    if gc < 50:
        return "NOTE: Your GC content is under 50%! Try another sequence."
    # Exactly 50% also lands here, as it did in the original script.
    return "Your GC content is over 50%!"


def length_note(length):
    """Return the advisory message for a sequence of *length* base pairs.

    The recommended window is inclusive: both 18 and 22 bp count as
    being within range.
    """
    if length > MAX_PRIMER_LENGTH:
        return "NOTE: Your sequence is over 22 bp. It might be too long." + "\n"
    if length < MIN_PRIMER_LENGTH:
        return "NOTE: Your sequence is under 18 bp. It might be too short." + "\n"
    return "Your sequence is within the recommended 18-22 bp range." + "\n"


def format_report(seq):
    """Return the multi-line result report for a validated sequence *seq*."""
    return REPORT_TEMPLATE.format(
        length=len(seq),
        gc=gc_content(seq),
        seq=seq,
        revcomp=reverse_complement(seq),
    )


def main():
    """Prompt for DNA sequences and print their analysis until the user stops."""
    repeat = "y"
    while repeat == "y":
        # Ask the user to paste a DNA sequence and normalize it for analysis.
        seq = normalize_sequence(input("Please paste a DNA sequence (5'->3'):"))
        if not is_valid_dna(seq):
            # Anything other than A/C/G/T (or an empty paste) is rejected.
            print("\n This is not a valid DNA sequence! Please try again. \n")
        else:
            print(format_report(seq))
            print(gc_note(gc_content(seq)))
            print(length_note(len(seq)))
        # Ask whether to analyze another sequence; accept "Y" and stray spaces too.
        repeat = input("Do you want to analyze another DNA sequence (y/n)?").strip().lower()


if __name__ == "__main__":
    main()