-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcore.py
117 lines (103 loc) · 5.08 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"Provide functionalities for the command line tool"
from functools import reduce
from algs.utils import convert_folding_to_DBN, display_histogram
from algs.zuker_backtrack import *
from algs.zuker_distance import DistanceSolver
from algs.nussinov import *
# Default value of the ``plot_file`` argument of run_Zuker_functions and
# run_Nussinov_functions, i.e. where the distance-vector plot is stored when
# the caller does not name a file.
PLOT_FILENAME = "plot"
def run_Zuker_functions(rna: str, folding: list = None, internal_loop_size: int = None,
                        display: bool = False, plot: bool = False,
                        plot_file: str = PLOT_FILENAME):
    """Compute the distance profile for a folding with respect to Zuker-optimal
    solutions/foldings.

    The function runs the Zuker solver on ``rna``, chooses a reference folding
    (the given one, or one solution produced by the solver), runs the distance
    solver against that reference, and prints the minimum energy, the max
    distance, the distance vector and the number of optimal folds.

    Args:
        rna (str): an RNA sequence
        folding (list, optional): a folding to compare against. Defaults to None.
            A falsy value (``None`` or an empty list) means "not given"; in that
            case one solution from the solver is used as the reference.
        internal_loop_size (int, optional): maximum size for an internal loop,
            forwarded to ``Solver``. Defaults to None.
        display (bool, optional): whether to display in command line output
            a folding of max distance from the reference folding. Defaults to False.
        plot (bool, optional): whether to plot the distance vector. Defaults to False.
        plot_file (str, optional): the filepath for storing the plot.
            Defaults to PLOT_FILENAME.

    Returns:
        None. Results are printed and, when ``plot`` is set, handed to
        ``display_histogram``.
    """
    print("-----Running Zuker's algorithm-----")

    # run Zuker algorithm to get optimal folds
    solver = Solver(seq=rna, internal_loop_size=internal_loop_size)
    solver.fill_table()
    min_energy = solver.solve()
    print("The minimum energy is: ", min_energy)

    # choose a folding to compare against
    if folding:
        # use the given folding
        afold = folding
        folding_given = True
    else:
        # select an arbitrary folding
        afold = solver.get_one_solution()
        folding_given = False

    # run distance solver to get max distance and vector
    distance_solver = DistanceSolver(rna, afold, solver.W, solver.V,
                                     solver.WM, solver.WM2)
    distance_solver.fill_distance_table()
    distance_solver.fill_vector_table()

    # Call solve() once and read both components from the same result instead
    # of re-running the solver for each element.
    solution = distance_solver.solve()
    max_distance = solution[0]
    distance_vector = solution[1]
    print("Max distance is: ", max_distance)
    print("The distance vector is: ", distance_vector)

    # The entries of the distance vector add up to the number of optimal
    # folds; sum() also copes with an empty vector, unlike a seedless reduce().
    num_folds_dp = sum(distance_vector)
    print("The number of optimal folds is: ", num_folds_dp)

    if display:
        # display an optimal fold that is the most distant from the given fold
        max_distance_fold = distance_solver.get_one_max_dist_solution()
        print("One of the farthest foldings is: ")
        print(convert_folding_to_DBN(max_distance_fold))

    if plot:
        # plot the histogram for the distance vector
        display_histogram(distance_vector, 'Z', plot_file, folding_given)

    print()
def run_Nussinov_functions(rna: str, folding: list = None, display: bool = False,
                           plot: bool = False, plot_file: str = PLOT_FILENAME):
    """Compute the distance profile for a folding with respect to Nussinov-optimal
    solutions/foldings.

    The function runs ``nussinov_dp`` on ``rna``, chooses a reference folding
    (the given one, or one optimal solution constructed from the DP result),
    computes the max distance and the distance vector against that reference,
    and prints the maximum number of matched pairs, the max distance, the
    distance vector and the number of optimal folds.

    Args:
        rna (str): an RNA sequence
        folding (list, optional): a folding to compare against. Defaults to None.
            A falsy value (``None`` or an empty list) means "not given"; in that
            case one optimal solution is constructed and used as the reference.
        display (bool, optional): whether to display in command line output
            a folding of max distance from the reference folding. Defaults to False.
        plot (bool, optional): whether to plot the distance vector. Defaults to False.
        plot_file (str, optional): the filepath for storing the plot.
            Defaults to PLOT_FILENAME.

    Returns:
        None. Results are printed and, when ``plot`` is set, handed to
        ``display_histogram``.
    """
    print("-----Running Nussinov's algorithm-----")

    # run Nussinov algorithm to get optimal folds
    opt_val, opt_solutions = nussinov_dp(rna)
    print("The maximum number of matched pairs is: ", opt_val)

    # choose a folding to compare against
    if folding:
        # use the given folding
        afold = folding
        folding_given = True
    else:
        # select an arbitrary folding
        afold = construct_one_opt_solution(rna, opt_solutions)
        folding_given = False

    max_distance, max_distance_solutions = max_distance_dp(afold, opt_solutions)
    distance_vector = distance_vec(afold, opt_solutions).v
    print("Max distance is: ", max_distance)
    print("The distance vector is: ", distance_vector)

    # The entries of the distance vector add up to the number of optimal
    # folds; sum() also copes with an empty vector, unlike a seedless reduce().
    num_folds_dp = sum(distance_vector)
    print("The number of optimal folds is: ", num_folds_dp)

    if display:
        # display an optimal fold that is the most distant from the given fold
        max_distance_fold = construct_one_opt_solution(rna, max_distance_solutions)
        print("One of the farthest foldings is: ")
        print(convert_folding_to_DBN(max_distance_fold))

    if plot:
        # plot the histogram for the distance vector
        display_histogram(distance_vector, 'N', plot_file, folding_given)

    print()
if __name__ == '__main__':
    # Running this module directly does nothing: the lines below are example
    # invocations of run_Nussinov_functions / run_Zuker_functions that are
    # kept commented out. Uncomment them to try the functions on a sample
    # sequence.
    # rna = "CUUCCCAGGUAACAAACCAACCAACUUUCGAUCUCUUGUAGAUCUGUUCUCUAAACGCUUCCCAGGUAACAAACCAACCAACUUUCGAUCUCUUGUAGAUCUGUUCUCUAAACG"
    # run_Nussinov_functions(rna, display=True, plot=True, plot_file="plot_Nussinov")
    # run_Zuker_functions(rna, display=True, plot=True, plot_file="plot_Zuker", internal_loop_size=None)
    # run_Zuker_functions(rna, display=True, plot=True, plot_file="plot_Zuker_30", internal_loop_size=30)
    pass