diff --git a/src/PAModelpy/utils/pam_generation.py b/src/PAModelpy/utils/pam_generation.py index 836f006..731f835 100644 --- a/src/PAModelpy/utils/pam_generation.py +++ b/src/PAModelpy/utils/pam_generation.py @@ -90,19 +90,19 @@ def parse_gpr_information(gpr_info:str, if genes is None: return gpr_list #convert the genes to the associated proteins - enzyme_relations = [] - if '_'in enzyme_id: - enzyme_relations = [enzyme_id.split('_')] - for sublist in gpr_list: - enz_sublist = [] - for item in sublist: - if item in gene2protein.keys(): - if '_' not in gene2protein[item]: - enz_sublist.append(gene2protein[item]) - enzyme_relations += [enz_sublist] - elif gene2protein[item].split('_') not in enzyme_relations: - enzyme_relations += [gene2protein[item].split('_')] - enzyme_relations = _filter_sublists(enzyme_relations, enzyme_id.split('_'), how='all') + # enzyme_relations = [] + # if '_'in enzyme_id: + enzyme_relations = [enzyme_id.split('_')] + # for sublist in gpr_list: + # enz_sublist = [] + # for item in sublist: + # if item in gene2protein.keys(): + # if '_' not in gene2protein[item]: + # enz_sublist.append(gene2protein[item]) + # enzyme_relations += enz_sublist + # elif gene2protein[item].split('_') not in enzyme_relations: + # enzyme_relations += gene2protein[item].split('_') + # enzyme_relations = _filter_sublists(enzyme_relations, enzyme_id.split('_'), how='all') return sorted(gpr_list), sorted(enzyme_relations) def get_protein_gene_mapping(enzyme_db: pd.DataFrame, model) -> tuple[dict, dict]: @@ -281,15 +281,14 @@ def _order_enzyme_complex_id(enz_id:str, return "_".join(sorted(proteins)) -def parse_reaction2protein(enzyme_db: pd.DataFrame, model: cobra.Model) -> dict: +def parse_reaction2protein(enzyme_db: pd.DataFrame, + model: cobra.Model, + other_enzyme_id_pattern: str = r'E[0-9][0-9]*') -> dict: rxn_info2protein = {} protein2gpr = defaultdict(list) #remove copy number substrings from the reaction to make it matchable to enzyme information filtered_model_reactions = [_extract_reaction_id(r.id) for r in model.reactions] - #make sure all enzyme complexes have an id ordered in a structured way - enzyme_db['enzyme_id'] = enzyme_db['enzyme_id'].map(_order_enzyme_complex_id, na_action='ignore') - # replace NaN values with unique identifiers enzyme_db.loc[enzyme_db['enzyme_id'].isnull(), 'enzyme_id'] = [f'E{i}' for i in range(enzyme_db['enzyme_id'].isnull().sum())] @@ -297,6 +296,11 @@ def parse_reaction2protein(enzyme_db: pd.DataFrame, model: cobra.Model) -> dict: enzyme_db.loc[enzyme_db['gene'].isnull(), 'gene'] = [[f'gene_{i}'] for i in range(enzyme_db['gene'].isnull().sum())] + + #make sure all enzyme complexes have an id ordered in a structured way + enzyme_db['enzyme_id'] = enzyme_db['enzyme_id'].apply(_order_enzyme_complex_id, + other_enzyme_id_pattern = other_enzyme_id_pattern) + protein2gene, gene2protein = _get_genes_for_proteins(enzyme_db, model) # parse the information for all gene-protein-reaction relations in the dataframe diff --git a/tests/data/proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx b/tests/data/proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx new file mode 100644 index 0000000..e0c1800 Binary files /dev/null and b/tests/data/proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx differ diff --git a/tests/data/proteinAllocationModel_iML1515_EnzymaticData_core.xlsx b/tests/data/proteinAllocationModel_iML1515_EnzymaticData_core.xlsx new file mode 100644 index 0000000..927ab80 Binary files /dev/null and b/tests/data/proteinAllocationModel_iML1515_EnzymaticData_core.xlsx differ diff --git a/tests/unit_tests/test_pamodel/test_pam_generation.py b/tests/unit_tests/test_pamodel/test_pam_setup.py similarity index 88% rename from tests/unit_tests/test_pamodel/test_pam_generation.py rename to tests/unit_tests/test_pamodel/test_pam_setup.py index 815dc84..58a414c 100644 --- a/tests/unit_tests/test_pamodel/test_pam_generation.py +++ b/tests/unit_tests/test_pamodel/test_pam_setup.py @@ -1,5 +1,6 @@ import pytest import pickle +import os from src.PAModelpy.configuration import Config from src.PAModelpy.PAModel import PAModel @@ -8,6 +9,7 @@ from Scripts.pam_generation_uniprot_id import (set_up_ecolicore_pam, set_up_ecoli_pam, set_up_toy_pam, parse_gpr_information_for_protein2genes, parse_gpr_information_for_rxn2protein) +from src.PAModelpy.utils import set_up_pam def test_gpr_information_is_parsed_correctly(): @@ -48,11 +50,13 @@ def test_gpr_information_for_protein_is_correctly_filtered(): def test_if_enzyme_complex_in_toy_pam_is_parsed_correctly(): sut = set_up_toy_pam_with_enzyme_complex(sensitivity=False) - assert all([enz in sut.enzymes for enz in ['E1', 'E2', 'E10', 'E2_E10']]) - assert all([const not in sut.constraints.keys() for const in ['EC_E10_f', 'EC_E2_f']]) - constraint = sut.constraints['EC_E2_E10_f'].get_linear_coefficients([sut.reactions.CE_R2_E2_E10.forward_variable]) - assert constraint[sut.reactions.CE_R2_E2_E10.forward_variable] > 0 + print(sut.enzymes, sut.enzyme_variables) + assert all([enz in sut.enzymes for enz in ['E1', 'E10_E2']]) + assert all([const not in sut.constraints.keys() for const in ['EC_E10_f', 'EC_E2_f']]) + constraint = sut.constraints['EC_E10_E2_f'].get_linear_coefficients([sut.reactions.CE_R2_E10_E2.forward_variable]) + assert constraint[sut.reactions.CE_R2_E10_E2.forward_variable] > 0 +# def test_if_isozymes_in_toy_pam_are_parsed_correctly(): sut = set_up_toy_pam_with_isozymes(sensitivity=False) @@ -102,26 +106,16 @@ def test_if_toy_pam_with_enzyme_comples_has_same_growth_rate_as_without(): assert sut.objective.value == pytest.approx(toy_pam.objective.value, abs = 1e-6) -def test_set_up_ecolicore_pam_works(): - sut = set_up_ecolicore_pam() - sut.optimize() - assert True -def test_if_ecolicore_pam_optimizes(): - sut = set_up_ecolicore_pam() - sut.optimize() - assert sut.objective.value > 0 - -def test_set_up_ecoli_pam_works(): - sut = set_up_ecoli_pam() - assert True +def test_if_pamodel_can_be_pickled_and_unpickled(): + # Arrange + pam_data_file = os.path.join('tests', 'data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx') + iml1515 = os.path.join('Models', 'iML1515.xml') + sut = set_up_pam(pam_data_file, + iml1515, + sensitivity=False, + adjust_reaction_ids=False) -def test_if_ecoli_pam_optimizes(): - sut = set_up_ecoli_pam() sut.optimize() - assert sut.objective.value > 0 - -def test_if_pamodel_can_be_pickled_and_unpickled(): - sut = set_up_ecoli_pam(sensitivity=False) sut.change_reaction_bounds('EX_glc__D_e', -10, 0) sut.optimize() @@ -151,9 +145,15 @@ def set_up_toy_pam_with_enzyme_complex(sensitivity =True): Etot = 0.6*1e-3 model = build_toy_gem() active_enzyme = build_active_enzyme_sector(config) + #add an enzyme associated to enzyme complex to the toy model active_enzyme.rxn2protein['R2']['E2']['protein_reaction_association'] = [['E2', 'E10']] - active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy() + # active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy() + active_enzyme.rxn2protein['R2']['E2_E10'] = active_enzyme.rxn2protein['R2']['E2'].copy() + del active_enzyme.rxn2protein['R2']['E2'] + + active_enzyme.protein2gene['E2_E10'] = [['gene2', 'gene10']] + #build the toy model unused_enzyme = build_unused_protein_sector(config) @@ -180,6 +180,7 @@ def set_up_toy_pam_with_isozymes(sensitivity =True): active_enzyme.rxn2protein['R2']['E2']['protein_reaction_association'] = [['E2'], ['E10']] active_enzyme.rxn2protein['R2']['E10']= active_enzyme.rxn2protein['R2']['E2'].copy() + #build the toy model unused_enzyme = build_unused_protein_sector(config) translation_enzyme = build_translational_protein_sector(config) @@ -209,6 +210,9 @@ def set_up_toy_pam_with_isozymes_and_enzymecomplex(sensitivity =True): active_enzyme.rxn2protein['R3']['E3']['protein_reaction_association'] = [['E3','E10', 'E11']] active_enzyme.rxn2protein['R3']['E10']= active_enzyme.rxn2protein['R3']['E3'].copy() active_enzyme.rxn2protein['R3']['E11']= active_enzyme.rxn2protein['R3']['E3'].copy() + active_enzyme.rxn2protein['R3']['E3_E10_E11']= active_enzyme.rxn2protein['R3']['E3'].copy() + + active_enzyme.protein2gene['E3_E10_E11'] = [['gene3', 'gene10', 'gene11']] #build the toy model diff --git a/tests/unit_tests/test_pamodel/test_pamodel.py b/tests/unit_tests/test_pamodel/test_pamodel.py index 8f9cbe6..79910d1 100644 --- a/tests/unit_tests/test_pamodel/test_pamodel.py +++ b/tests/unit_tests/test_pamodel/test_pamodel.py @@ -1,9 +1,10 @@ import pytest from cobra.io import load_json_model +import os from src.PAModelpy import PAModel,Config,ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector, CatalyticEvent -from Scripts.pam_generation_uniprot_id import set_up_ecoli_pam, set_up_ecolicore_pam -from tests.unit_tests.test_pamodel.test_pam_generation import set_up_toy_pam_with_isozymes_and_enzymecomplex +from tests.unit_tests.test_pamodel.test_pam_setup import set_up_toy_pam_with_isozymes_and_enzymecomplex +from src.PAModelpy.utils import set_up_pam def test_if_pamodel_change_kcat_function_works(): #arrange @@ -43,7 +44,7 @@ def test_if_pamodel_change_kcat_function_works_with_catalytic_reactions(): #arrange sut = set_up_ecoli_pam(sensitivity=False) input_kcat = 10 - enzyme_id = 'P0ABJ1' + enzyme_id = 'P0ABI8_P0ABJ1_P0ABJ3_P0ABJ6' rxn_id = "CYTBO3_4pp" ce_rxn= sut.reactions.query(f'CE_{rxn_id}_{enzyme_id}')[0] enzyme_complex_id = "_".join(ce_rxn.id.split("_")[3:]) @@ -173,6 +174,7 @@ def test_if_pamodel_sensitivity_can_be_changed_false_to_true(): def test_if_pamodel_sensitivity_can_be_changed_true_to_false_ecolicore(): # arrange ecolicore_pam = set_up_ecolicore_pam(sensitivity=True) + glc_lb = -ecolicore_pam.constraints['EX_glc__D_e_lb'].ub glc_ub = ecolicore_pam.constraints['EX_glc__D_e_ub'].ub @@ -269,11 +271,13 @@ def test_if_pamodel_gets_catalyzing_enzymes_for_enzyme_object(): # Arrange sut = set_up_toy_pam_with_isozymes_and_enzymecomplex(sensitivity = False) enzyme_ut = 'E10' - associated_enzymes = ['E10', 'E3_E10_E11'] + associated_enzymes = ['E10', 'E10_E11_E3'] # Assert catalyzing_enzymes = sut._get_catalyzing_enzymes_for_enzyme(enzyme_ut) + print(catalyzing_enzymes) + # Assert assert all(enz in catalyzing_enzymes for enz in associated_enzymes) @@ -334,4 +338,24 @@ def assert_bounds(model_ori, model_copy): def assert_total_protein_content(model_ori, model_copy): assert model_ori.p_tot == model_copy.p_tot tot_prot_cons_id = model_ori.TOTAL_PROTEIN_CONSTRAINT_ID - assert model_ori.constraints[tot_prot_cons_id].ub == model_copy.constraints[tot_prot_cons_id].ub \ No newline at end of file + assert model_ori.constraints[tot_prot_cons_id].ub == model_copy.constraints[tot_prot_cons_id].ub + +def set_up_ecoli_pam(sensitivity=True): + pam_data_file = os.path.join('tests', 'data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx') + iml1515 = os.path.join('Models', 'iML1515.xml') + return set_up_pam(pam_data_file, + iml1515, + sensitivity=sensitivity, + adjust_reaction_ids=False) + +def set_up_ecolicore_pam(sensitivity=True): + pam_data_file = os.path.join('tests', 'data', + 'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx') + ecolicore_gem = load_json_model(os.path.join('Models', 'e_coli_core.json')) + + # Apply + return set_up_pam(pam_data_file, + ecolicore_gem, + total_protein=0.1699, + sensitivity=sensitivity, + adjust_reaction_ids=True) diff --git a/tests/unit_tests/test_utils/test_pam_generation.py b/tests/unit_tests/test_utils/test_pam_generation.py index 55b4d8c..fd572f9 100644 --- a/tests/unit_tests/test_utils/test_pam_generation.py +++ b/tests/unit_tests/test_utils/test_pam_generation.py @@ -6,7 +6,7 @@ import pytest from Scripts.toy_ec_pam import build_toy_gem -from src.utils.pam_generation import parse_reaction2protein, set_up_pam +from src.PAModelpy.utils.pam_generation import parse_reaction2protein, set_up_pam def test_if_rxn2protein_info_is_correctly_parsed(): # Arrange @@ -20,6 +20,8 @@ def test_if_rxn2protein_info_is_correctly_parsed(): 'direction':['f','f', 'f', 'f', 'b'] } ) + print(toy_enzyme_db) + toy_model = build_toy_gem() expected_rxn2protein = { @@ -49,7 +51,9 @@ def test_if_rxn2protein_info_is_correctly_parsed(): expected_protein2gpr = {'E1': [['gene1']], 'E2a': [['gene2a']], 'E2b_E2c': [['gene2b', 'gene2c']], 'E3': [['gene3']]} # Apply - rxn2protein, protein2gpr = parse_reaction2protein(toy_enzyme_db, toy_model) + rxn2protein, protein2gpr = parse_reaction2protein(toy_enzyme_db, + toy_model, + other_enzyme_id_pattern = r'E[0-9][0-9]*[a-z]?') # Assert for output_dict, expected_dict in zip([rxn2protein, protein2gpr], [expected_rxn2protein, expected_protein2gpr]): @@ -57,7 +61,7 @@ def test_if_rxn2protein_info_is_correctly_parsed(): def test_if_set_up_pam_can_build_ecolicore_pam(): #Arrange - pam_data_file = os.path.join('Data', 'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx') + pam_data_file = os.path.join('tests','data', 'proteinAllocationModel_iML1515_EnzymaticData_core.xlsx') ecolicore_gem = cobra.io.load_json_model(os.path.join('Models', 'e_coli_core.json')) #Apply @@ -74,14 +78,14 @@ def test_if_set_up_pam_can_build_ecolicore_pam(): def test_if_set_up_pam_can_build_iML1515(): #Arrange - pam_data_file = os.path.join('Results', '1_preprocessing', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx') + pam_data_file = os.path.join('tests','data', 'proteinAllocationModel_iML1515_EnzymaticData_241209.xlsx') iml1515 = os.path.join('Models', 'iML1515.xml') #Apply pam = set_up_pam(pam_data_file, iml1515, sensitivity=False, - adjust_reaction_ids=True) + adjust_reaction_ids=False) pam.optimize()