Skip to content

Commit

Permalink
Updated PAM generation to include more complex GPRs
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Jan 17, 2025
1 parent c48e82e commit 99bb3aa
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
13 changes: 8 additions & 5 deletions src/PAModelpy/EnzymeSectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,14 @@ def add(self, model):
if enzyme_id in model.enzyme_variables and not self._enzyme_is_enzyme_complex(protein_reaction, enzyme_id):
enzyme = model.enzymes.get_by_id(enzyme_id)
self._add_reaction_to_enzyme(model, enzyme, rxn_id, kcat)
self.rxn2protein[rxn_id] = {**self.rxn2protein[rxn_id],
**{enzyme_id: {
**kcat,
'genes': enzyme.genes,
'protein_reaction_association': protein_reaction}}}
if rxn_id == 'ALDD3y_copy1':print(rxn_id, self.rxn2protein[rxn_id])
self.rxn2protein[rxn_id].update({
enzyme_id: {
**kcat,
'genes': enzyme.genes,
'protein_reaction_association': protein_reaction
}
})

else:
if self.protein2gene != {}:
Expand Down
23 changes: 18 additions & 5 deletions src/PAModelpy/utils/pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TypedDict, Literal, Union, Tuple, Iterable
import re
import os
import ast

from collections import defaultdict
from dataclasses import dataclass, field
Expand Down Expand Up @@ -85,14 +86,16 @@ def parse_gpr_information(gpr_info:str,

# only get the genes associated with this enzyme
gpr_list = _parse_gpr(gpr_info)
gpr_list = _filter_sublists(gpr_list, genes)

if genes is None: return gpr_list

gpr_list = _filter_sublists(gpr_list, genes)

#convert the genes to the associated proteins
# enzyme_relations = []
# if '_'in enzyme_id:
enzyme_relations = [enzyme_id.split('_')]
if any([len(info)>1 for info in gpr_list]):
enzyme_relations = [enzyme_id.split('_')]
else:
enzyme_relations = [[enzyme_id]]
# for sublist in gpr_list:
# enz_sublist = []
# for item in sublist:
Expand Down Expand Up @@ -283,7 +286,7 @@ def _order_enzyme_complex_id(enz_id:str,

def parse_reaction2protein(enzyme_db: pd.DataFrame,
model: cobra.Model,
other_enzyme_id_pattern: str = r'E[0-9][0-9]*') -> dict:
other_enzyme_id_pattern: str = r'(E[0-9][0-9]*|Enzyme_[A-Za-z0-9_]+)') -> dict:
rxn_info2protein = {}
protein2gpr = defaultdict(list)
#remove copy number substrings from the reaction to make it matchable to enzyme information
Expand Down Expand Up @@ -312,7 +315,14 @@ def parse_reaction2protein(enzyme_db: pd.DataFrame,
rxn_info = rxn_info2protein.setdefault(rxn_id, ReactionInformation(rxn_id))
#sometimes, multiple copies are associated with a single reaction
rxns = rxn_info.get_reaction_from_model(model)
# the genes are generally stored as a list, which needs to be recovered from the string-formatted column
genes = catalytic_reaction_info.gene.iloc[0]
if isinstance(genes, str) and genes[-1] == ']':
genes = ast.literal_eval(catalytic_reaction_info.gene.iloc[0])
elif isinstance(genes, str):
genes = [genes]


for rxn in rxns:
# If no genes are associated with the reaction, this reaction is not catalyzed by an enzyme
if (not len(rxn.genes) > 0) and (not isinstance(genes, list)): continue
Expand All @@ -324,6 +334,7 @@ def parse_reaction2protein(enzyme_db: pd.DataFrame,
enzyme_id,
gene2protein)


protein2gpr[enzyme_id]+= gene_reaction_relation

enzyme_info = enzyme_information(rxn_id=rxn.id,
Expand All @@ -338,6 +349,8 @@ def parse_reaction2protein(enzyme_db: pd.DataFrame,
rxn_info.enzymes[enzyme_id] = enzyme_info
rxn_info2protein[rxn.id] = rxn_info



# if no enzyme info is found, add dummy enzyme with median kcat and molmass
rxn_info2protein, protein2gpr = _check_if_all_model_reactions_are_in_rxn_info2protein(model,
rxn_info2protein,
Expand Down

0 comments on commit 99bb3aa

Please sign in to comment.