Skip to content

Commit

Permalink
some updates
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Mar 27, 2024
1 parent 146fe83 commit 80da693
Show file tree
Hide file tree
Showing 21 changed files with 1,453 additions and 617 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,samiralvdb,QPE-IAMBPC156,27.03.2024 10:16,file:///home/samiralvdb/.config/libreoffice/4;
1 change: 0 additions & 1 deletion Data/TAModel/.~lock.Sinha-etal_2021_transcript-data.xlsx#

This file was deleted.

Binary file modified Data/TAModel/2024-02-27_gene_enzyme_reaction_relation_Ecoli.xlsx
Binary file not shown.
Binary file modified Data/proteome_data_extract_schmidt2016.xlsx
Binary file not shown.

Large diffs are not rendered by default.

815 changes: 387 additions & 428 deletions Scripts/Translation_parameters_estimation.ipynb

Large diffs are not rendered by default.

Binary file modified Scripts/__pycache__/pam_generation.cpython-310.pyc
Binary file not shown.
113 changes: 93 additions & 20 deletions Scripts/ecolicore_tam_incl_transcript_info.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import matplotlib.pyplot as plt
import pandas as pd
import os
import cobra

from Scripts.tam_generation import set_up_toy_tam, set_up_ecolicore_tam
from Scripts.pam_generation import set_up_ecolicore_pam
Expand All @@ -14,8 +15,8 @@
FLUX_FILE_PATH = os.path.join('Data', 'TAModel', 'Sinha-etal_2021_flux-data.xlsx')


mrna_vs_mu_slope = 2.64E-10
mrna_vs_mu_intercept = 3.59E-11
mrna_vs_mu_slope = 0.00013049558330984208
mrna_vs_mu_intercept = 1.7750480089801658e-05


def get_transcript_data(transcript_file_path:str = TRANSCRIPT_FILE_PATH, mmol = True,
Expand Down Expand Up @@ -43,25 +44,71 @@ def get_pam_fluxes(substrate_uptake_rate):
pam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
sol = pam.optimize()
pam_fluxes = sol.fluxes
return pam_fluxes
return pam_fluxes,pam

def set_up_tamodel(strain ='REF'):
def set_up_tamodel(substrate_uptake_rate, strain ='REF'):
tam = set_up_ecolicore_tam()
tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-1e6, upper_bound=0)
tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
transcript_data_mmol = get_transcript_data()

for gene, expression_data in transcript_data_mmol.iterrows():
transcript_id = 'mRNA_' + gene
if not transcript_id in tam.transcripts: continue
transcript = tam.transcripts.get_by_id('mRNA_' + gene)
# testing wildtype condition
transcript.change_concentration(concentration=expression_data[strain],
error=expression_data[strain] * 0.01)
i=0
infeasible = []
for transcript in tam.transcripts:
# print(enzyme, transcript)
conc = []
for gene in transcript.genes:
if gene.id in transcript_data_mmol.index: #and gene.id != 'b2987' and gene.id!= 'b3493': #genes related to phosphate transport seem to mess up the calculations
conc += [transcript_data_mmol.loc[gene.id, strain]]
if len(conc) > 0:
transcript.change_concentration(concentration=min(conc) * 1e-3, error= max(conc)*1e-3* 0.01)

tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-9, upper_bound=-9)
tam.optimize()
if tam.solver.status != 'optimal': infeasible += [transcript]
else: print(tam.objective.value)

elif gene.id != 'gene_dummy':
print(transcript)

print(infeasible)
# tam.test()
# print(transcript.enzymes)
# print(tam.solver.status)
# for gene, expression_data in transcript_data_mmol.iterrows():
# if i<10000:
# transcript_id = 'mRNA_' + gene
# if not transcript_id in tam.transcripts: continue
#
# transcript = tam.transcripts.get_by_id('mRNA_' + gene)
# # testing wildtype condition
# transcript.change_concentration(concentration=expression_data[strain]*1e-3,
# error=expression_data[strain] *1e-3* 0.01)
# i+=1
# tam.test()
# print(transcript.enzymes)
# print(tam.solver.status)
# print(transcript)
# if tam.solver.status != 'optimal':
# print('number of transcripts which were be added to the model: ',i)
# for name, cons in transcript._constraints.items():
# print(name, cons, expression_data[strain])
# break
# print('number of transcripts which could be added to the model: ', len(tam.transcripts))
# enz = tam.enzymes.get_by_id('2.7.3.9')
# print(enz.name, enz.enzyme_variable.concentration)
# print(infeasible)
return tam

def get_tam_fluxes(tam,substrate_uptake_rate):
tam.change_reaction_bounds('EX_glc__D_e', lower_bound=-substrate_uptake_rate, upper_bound=0)
sol = tam.optimize()
if tam.solver.status != 'optimal': return None
for transcript in tam.transcripts:
for constr in transcript._constraints.values():
if constr.dual !=0:
print(transcript, transcript.mrna_variable.primal, constr.primal)
print(transcript.f_min*transcript.mrna_variable.primal, transcript.f_max*transcript.mrna_variable.primal)
for enzyme in transcript.enzymes: print(enzyme.id, enzyme.concentration)
print(constr)
tam_fluxes = sol.fluxes
return tam_fluxes

Expand Down Expand Up @@ -92,21 +139,46 @@ def compare_flux_data(flux_data, pam_fluxes, tam_fluxes, strain ='REF', abs=True
print(flux_results_percentage.to_markdown())
return flux_results_percentage

def compare_fluxes_holm_reference(strain = 'REF'):
def compare_fluxes_holm_reference(strain = 'REF', plot = True):
flux_data =get_flux_data()
substrate_uptake_rate = flux_data[strain]['GLCptspp']

pam_fluxes = get_pam_fluxes(substrate_uptake_rate=substrate_uptake_rate)
pam_fluxes, pam = get_pam_fluxes(substrate_uptake_rate=substrate_uptake_rate)

tam = set_up_tamodel(strain)
tam = set_up_tamodel(substrate_uptake_rate,strain)
tam_fluxes = get_tam_fluxes(tam, substrate_uptake_rate=substrate_uptake_rate)
if tam_fluxes is None:
print('TAModel was infeasible')
return
for i,row in tam.capacity_sensitivity_coefficients.iterrows():
if row.coefficient > 0: print(row)

# for transcript in tam.transcripts:
# if transcript.mrna_variable.lb != 0:
# for gene in transcript.genes:
# gene_id = gene.id
# enzymes = tam.get_enzymes_by_gene_id(gene_id)
# for enzyme in enzymes:
# print(enzyme.id, 'TAM',enzyme.concentration,'PAM' ,pam.enzymes.get_by_id(enzyme.id).concentration)
# print()
# print('RNA constraints primal', tam.constraints[tam.TOTAL_MRNA_CONSTRAINT_ID].primal)
# print('RNA constraints dual', tam.constraints[tam.TOTAL_MRNA_CONSTRAINT_ID].dual)
# for enzyme in pam.enzyme_variables[:9]:
# tam_enzyme = tam.enzyme_variables.get_by_id(enzyme.id)
#
# print(enzyme.id, enzyme.concentration, tam_enzyme.concentration)
# tam_transcripts = tam.get_transcripts_associated_with_enzyme(tam.enzymes.get_by_id(enzyme.id), output='list')
# for transcript in tam_transcripts:
# print(transcript.id, transcript.mrna_variable.primal)
# print('bounds',transcript.mrna_variable.lb, transcript.mrna_variable.ub)
# for constraint in transcript._constraints.values():
# if constraint.dual>0: print(constraint, constraint.dual)


flux_relative = compare_flux_data(flux_data, pam_fluxes, tam_fluxes, strain,abs = False)
flux_absolute = compare_flux_data(flux_data, pam_fluxes, tam_fluxes, strain)

plot_flux_comparison(flux_absolute, flux_relative, strain)
if plot: plot_flux_comparison(flux_absolute, flux_relative, strain)

def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):
fig, ax = plt.subplots(1,2)
Expand Down Expand Up @@ -136,11 +208,12 @@ def plot_flux_comparison(flux_df_abs, flux_df_rel, strain):


if __name__ == '__main__':

print('Reference condition')
compare_fluxes_holm_reference()
compare_fluxes_holm_reference(plot =False)
print('\n-------------------------------------------------------------------------------------------------')
print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
compare_fluxes_holm_reference('NOX')
# print('mutation 1: NOX strain (overexpression of NADH oxidase)\n')
# compare_fluxes_holm_reference('NOX', plot=False)
# TODO print mRNA and protein concentrations to compare with lb
# TODO print shadowprices of mRNA (are lbs hit? how far can I constrain?)

Expand Down
10 changes: 5 additions & 5 deletions Scripts/pam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from src.PAModelpy.EnzymeSectors import ActiveEnzymeSector, UnusedEnzymeSector, TransEnzymeSector
from src.PAModelpy.configuration import Config

from Scripts.tam_generation import parse_reaction2protein2gene2transcript
from Scripts.toy_ec_pam import build_toy_gem, build_active_enzyme_sector, build_translational_protein_sector, build_unused_protein_sector

'Function library for making Protein Allocation Models as described in the publication'
Expand Down Expand Up @@ -43,7 +44,9 @@ def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True,

# some other constants
BIOMASS_REACTION = 'BIOMASS_Ecoli_core_w_GAM'
TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]
# TOTAL_PROTEIN_CONCENTRATION = 0.16995 # [g_prot/g_cdw]
TOTAL_PROTEIN_CONCENTRATION = 0.185 # [g_prot/g_cdw]


config = Config()
config.reset()
Expand All @@ -57,7 +60,7 @@ def set_up_ecolicore_pam(total_protein:bool = True, active_enzymes: bool = True,
enzyme_db = _parse_enzyme_information_from_file(TAM_DATA_FILE_PATH)

# create enzyme objects for each gene-associated reaction
rxn2protein, protein2gene, gene2transcript = parse_reaction2protein(enzyme_db, model)
rxn2protein, protein2gene, gene2transcript = parse_reaction2protein2gene2transcript(enzyme_db, model)

# create active enzymes sector
active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein,
Expand Down Expand Up @@ -252,7 +255,6 @@ def parse_coefficients(pamodel):
def parse_esc(pamodel):
return pamodel.enzyme_sensitivity_coefficients.coefficient.to_list()


def _parse_enzyme_information_from_file(file_path:str):
# load active enzyme sector information
enzyme_db = pd.read_excel(file_path, sheet_name='enzyme-gene-reaction')
Expand Down Expand Up @@ -376,7 +378,6 @@ def _get_fwd_bckw_kcat(rxn_id: str, kcat:float, model:PAModel) -> Union[list, No

# Iterate over each identifier in the input
if base_id in model.reactions:
if not model.reactions.get_by_id(base_id).genes: return None
# Determine the form of the identifier
if rxn_id.endswith('_f'):
kcat_fwd = kcat
Expand All @@ -391,7 +392,6 @@ def _get_fwd_bckw_kcat(rxn_id: str, kcat:float, model:PAModel) -> Union[list, No
else:
return None
elif rxn_id in model.reactions:
if not model.reactions.get_by_id(rxn_id).genes: return None
kcat_fwd = kcat
kcat_rev = kcat
else:
Expand Down
40 changes: 25 additions & 15 deletions Scripts/tam_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,17 @@ def set_up_ecolicore_tam(total_protein:bool = True, active_enzymes: bool = True,
enzyme_db = _parse_enzyme_information_from_file(TAM_DATA_FILE_PATH)

# create enzyme objects for each gene-associated reaction
rxn2protein, protein2gene, gene2transcript = parse_reaction2protein(enzyme_db, model)

rxn2protein, protein2gene, gene2transcript = parse_reaction2protein2gene2transcript(enzyme_db, model)

# import random
# gene2transcript2 ={}
# for i in range(50):
# gene, transcript = random.choice(list(gene2transcript.items()))
# gene2transcript2 = {**gene2transcript2, gene:transcript}
# gene2, transcript2 = random.choice(list(gene2transcript.items()))
# gene3, transcript3 = random.choice(list(gene2transcript.items()))
#
# gene2transcript = {gene:transcript, gene2:transcript2, gene3:transcript3}
# create active enzymes sector
active_enzyme_sector = ActiveEnzymeSector(rxn2protein=rxn2protein,
protein2gene = protein2gene,
Expand Down Expand Up @@ -284,7 +293,7 @@ def _parse_enzyme_information_from_file(file_path:str):
return enzyme_db


def parse_reaction2protein(enzyme_db: pd.DataFrame, model:cobra.Model) -> dict:
def parse_reaction2protein2gene2transcript(enzyme_db: pd.DataFrame, model:cobra.Model) -> dict:
# Initialize dictionaries
rxn2protein = {}
protein2gene = {}
Expand All @@ -301,24 +310,25 @@ def parse_reaction2protein(enzyme_db: pd.DataFrame, model:cobra.Model) -> dict:
#check if there is a forward or backward string which needs to be removed from the rxn id
if any([rxn_id.endswith('_f'), rxn_id.endswith('_b')]): rxn_id = rxn_id[:-2]

rxn = model.reactions.get_by_id(rxn_id)
#get the identifiers and replace nan values by dummy placeholders
enzyme_id = row['EC_nmbr']
if not isinstance(enzyme_id, str): enzyme_id = 'Enzyme_'+rxn_id
gene_id = row['gene_id']
if not isinstance(gene_id, str): gene_id = 'gene_dummy'
mrna_length = row['mrna_length']
if len(rxn.genes)>0:
if not isinstance(enzyme_id, str): enzyme_id = 'Enzyme_'+rxn_id
if not isinstance(gene_id, str): gene_id = 'gene_dummy'
mrna_length = row['mrna_length']

#get the gene-protein-reaction-associations
gpr = parse_gpr_information_for_protein2genes(row['gpr'])
#get the gene-protein-reaction-associations
gpr = parse_gpr_information_for_protein2genes(row['gpr'])

# Create gene-protein-reaction associations
if enzyme_id not in protein2gene:
protein2gene[enzyme_id] = []
protein2gene[enzyme_id].append(gpr)
# Create gene-protein-reaction associations
if enzyme_id not in protein2gene:
protein2gene[enzyme_id] = gpr

# Create gene2transcript dictionary
if gene_id not in gene2transcript.keys():
gene2transcript[gene_id] = {'id': f'mRNA_{gene_id}', 'length': mrna_length}
# Create gene2transcript dictionary
if gene_id not in gene2transcript.keys():
gene2transcript[gene_id] = {'id': f'mRNA_{gene_id}', 'length': mrna_length}

# Create rxn2protein dictionary
if rxn_id not in rxn2protein:
Expand Down
5 changes: 1 addition & 4 deletions docs/source/example.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ First, we'll define the paths we'll download the data

```python
protein_sector_info_path = 'Data/proteinAllocationModel_iML1515_EnzymaticData_py.xls'
active_enzyme_data = pd.read_excel(protein_sector_info_path, sheet_name='ActiveEnzymes'))
active_enzyme_info = pd.read_excel(protein_sector_info_path, sheet_name='ActiveEnzymes'))
```

The data is now in a dataframe with the following columns:
Expand All @@ -52,9 +52,6 @@ rxn_id - rxnName - rxnEquat - EC_nmbr - molMass
First, let's add an identifier to the reactions for which the enzyme is unknown, in order to distinguish between the
enzymes
```python
#load active enzyme sector information
active_enzyme_info = pd.read_excel(pam_info_file, sheet_name='ActiveEnzymes')

# replace NaN values with unique identifiers
#select the NaN values
nan_values = active_enzyme_info['EC_nmbr'].isnull()
Expand Down
42 changes: 38 additions & 4 deletions src/PAModelpy/Enzyme.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import PAModelpy.Enzyme
import cobra.core
import cobra
from cobra import Reaction, Object
from cobra import Reaction, Object, Gene
from cobra.exceptions import OptimizationError
from cobra.util.solver import check_solver_status
from copy import copy, deepcopy
Expand Down Expand Up @@ -87,6 +87,7 @@ def __init__(
self._model = None
self.enzyme_complex = [] #is the enzyme in a complex?
self.genes = genes
self.transcripts = DictList()
self.annotation = {'type':'Constraint'}#you can add an annotation for an enzyme

@property
Expand Down Expand Up @@ -114,9 +115,7 @@ def concentration(self, units:str = 'mmol/gDW', return_units:bool = False) -> fl
"""

# sum up concentrations (aka fluxes) of all enzyme objects
concentration = 0.0
for catalytic_event in self.catalytic_events:
concentration += catalytic_event.flux()
concentration = self.enzyme_variable.concentration
if units == 'g/gDW':
#converting mmol to grams of protein:
# [g] = [mmol]* 1e-3 [mol/mmol] * MW[g/mol]
Expand Down Expand Up @@ -147,6 +146,40 @@ def add_catalytic_event(self, ce: CatalyticEvent, kcats: Dict):
self.catalytic_events += [ce]
self.enzyme_variable.add_catalytic_events([ce],[kcats])

def add_genes(self, gene_list: list, gene_length:list, relation:str = 'OR') -> None:
"""
Add genes to the enzyme and the model related to the enzyme if applicable
Args:
gene_list (list): A list of gene identifiers to be added.
gene_length (list): A list of lengths corresponding to each gene.
relation (str, optional): The relationship between genes in gene_list.
Defaults to 'OR'. Possible values: 'OR' or 'AND'.
Raises:
ValueError: If an invalid relation is provided.
Note:
If relation is 'OR', each gene in gene_list will be treated as coding for an individual isozyme
If relation is 'AND', all genes in gene_list will be treated as coding for peptides in an enzyme complex
"""
# check/correct type of arguments
if isinstance(gene_list, str): gene_list = [gene_list]
if not isinstance(gene_length, list): gene_length = [gene_length]

if relation == 'OR':
genes_to_add = [[Gene(gene)] for gene in gene_list][0]
elif relation == 'AND':
genes_to_add = [Gene(gene) for gene in gene_list]
else:
raise ValueError("Invalid relation. Supported values are 'OR' or 'AND'.")
self.genes.append(genes_to_add)

if self._model is not None:
self._model.add_genes(genes = genes_to_add, enzymes = [self], gene_lengths=gene_length)


def create_catalytic_event(self, rxn_id: str, kcats: Dict):
"""creates enzyme variables that link to reactions
Expand Down Expand Up @@ -633,6 +666,7 @@ def add_reactions(self, reaction_kcat_dict: dict):
self.reverse_variable: -1
})


def remove_catalytic_event(self, catalytic_event: Union[CatalyticEvent, str]):
"""
Function to remove a catalytic event from an enzyme
Expand Down
Loading

0 comments on commit 80da693

Please sign in to comment.