Skip to content

Commit

Permalink
initial work towards tamodel
Browse files Browse the repository at this point in the history
  • Loading branch information
SamiralVdB committed Jan 26, 2024
1 parent c53065c commit 09162fb
Show file tree
Hide file tree
Showing 16 changed files with 6,984 additions and 10 deletions.
Binary file not shown.
1,438 changes: 1,438 additions & 0 deletions Data/TAModel/All-enzymes-of-E.-coli-K-12-substr.-MG1655.tsv

Large diffs are not rendered by default.

4,747 changes: 4,747 additions & 0 deletions Data/TAModel/All-genes-of-E.-coli-K-12-substr.-MG1655.tsv

Large diffs are not rendered by default.

Empty file added Scripts/__init__.py
Empty file.
78 changes: 78 additions & 0 deletions Scripts/parse_ecoli_gpr_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from cobra.io import read_sbml_model
from cobra import Reaction
import os
import pandas as pd
import numpy as np
from datetime import date

# Directory holding the Ecocyc exports and the generated TAModel workbook.
TAM_DATA_PATH = os.path.join('Data', 'TAModel')
# The workbook name is prefixed with today's ISO date, so the path changes
# from one day to the next.
TAM_DATA_FILE = os.path.join(
    TAM_DATA_PATH,
    f'{date.today()}_gene_enzyme_reaction_relation_Ecoli.xlsx',
)

def parse_gpr_relationships_from_Ecocyc():
    """Link Ecocyc gene/enzyme records to iML1515 reactions and save to Excel.

    Reads the Ecocyc enzyme and gene TSV exports from ``TAM_DATA_PATH``,
    merges them on gene name, and looks every gene accession up in the
    iML1515 model. One row is produced per (gene record, reaction) pair,
    together with the parsed gene-protein-reaction rule of that reaction.

    The result is written to ``TAM_DATA_FILE`` in two sheets:

    * ``enzyme-gene-reaction``: the matched rows.
    * ``non-matched_reactions``: ids of model reactions that no gene record
      matched, excluding exchange reactions (ids starting with ``EX_``).

    Returns:
        None. The output is the Excel workbook.
    """
    # files last downloaded from Ecocyc: 2024 - 01 - 25
    # manual curation of the tabs in the gene file is required!
    enzymes_to_genes = pd.read_csv(
        os.path.join(TAM_DATA_PATH, 'All-enzymes-of-E.-coli-K-12-substr.-MG1655.tsv'),
        sep='\t',
    )
    genes_info = pd.read_csv(
        os.path.join(TAM_DATA_PATH, 'All-genes-of-E.-coli-K-12-substr.-MG1655.tsv'),
        sep='\t',
    ).drop('Object ID', axis=1)
    genes_info['Accession-1'] = genes_info['Accession-1'].str.strip()

    enzyme_gene_info = pd.merge(enzymes_to_genes, genes_info, how='outer',
                                left_on='Gene', right_on='Gene Name')
    # NOTE(review): if the Ecocyc end positions are inclusive this is one
    # base short of the gene length -- confirm before relying on it.
    enzyme_gene_info['mrna_length'] = (enzyme_gene_info['Right-End-Position']
                                       - enzyme_gene_info['Left-End-Position'])

    enzyme_gene_info = enzyme_gene_info.drop(
        ['Catalyzes', 'Sequence - polypeptide sequence', 'Gene Name',
         'Left-End-Position', 'Right-End-Position', 'Product'], axis=1)
    model_path = os.path.join('Models', 'iML1515.xml')
    model = read_sbml_model(model_path)

    output_columns = list(enzyme_gene_info.columns) + ['Reaction', 'gpr']

    # match gpr relationships and average the molecular mass
    # Rows are collected in a plain list and the frame is built once at the
    # end; growing a DataFrame row by row copies it on every append.
    matched_rows = []
    for _, row in enzyme_gene_info.iterrows():
        molmass = row['Molecular-Weight-KiloDaltons']
        # Several masses for one enzyme are stored as 'm1//m2//...'.
        if isinstance(molmass, str):
            row['Molecular-Weight-KiloDaltons'] = np.mean(
                [float(mass) for mass in molmass.split('//')])

        if row['Accession-1'] in model.genes:
            gene = model.genes.get_by_id(row['Accession-1'])
            for rxn in gene.reactions:
                gpr_info = parse_and_or_gpr_relations_from_string(rxn.gpr.to_string())
                matched_rows.append(row.to_list() + [rxn.id, gpr_info])

    enzyme_gene_reaction_relation = pd.DataFrame(matched_rows, columns=output_columns)

    # find model reactions which are not matched and not exchange reactions
    # A prefix test is used because a substring test on 'EX' also discards
    # unrelated reactions whose id merely contains those letters (e.g. HEX1).
    matched_reaction_ids = set(enzyme_gene_reaction_relation['Reaction'])
    not_matched_reactions = [
        rxn.id for rxn in model.reactions
        if rxn.id not in matched_reaction_ids and not rxn.id.startswith('EX_')
    ]

    # make df look pretty
    enzyme_gene_reaction_relation = enzyme_gene_reaction_relation[
        ['Gene', 'Enzyme', 'Reaction', 'Accession-1', 'Object ID',
         'Molecular-Weight-KiloDaltons', 'mrna_length', 'gpr']]
    enzyme_gene_reaction_relation.columns = ['Gene', 'Enzyme', 'Reaction', 'gene_id', 'enzyme_id',
                                             'molmass_kDa', 'mrna_length', 'gpr']

    # write to excel
    with pd.ExcelWriter(TAM_DATA_FILE) as writer:
        enzyme_gene_reaction_relation.to_excel(writer, sheet_name='enzyme-gene-reaction')
        pd.DataFrame({'not_matched': not_matched_reactions}).to_excel(
            writer, sheet_name='non-matched_reactions')


def parse_and_or_gpr_relations_from_string(gpr_info: str):
    """Expand a GPR rule string into a list of alternative gene complexes.

    The rule is read as a boolean expression over gene ids using the
    lowercase operators ``and`` / ``or`` and parentheses, and is expanded so
    that the outer list holds the OR-alternatives and each inner list holds
    the genes that are AND-ed together in one alternative.

    Examples:
        ``'(a and b) or (c and d)'`` -> ``[['a', 'b'], ['c', 'd']]``
        ``'a or b'``                 -> ``[['a'], ['b']]``
        ``'(a or b) and c'``         -> ``[['a', 'c'], ['b', 'c']]``

    Args:
        gpr_info: the rule string, e.g. the output of ``GPR.to_string()``.

    Returns:
        A list of lists of gene ids. An empty or whitespace-only rule gives
        an empty list.

    Raises:
        ValueError: if the rule is malformed (unbalanced parentheses, a
            missing operand or a missing operator).
    """
    # Pad parentheses with spaces so a plain split() yields the tokens.
    tokens = gpr_info.replace('(', ' ( ').replace(')', ' ) ').split()
    if not tokens:
        return []

    position = 0

    def peek():
        return tokens[position] if position < len(tokens) else None

    def advance():
        nonlocal position
        token = peek()
        position += 1
        return token

    def parse_or():
        # 'or' joins alternatives: simply concatenate them.
        alternatives = parse_and()
        while peek() == 'or':
            advance()
            alternatives = alternatives + parse_and()
        return alternatives

    def parse_and():
        # 'and' binds tighter than 'or': combine every alternative on the
        # left with every alternative on the right, dropping repeated genes
        # within one complex while keeping their first-seen order.
        complexes = parse_operand()
        while peek() == 'and':
            advance()
            right_side = parse_operand()
            complexes = [list(dict.fromkeys(left + right))
                         for left in complexes for right in right_side]
        return complexes

    def parse_operand():
        token = advance()
        if token == '(':
            inner = parse_or()
            if advance() != ')':
                raise ValueError(f'unbalanced parentheses in GPR rule: {gpr_info!r}')
            return inner
        if token is None or token in (')', 'and', 'or'):
            raise ValueError(f'missing gene id in GPR rule: {gpr_info!r}')
        return [[token]]

    gpr_list = parse_or()
    if peek() is not None:
        raise ValueError(f'unexpected token {peek()!r} in GPR rule: {gpr_info!r}')
    return gpr_list

def parse_enzymatic_data_information():
    """Join the enzymatic data sheet with the gene-enzyme-reaction table.

    Reads the ``enzyme-gene-reaction`` sheet of ``TAM_DATA_FILE`` and the
    ``ActiveEnzymes`` sheet of the protein allocation model workbook, then
    left-merges them on reaction id (``rxnID`` on the enzymatic data side,
    ``Reaction`` on the gene-enzyme-reaction side).

    Returns:
        The merged table, one row per match, keeping every row of the
        ``ActiveEnzymes`` sheet.
    """
    # The sheet was saved with its index as the first column; read it back
    # as the index so it does not appear as a stray 'Unnamed: 0' column.
    # NOTE(review): the 'gpr' column presumably comes back as the string
    # form of the nested list -- confirm before using it downstream.
    enzyme_gene_reaction_relation = pd.read_excel(
        TAM_DATA_FILE, sheet_name='enzyme-gene-reaction', index_col=0)
    enzyme_info = pd.read_excel(
        os.path.join('Data', 'proteinAllocationModel_iML1515_EnzymaticData_py.xls'),
        sheet_name='ActiveEnzymes')

    tam_info = pd.merge(enzyme_info, enzyme_gene_reaction_relation, how='left',
                        left_on='rxnID', right_on='Reaction')
    # Return the table instead of discarding it so callers can use it.
    return tam_info




# Running the file as a script regenerates the gene-enzyme-reaction workbook.
if __name__ == "__main__":
    parse_gpr_relationships_from_Ecocyc()
Loading

0 comments on commit 09162fb

Please sign in to comment.