diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 4666b73f..a4af9f80 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -25,6 +25,25 @@ error('Yaml file %s cannot be found', string(yamlFilename)); end +if verLessThan('matlab','9.9') %readlines introduced 2020b + fid=fopen(yamlFilename); + line_raw=cell(1000000,1); + while ~feof(fid) + line_raw{i}=fgetl(fid); + i=i+1; + end + line_raw(i:end)=[]; + line_raw=string(line_raw); +else + line_raw=readlines(yamlFilename'); +end + +line_key=regexprep(line_raw,'^ *-? ([^:]+)(:).*','$1'); +line_key=regexprep(line_key,'(.*!!omap)|(---)',''); + +line_value = regexprep(line_raw, '[^":]+: "?(.+)"?$','$1'); +line_value = regexprep(line_value, '"',''); + % Define the required fields of humanGEM % There are a total of 37 fields in the model so far, the non-generic ones % are excluded here @@ -65,60 +84,59 @@ rightEqns={}; objRxns={}; - -% Load Yaml format model - -fid = fopen(yamlFilename); -if ~silentMode - fprintf('Start importing...\n'); -end - section = 0; -while ~feof(fid) - tline = fgetl(fid); - +for i=1:numel(line_key) + tline_raw = line_raw{i}; + tline_key = line_key{i}; + tline_value = line_value{i}; % import different sections - change_to_section = 0; - switch tline + switch tline_raw case '- metaData:' - change_to_section = 1; + section = 1; + if ~silentMode + fprintf('\t%d\n', section); + end + continue % Go to next line case '- metabolites:' - change_to_section = 2; + section = 2; + if ~silentMode + fprintf('\t%d\n', section); + end + continue case '- reactions:' - change_to_section = 3; + section = 3; + readSubsystems = false; + readEquation = false; rxnId = ''; + if ~silentMode + fprintf('\t%d\n', section); + end + continue case '- genes:' - change_to_section = 4; + section = 4; + if ~silentMode + fprintf('\t%d\n', section); + end + if readSubsystems + model.subSystems(end+1,1) = {subSystems}; %last entry from section 3 + readSubsystems = false; + end + continue case '- compartments: !!omap' - change_to_section = 5; - end - if logical(change_to_section) - section = change_to_section; - tline = fgetl(fid); - if ~silentMode - fprintf('\t%d\n', section); - end - - if section == 4 - readSubsystems = true; - else - readSubsystems = false; - end - + section = 5; + if ~silentMode + fprintf('\t%d\n', section); + end + continue end - % skip over lines containing only omap - if any(regexp(tline, "- !!omap")) - if readSubsystems - model.subSystems = [model.subSystems; {subSystems}]; - readSubsystems = false; - end - tline = fgetl(fid); + % skip over empty keys + if isempty(tline_key) + continue; end % import metaData if section == 1 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'short_name' model.id = tline_value; @@ -143,7 +161,6 @@ % import metabolites: if section == 2 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'id' model = readFieldValue(model, 'mets', tline_value); @@ -164,7 +181,6 @@ % import reactions: if section == 3 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'id' model = readFieldValue(model, 'rxns', tline_value); @@ -173,13 +189,13 @@ model = readFieldValue(model, 'rxnNames', tline_value); case 'lower_bound' - model.lb = [model.lb; tline_value]; - leftEqns = [leftEqns; leftEquation]; - rightEqns = [rightEqns; rightEquation]; + model.lb(end+1,1) = {tline_value}; + leftEqns(end+1,1) = {leftEquation}; + rightEqns(end+1,1) = {rightEquation}; readEquation = false; case 'upper_bound' - model.ub = [model.ub; tline_value]; + model.ub(end+1,1) = {tline_value}; case 'gene_reaction_rule' model = readFieldValue(model, 'grRules', tline_value); @@ -191,7 +207,7 @@ model = readFieldValue(model, 'rxnFrom', tline_value); case 'objective_coefficient' - objRxns = [objRxns; rxnId]; + objRxns(end+1,1) = {rxnId}; case 'eccodes' model = readFieldValue(model, 'eccodes', tline_value); @@ -208,28 +224,32 @@ case 'metabolites' readEquation = true; - leftEquation = {''}; - rightEquation = {''}; - + leftEquation = ''; + rightEquation = ''; + if readSubsystems + model.subSystems(end+1,1) = {subSystems}; + readSubsystems = false; + end + otherwise if readSubsystems - subSystems = [subSystems; regexprep(tline_key, '"', '')]; - + subSystems(end+1,1) = {regexprep(tline_value, '^ *- (.+)$','$1')}; + % resolve the equation elseif readEquation - metCoeffi = regexp(regexprep(tline, ' +- ', ''), ': ', 'split'); - coeffi = str2num(metCoeffi{2}); - if coeffi < 0 + metCoeffi = regexp(regexprep(tline_raw, ' +- ', ''), ': ', 'split'); + coeffi = metCoeffi{2}; + if str2double(coeffi) < 0 if strcmp(leftEquation, '') - leftEquation = strcat(num2str(abs(coeffi), 12),32,metCoeffi{1}); + leftEquation = [coeffi(2:end),' ',metCoeffi{1}]; %Remove minus sign from coefficient else - leftEquation = strcat(leftEquation,' +',32,num2str(abs(coeffi), 12),32,metCoeffi{1}); + leftEquation = [leftEquation,' + ',coeffi(2:end),' ',metCoeffi{1}]; end else if strcmp(rightEquation, '') - rightEquation = strcat(32,num2str(coeffi, 12),32,metCoeffi{1}); + rightEquation = [' ',coeffi,' ',metCoeffi{1}]; else - rightEquation = strcat(rightEquation,' +',32,num2str(coeffi, 12),32,metCoeffi{1}); + rightEquation = [rightEquation,' + ',coeffi,' ',metCoeffi{1}]; end end end @@ -238,20 +258,16 @@ % import genes: if section == 4 - [tline_key, tline_value] = tokenizeYamlLine(tline); model = readFieldValue(model, 'genes', tline_value); end % import compartments: if section == 5 - [tline_key, tline_value] = tokenizeYamlLine(tline); - model.comps = [model.comps; tline_key]; - model.compNames = [model.compNames; tline_value]; + model.comps(end+1,1) = {tline_key}; + model.compNames(end+1,1) = {tline_value}; end end -fclose(fid); - % follow-up data processing if ~silentMode @@ -293,17 +309,5 @@ end function model = readFieldValue(model, fieldName, value) - model.(fieldName) = [model.(fieldName); {value}]; + model.(fieldName)(end+1,1) = {value}; end - -function [line_key, line_value]= tokenizeYamlLine(line) - line_key = regexp(line, '^ *-? ([^:]+)', 'tokens'); - line_key = char(line_key{1}); - line_value = regexp(line, '^ [^:]+: "?(.+)"?$', 'tokens'); - if isempty(line_value) - line_value = ''; - else - line_value = regexprep(line_value{1}, '"', ''); - line_value = char(line_value{1}); - end -end \ No newline at end of file diff --git a/model/Human-GEM.yml b/model/Human-GEM.yml index 83f8c183..ba8d7b4d 100644 --- a/model/Human-GEM.yml +++ b/model/Human-GEM.yml @@ -4,7 +4,7 @@ short_name: "Human-GEM" full_name: "Generic genome-scale metabolic model of Homo sapiens" version: "" - date: "2021-12-18" + date: "2022-06-14" authors: "Jonathan Robinson, Hao Wang, Pierre-Etienne Cholley, Pinar Kocabas" email: "jonrob@chalmers.se" organization: "Chalmers University of Technology" @@ -292964,7 +292964,7 @@ - rxnNotes: "DOI:10.1007/978-1-4419-0840-7" - rxnFrom: "Recon3D" - eccodes: "" - - references: PMID:17655371" + - references: "PMID:17655371" - subsystem: - "Drug metabolism" - confidence_score: 0 @@ -305769,8 +305769,8 @@ - id: "MAR13086" - name: "" - metabolites: !!omap - - MAM02847n: -1 - MAM02847c: 1 + - MAM02847n: -1 - lower_bound: 0 - upper_bound: 1000 - gene_reaction_rule: "ENSG00000030066 and ENSG00000047410 and ENSG00000058804 and ENSG00000069248 and ENSG00000075188 and ENSG00000085415 and ENSG00000093000 and ENSG00000094914 and ENSG00000095319 and ENSG00000101146 and ENSG00000102900 and ENSG00000108559 and ENSG00000110713 and ENSG00000111581 and ENSG00000113569 and ENSG00000119392 and ENSG00000120253 and ENSG00000124789 and ENSG00000125450 and ENSG00000126883 and ENSG00000132182 and ENSG00000136243 and ENSG00000138750 and ENSG00000139496 and ENSG00000153201 and ENSG00000153207 and ENSG00000155561 and ENSG00000157020 and ENSG00000157349 and ENSG00000163002 and ENSG00000196313 and ENSG00000213024"