From 9faa6a58f3caa124e7efac9ee2f67d9d89ec1914 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 13:32:05 +0200 Subject: [PATCH 1/8] refactor: speed up writing model fields --- code/io/importYaml.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index ac6486fe..424d72e1 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -286,7 +286,7 @@ end function model = readFieldValue(model, fieldName, value) - model.(fieldName) = [model.(fieldName); {value}]; + model.(fieldName)(end+1,1) = {value}; end function [line_key, line_value]= tokenizeYamlLine(line) From 90c54d8459f089dc898eb38e2299306ca30aea63 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 15:01:03 +0200 Subject: [PATCH 2/8] refactor: speed-up other model fields --- code/io/importYaml.m | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 424d72e1..ec2d00f0 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -164,13 +164,13 @@ model = readFieldValue(model, 'rxnNames', tline_value); case 'lower_bound' - model.lb = [model.lb; tline_value]; - leftEqns = [leftEqns; leftEquation]; - rightEqns = [rightEqns; rightEquation]; + model.lb(end+1,1) = {tline_value}; + leftEqns(end+1,1) = {leftEquation}; + rightEqns(end+1,1) = {rightEquation}; readEquation = false; case 'upper_bound' - model.ub = [model.ub; tline_value]; + model.ub(end+1,1) = {tline_value}; case 'gene_reaction_rule' model = readFieldValue(model, 'grRules', tline_value); @@ -182,7 +182,7 @@ model = readFieldValue(model, 'rxnFrom', tline_value); case 'objective_coefficient' - objRxns = [objRxns; rxnId]; + objRxns(end+1,1) = {rxnId}; case 'eccodes' model = readFieldValue(model, 'eccodes', tline_value); @@ -196,17 +196,17 @@ case 'confidence_score' model = readFieldValue(model, 'rxnConfidenceScores', tline_value); - model.subSystems = [model.subSystems; {subSystems}]; + model.subSystems(end+1,1) = {subSystems}; readSubsystems = false; case 'metabolites' readEquation = true; - leftEquation = {''}; - rightEquation = {''}; + leftEquation = ''; + rightEquation = ''; otherwise if readSubsystems - subSystems = [subSystems; regexprep(tline_key, '"', '')]; + subSystems(end+1,1) = {regexprep(tline_key, '"', '')}; % resolve the equation elseif readEquation @@ -238,8 +238,8 @@ % import compartments: if section == 5 [tline_key, tline_value] = tokenizeYamlLine(tline); - model.comps = [model.comps; tline_key]; - model.compNames = [model.compNames; tline_value]; + model.comps(end+1,1) = {tline_key}; + model.compNames(end+1,1) = {tline_value}; end end From 922a46b650672cd506f5c50edc312c7dfb0251f5 Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 15:12:57 +0200 Subject: [PATCH 3/8] chore: yml file after importYaml & exportYaml --- model/Human-GEM.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/model/Human-GEM.yml b/model/Human-GEM.yml index 83f8c183..ba8d7b4d 100644 --- a/model/Human-GEM.yml +++ b/model/Human-GEM.yml @@ -4,7 +4,7 @@ short_name: "Human-GEM" full_name: "Generic genome-scale metabolic model of Homo sapiens" version: "" - date: "2021-12-18" + date: "2022-06-14" authors: "Jonathan Robinson, Hao Wang, Pierre-Etienne Cholley, Pinar Kocabas" email: "jonrob@chalmers.se" organization: "Chalmers University of Technology" @@ -292964,7 +292964,7 @@ - rxnNotes: "DOI:10.1007/978-1-4419-0840-7" - rxnFrom: "Recon3D" - eccodes: "" - - references: PMID:17655371" + - references: "PMID:17655371" - subsystem: - "Drug metabolism" - confidence_score: 0 @@ -305769,8 +305769,8 @@ - id: "MAR13086" - name: "" - metabolites: !!omap - - MAM02847n: -1 - MAM02847c: 1 + - MAM02847n: -1 - lower_bound: 0 - upper_bound: 1000 - gene_reaction_rule: "ENSG00000030066 and ENSG00000047410 and ENSG00000058804 and ENSG00000069248 and ENSG00000075188 and ENSG00000085415 and ENSG00000093000 and ENSG00000094914 and ENSG00000095319 and ENSG00000101146 and ENSG00000102900 and ENSG00000108559 and ENSG00000110713 and ENSG00000111581 and ENSG00000113569 and ENSG00000119392 and ENSG00000120253 and ENSG00000124789 and ENSG00000125450 and ENSG00000126883 and ENSG00000132182 and ENSG00000136243 and ENSG00000138750 and ENSG00000139496 and ENSG00000153201 and ENSG00000153207 and ENSG00000155561 and ENSG00000157020 and ENSG00000157349 and ENSG00000163002 and ENSG00000196313 and ENSG00000213024" From 0bd19d2ac8d4669665899559b7962bd81ca5cb7d Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 15:32:47 +0200 Subject: [PATCH 4/8] refactor: another ~6 sec saved by faster concat --- code/io/importYaml.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index ec2d00f0..0eae9fdc 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -214,15 +214,15 @@ coeffi = str2num(metCoeffi{2}); if coeffi < 0 if strcmp(leftEquation, '') - leftEquation = strcat(num2str(abs(coeffi), 12),32,metCoeffi{1}); + leftEquation = [num2str(abs(coeffi), 12),' ',metCoeffi{1}]; else - leftEquation = strcat(leftEquation,' +',32,num2str(abs(coeffi), 12),32,metCoeffi{1}); + leftEquation = [leftEquation,' + ',num2str(abs(coeffi), 12),' ',metCoeffi{1}]; end else if strcmp(rightEquation, '') - rightEquation = strcat(32,num2str(coeffi, 12),32,metCoeffi{1}); + rightEquation = [' ',num2str(coeffi, 12),' ',metCoeffi{1}]; else - rightEquation = strcat(rightEquation,' +',32,num2str(coeffi, 12),32,metCoeffi{1}); + rightEquation = [rightEquation,' + ',num2str(coeffi, 12),' ',metCoeffi{1}]; end end end From de3c7f49feeb617799a01708b87352d448a9a0bf Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 15:39:49 +0200 Subject: [PATCH 5/8] refactor: avoid conversion str->num->str --- code/io/importYaml.m | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 0eae9fdc..7e901c82 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -211,18 +211,18 @@ % resolve the equation elseif readEquation metCoeffi = regexp(regexprep(tline, ' +- ', ''), ': ', 'split'); - coeffi = str2num(metCoeffi{2}); - if coeffi < 0 + coeffi = metCoeffi{2}; + if str2double(coeffi) < 0 if strcmp(leftEquation, '') - leftEquation = [num2str(abs(coeffi), 12),' ',metCoeffi{1}]; + leftEquation = [coeffi(2:end),' ',metCoeffi{1}]; %Remove minus sign from coefficient else - leftEquation = [leftEquation,' + ',num2str(abs(coeffi), 12),' ',metCoeffi{1}]; + leftEquation = [leftEquation,' + ',coeffi(2:end),' ',metCoeffi{1}]; end else if strcmp(rightEquation, '') - rightEquation = [' ',num2str(coeffi, 12),' ',metCoeffi{1}]; + rightEquation = [' ',coeffi,' ',metCoeffi{1}]; else - rightEquation = [rightEquation,' + ',num2str(coeffi, 12),' ',metCoeffi{1}]; + rightEquation = [rightEquation,' + ',coeffi,' ',metCoeffi{1}]; end end end From 9aea8ae03e5dcd3e3623cb745510e43aa02bd3fb Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Tue, 14 Jun 2022 22:38:37 +0200 Subject: [PATCH 6/8] refactor: importYaml first reads whole file --- code/io/importYaml.m | 105 ++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 7e901c82..02e3559a 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -25,6 +25,25 @@ error('Yaml file %s cannot be found', string(yamlFilename)); end +if verLessThan('matlab','9.9') %readlines introduced 2020b + fid=fopen(yamlFile); + line_raw=cell(1000000,1); + while ~feof(fid) + line_raw{i}=fgetl(fid); + i=i+1; + end + line_raw(i:end)=[]; + line_raw=string(line_raw); +else + line_raw=readlines(yamlFile'); +end + +line_key=regexprep(line_raw,'^ *-? ([^:]+)(:).*','$1'); +line_key=regexprep(line_key,'(.*!!omap)|(---)',''); + +line_value = regexprep(line_raw, '[^":]+: "?(.+)"?$','$1'); +line_value = regexprep(line_value, '"',''); + % Define the required fields of humanGEM % There are a total of 37 fields in the model so far, the non-generic ones % are excluded here @@ -65,51 +84,55 @@ rightEqns={}; objRxns={}; - -% Load Yaml format model - -fid = fopen(yamlFilename); -if ~silentMode - fprintf('Start importing...\n'); -end - section = 0; -while ~feof(fid) - tline = fgetl(fid); - +for i=1:numel(line_key) + tline_raw = line_raw{i}; + tline_key = line_key{i}; + tline_value = line_value{i}; % import different sections - change_to_section = 0; - switch tline + switch tline_raw case '- metaData:' - change_to_section = 1; + section = 1; + if ~silentMode + fprintf('\t%d\n', section); + end + continue % Go to next line case '- metabolites:' - change_to_section = 2; + section = 2; + if ~silentMode + fprintf('\t%d\n', section); + end + continue case '- reactions:' - change_to_section = 3; + section = 3; readSubsystems = false; readEquation = false; rxnId = ''; + if ~silentMode + fprintf('\t%d\n', section); + end + continue case '- genes:' - change_to_section = 4; + section = 4; + if ~silentMode + fprintf('\t%d\n', section); + end + continue case '- compartments: !!omap' - change_to_section = 5; - end - if logical(change_to_section) - section = change_to_section; - tline = fgetl(fid); - if ~silentMode - fprintf('\t%d\n', section); - end + section = 5; + if ~silentMode + fprintf('\t%d\n', section); + end + continue end - % skip over lines containing only omap - if any(regexp(tline, "- !!omap")) - tline = fgetl(fid); + % skip over empty keys + if isempty(tline_key) + continue; end % import metaData if section == 1 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'short_name' model.id = tline_value; @@ -134,7 +157,6 @@ % import metabolites: if section == 2 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'id' model = readFieldValue(model, 'mets', tline_value); @@ -155,7 +177,6 @@ % import reactions: if section == 3 - [tline_key, tline_value] = tokenizeYamlLine(tline); switch tline_key case 'id' model = readFieldValue(model, 'rxns', tline_value); @@ -206,11 +227,11 @@ otherwise if readSubsystems - subSystems(end+1,1) = {regexprep(tline_key, '"', '')}; - + subSystems(end+1,1) = {regexprep(tline_value, '^ *- (.+)$','$1')}; + % resolve the equation elseif readEquation - metCoeffi = regexp(regexprep(tline, ' +- ', ''), ': ', 'split'); + metCoeffi = regexp(regexprep(tline_raw, ' +- ', ''), ': ', 'split'); coeffi = metCoeffi{2}; if str2double(coeffi) < 0 if strcmp(leftEquation, '') @@ -231,20 +252,16 @@ % import genes: if section == 4 - [tline_key, tline_value] = tokenizeYamlLine(tline); model = readFieldValue(model, 'genes', tline_value); end % import compartments: if section == 5 - [tline_key, tline_value] = tokenizeYamlLine(tline); model.comps(end+1,1) = {tline_key}; model.compNames(end+1,1) = {tline_value}; end end -fclose(fid); - % follow-up data processing if ~silentMode @@ -288,15 +305,3 @@ function model = readFieldValue(model, fieldName, value) model.(fieldName)(end+1,1) = {value}; end - -function [line_key, line_value]= tokenizeYamlLine(line) - line_key = regexp(line, '^ *-? ([^:]+)', 'tokens'); - line_key = char(line_key{1}); - line_value = regexp(line, '^ [^:]+: "?(.+)"?$', 'tokens'); - if isempty(line_value) - line_value = ''; - else - line_value = regexprep(line_value{1}, '"', ''); - line_value = char(line_value{1}); - end -end \ No newline at end of file From 26f095ed0399700913ec6173d29b170a67f1db6d Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Wed, 15 Jun 2022 10:58:36 +0200 Subject: [PATCH 7/8] Update code/io/importYaml.m --- code/io/importYaml.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 02e3559a..21cef15d 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -26,7 +26,7 @@ end if verLessThan('matlab','9.9') %readlines introduced 2020b - fid=fopen(yamlFile); + fid=fopen(yamlFilename); line_raw=cell(1000000,1); while ~feof(fid) line_raw{i}=fgetl(fid); From 3ff015aa2ff524342c409adc6d163ba5249be44f Mon Sep 17 00:00:00 2001 From: Eduard Kerkhoven Date: Wed, 15 Jun 2022 10:58:44 +0200 Subject: [PATCH 8/8] Update code/io/importYaml.m --- code/io/importYaml.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/io/importYaml.m b/code/io/importYaml.m index 21cef15d..ab3efafe 100644 --- a/code/io/importYaml.m +++ b/code/io/importYaml.m @@ -35,7 +35,7 @@ line_raw(i:end)=[]; line_raw=string(line_raw); else - line_raw=readlines(yamlFile'); + line_raw=readlines(yamlFilename'); end line_key=regexprep(line_raw,'^ *-? ([^:]+)(:).*','$1');