Skip to content

Commit

Permalink
Merge pull request #273 from SysBioChalmers/feat/limitingProteins
Browse files Browse the repository at this point in the history
feat: stop if not limiting proteins were found
  • Loading branch information
edkerk authored Mar 7, 2023
2 parents 6ab6f4d + 7384a00 commit 60ef6ea
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 77 deletions.
103 changes: 52 additions & 51 deletions protocol.m
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
% included as such in the protocol.

%% Preparation stage for ecModel reconstruction
% - Install GECKO % RAVEN
% - Install GECKO & RAVEN
% - GECKO can be installed via cloning or direct download of ZIP file.
% See installation instructions #here (TO BE ADDED)
% See installation instructions in the README.md:
% https://github.com/SysBioChalmers/GECKO/tree/main#installation
% - Add the appropriate GECKO (sub)folders to MATLAB path:
GECKOInstaller.install %
% - Install RAVEN by following the installation instructions:
Expand Down Expand Up @@ -76,42 +77,38 @@
ecModel=loadEcModel('ecYeastGEMfull.yml');

%% STAGE 2: integration of kcat into the ecModel structure
% STEP 6 Decide which kcat source to use
% In the steps below, all options are shown. Not all are required, it is up
% to the user to decide which ones they want to use.
% Decide which kcat source to use. In the steps below, all options are
% shown. Not all are required, it is up to the user to decide which ones
% they want to use.

% STEP 7 Fuzzy matching with BRENDA
% STEP 6 Fuzzy matching with BRENDA
% Requires EC numbers, which are here first taken from the starting model,
% with the missing ones taken from Uniprot & KEGG databases.
ecModel = getECfromGEM(ecModel);
noEC = cellfun(@isempty, ecModel.ec.eccodes);
ecModel = getECfromDatabase(ecModel);
ecModel = getECfromDatabase(ecModel,noEC);
% Do the actual fuzzy matching with BRENDA
kcatList_fuzzy = fuzzyKcatMatching(ecModel);
% Now we have a kcatList, which will be used to update ecModel in a later
% step.

% STEP 8 DLKcat prediction through machine learning
% STEP 7 DLKcat prediction through machine learning
% Requires metabolite SMILES, which are gathered from PubChem
ecModel = findMetSmiles(ecModel);
% DLKcat runs in Python. An input file is written, which is then used by
% DLKcat, while the output file is read back into MATLAB.
writeDLKcatInput(ecModel);
% runDLKcat will attempt to install and run DLKcat, but this might not work
% for all systems. In that case, the user should install Docker Desktop
% (https://docs.docker.com/get-docker/) and use runDLKcatFromDocker()
% instead.
runDLKcat();
%runDLKcatFromDocker(); % Only uncomment and run if required.
% runDLKcat will run the DLKcat algorithm via a Docker image
%runDLKcat();
kcatList_DLKcat = readDLKcatOutput(ecModel);

% STEP 9 Combine kcat from BRENDA and DLKcat
% STEP 8 Combine kcat from BRENDA and DLKcat
kcatList_merged = mergeDLKcatAndFuzzyKcats(kcatList_DLKcat, kcatList_fuzzy);

% STEP 10 Take kcatList and populate edModel.ec.kcat
% STEP 9 Take kcatList and populate edModel.ec.kcat
ecModel = selectKcatValue(ecModel, kcatList_merged);

% STEP 11 Apply custom kcat values
% STEP 10 Apply custom kcat values
% During the development of yeast-GEM ecModels (through GECKO 1 & 2),
% various kcat values have been manually curated, to result in a model that
% is able to validate a wide range of phenotypes. These curations are
Expand All @@ -121,82 +118,86 @@
% you run applyKcatConstraints.
ecModel = applyKcatConstraints(ecModel);

% STEP 12 Get kcat values across isoenzymes
% STEP 11 Get kcat values across isoenzymes
ecModel = getKcatAcrossIsoenzymes(ecModel);

% STEP 13 Get standard kcat
% STEP 12 Get standard kcat
% Assign an enzyme cost to reactions without gene assocation (except
% exchange, transport and pseudoreactions)
[ecModel, rxnsMissingGPR, standardMW, standardKcat] = getStandardKcat(ecModel);

% STEP 14 Apply kcat constraints from ecModel.ec.kcats to ecModel.S
% STEP 13 Apply kcat constraints from ecModel.ec.kcats to ecModel.S
% This function can be run at any point to re-apply the kcat contraints on
% the model. It also considers the complex data
ecModel = applyKcatConstraints(ecModel);

% STEP 15 Set upper bound of protein pool
ecModel = setProtPoolSize(ecModel);
% STEP 14 Set upper bound of protein pool
% Calculate f-factor (how much proteins are enzymes), based on paxDB.tsv
% that is provided for the model. Other proteomics data can also be used,
% while an alternative approximation of 0.5 is typically quite realistic.
f = calculateFfactor(ecModel);
ecModel = setProtPoolSize(ecModel,[],f);

%% STAGE 3: model tuning
% Test whether the model is able to reach maximum growth if glucose uptake
% is unlimited. First set glucose uptake unconstraint
ecModel = setParam(ecModel,'lb','r_1714',-1000);
% Run FBA
sol = solveLP(ecModel,1)
% It reaches growth rate 0.0936, while it should be able to reach 0.41.
% We can also look at the exchange fluxes, but it does not inform use too
% much at this point. Interesting to see that there is quite some ethanol
% fermentation going on.
% It reaches growth rate 0.0877, while it should be able to reach 0.41 (the
% maximum growth rate of S. cerevisiae, that is entered in the model
% adapter as obj.params.gR_exp. We can also look at the exchange fluxes,
% but it does not inform use too much at this point. Interesting to see
% that there is quite some ethanol fermentation going on.
printFluxes(ecModel, sol.x)

% STEP 16 Relax protein pool constraint
% STEP 15 Relax protein pool constraint
% As a simplistic way to ensure the model to reach the growth rate, the
% upper bound of the protein pool exchange reaction can be increased to
% whatever is required. This works, but STEP 16 is preferred.
protPoolIdx = strcmp(ecModel.rxns, 'prot_pool_exchange');
ecModel.ub(protPoolIdx) = 1000;
ecModel.lb(protPoolIdx) = -1000;
% Important to perform parsimonious FBA by setting minFlux to 1:
sol = solveLP(ecModel,1);
ecModel.ub(protPoolIdx) = sol.x(protPoolIdx);
sol = solveLP(ecModel,1)
ecModel.lb(protPoolIdx) = sol.x(protPoolIdx);

% STEP 17 Sensitivity tuning
% STEP 16 Sensitivity tuning
% First reset the protein pool constraint to a more realistic value,
% reverting STEP 16.
ecModel = setProtPoolSize(ecModel);
[ecModel, tunedKcats] = sensitivityTuning(ecModel);

% STEP 18 Bayesian tuning
% Alternative to STEP 17.
%ecModel = bayesianSensitivityTuning(ecModel);

%% STAGE 4 integration of proteomics data into the ecModel
% STEP 19 Load proteomics data and constrain ecModel
% STEP 17 Load proteomics data and constrain ecModel
protData = loadProtData(3); %Number of replicates
ecModel = fillProtConcs(ecModel,protData);
ecModel = constrainProtConcs(ecModel);

% STEP 20 Update protein pool
% STEP 18 Update protein pool
ecModel = updateProtPool(ecModel);

% STEP 21 Load flux data
% STEP 19 Load flux data
fluxData = loadFluxData();
ecModel = constrainFluxData(ecModel,fluxData,1,'max','loose'); % Use first condition
sol = solveLP(ecModel); % To observe if growth was reached
disp(['Growth rate that is reached: ' num2str(abs(sol.f))])

% Growth rate of 0.1 is by far not reached, flexibilize protein
% concentrations
[ecModel, flexProt] = flexibilizeProtConcs(ecModel,0.1,10);

% It gets stuck at 0.0889. It seems like protein abundances are not
% preventing the model to reach 0.1. First look if the conventional GEM is
% able to reach 0.1 with the same constraints:
% STEP 20 Protein concentrations are flexibilized (increased), until the
% intended growth rate is reached.
[ecModel, flexProt] = flexibilizeProtConcs(ecModel,0.1,10);

% Neither individual protein levels nor total protein pool are limiting
% growth. Test whether the starting model is able to reach 0.1.
modelY = constrainFluxData(modelY,fluxData);
sol = solveLP(modelY)

% It also only reaches 0.0889! So the metabolic network would not be able
% to adhere to all measured constraints. Perhaps there is something
% incorrect with the measurements? For now, we will limit the growth rate
% to 0.08885 in our search:
[ecModel, flexProt] = flexibilizeProtConcs(ecModel,0.08885,10);
% incorrect with the measurements? Regardless, the ecModel is now able to
% reach about 0.0889, which will be fine for now.
sol = solveLP(ecModel)

% Growth is reached! Let's make sure we store this functional model
saveEcModel(ecModel,ModelAdapter,'yml','ecYeastGEMfull');
Expand All @@ -207,7 +208,7 @@
modelY = loadConventionalGEM();
fluxData = loadFluxData;

% STEP 23 Example of various useful RAVEN functions
% STEP 21 Example of various useful RAVEN functions
% % Set the upper bound of reaction r_0001 to 10.
% ecModel = setParam(ecModel,'ub','r_0001',10);
% % Set the lower bound of reaction r_0001 to 0.
Expand All @@ -227,7 +228,7 @@
% % Export to Excel file (will not contain potential model.ec content)
% exportToExcelFormat(ecModel,'filename.xlsx');

% STEP 24 Selecting objective functions
% STEP 22 Selecting objective functions
ecModel = setParam(ecModel,'obj',params.bioRxn,1);
sol = solveLP(ecModel)
disp(['Growth rate reached: ' num2str(abs(sol.f))])
Expand All @@ -237,7 +238,7 @@
sol = solveLP(ecModel)
disp(['Minimum protein pool usage: ' num2str(abs(sol.f)) ' ug/gDCW'])

% STEP 25 Compare fluxes from ecModel and starting model
% STEP 23 Compare fluxes from ecModel and starting model
% Constrain with the same conditions to model and ecModel. We now fix the
% observed growth as lower bound ('min' in constrainFluxData) and allow 5%
% variability around the other measured fluxes.
Expand Down Expand Up @@ -268,13 +269,13 @@
ratioFlux(isnan(ratioFlux)) = 0; % Divisions by zero give NaN, reset to zero.
printFluxes(modelY,ratioFlux,false)

% STEP 26 Inspect enzyme usage
% STEP 24 Inspect enzyme usage
% Show the result from the earlier simulation, without mapping to
% non-ecModel.
usageData = enzymeUsage(ecModel,solEC.x);
usageReport = reportEnzymeUsage(ecModel,usageData,0.90);

% STEP 27 Perform (ec)FVA
% STEP 25 Perform (ec)FVA
% Perform FVA
[minFluxEc, maxFluxEc] = ecFVA(ecModel, modelY);
[minFluxY, maxFluxY] = ecFVA(modelY, modelY);
Expand Down
106 changes: 83 additions & 23 deletions src/geckomat/limit_proteins/flexibilizeProtConcs.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
% Flexibilize protein concentration of an ecModel with constrained with
% proteomics data. The upper bound of the protein usage reaction is
% changed, while the concentrations in ecModel.ec.concs remain unchanged.
% If no (more) limiting enzyme concentrations can be found, an attempt
% will be made to relax the protein pool exchange reaction. If this does
% increase the growth rate, it is encouraged to set the protein pool
% exchange unconstrained before running flexibilizeProtConcs. If relaxing
% the protein pool exchange does not increase the growth rate, then this
% is not due to enzyme constraints, but rather an issue with the
% metabolic network itself, or the set nutrient exchange is not
% sufficient.
%
% Input:
% model an ecModel in GECKO 3 format (with ecModel.ec structure)
Expand Down Expand Up @@ -61,6 +69,17 @@
iterPerEnzyme = inf;
end

bioRxnIdx = getIndexes(model,params.bioRxn,'rxns');
if model.ub(bioRxnIdx) < expGrowth
fprintf(['The upper bound of the biomass reaction was %s, while expGrowth '...
'is %s. The upper bound has been set to expGrowth.'],...
num2str(model.ub(bioRxnIdx)), num2str(expGrowth))
model.ub(bioRxnIdx) = expGrowth;
end

% Keep track if protein pool will be flexibilized
changedProtPool = false;

% In case the model have not been protein constrained
% model = constrainProtConcs(model);

Expand All @@ -83,34 +102,68 @@
% Get the control coefficients
[~, controlCoeffs] = getConcControlCoeffs(model, proteins, foldChange, 0.75);

% Find the maximum coefficient index
[~,maxIdx] = max(controlCoeffs);
% Stop looking for limiting proteins all coeffs are zero
if any(controlCoeffs)

% Find the maximum coefficient index
[~,maxIdx] = max(controlCoeffs);

% Increase +1 the frequence
frequence(maxIdx) = frequence(maxIdx) + 1;
% Increase +1 the frequence
frequence(maxIdx) = frequence(maxIdx) + 1;

% Allow to increase the UB maximum five times
if frequence(maxIdx) <= iterPerEnzyme
% Allow to increase the UB maximum five times
if frequence(maxIdx) <= iterPerEnzyme

% Set how much will increase the UB
increase = foldChange*frequence(maxIdx);
% Set how much will increase the UB
increase = foldChange*frequence(maxIdx);

% Get usage rxn for the highest coeff
protUsageIdx = strcmpi(model.rxns, strcat('usage_prot_', proteins(maxIdx)));
% Get usage rxn for the highest coeff
protUsageIdx = strcmpi(model.rxns, strcat('usage_prot_', proteins(maxIdx)));

% replace the UB
model.lb(protUsageIdx) = -(model.ec.concs(protConcs(maxIdx)) * (1+increase));
% replace the UB
model.lb(protUsageIdx) = -(model.ec.concs(protConcs(maxIdx)) * (1+increase));

% Get the new growth rate
sol = solveLP(model,0,[],hs);
predGrowth = abs(sol.f);
% Get the new growth rate
sol = solveLP(model,0,[],hs);
predGrowth = abs(sol.f);

if verbose
disp(['Protein ' proteins{maxIdx} ' LB adjusted. Grow: ' num2str(predGrowth)])
if verbose
disp(['Protein ' proteins{maxIdx} ' LB adjusted. Grow: ' num2str(predGrowth)])
end
else
disp(['Limit has been reached. Protein ' proteins{maxIdx} ' seems to be problematic. Consider changing the kcat.'])
flexBreak=true;
break
end
else
disp(['Limit has been reached. Protein ' proteins{maxIdx} ' seems to be problematic. Consider changing the kcat '])
flexBreak=true;
disp('No (more) limiting proteins have been found. Attempt to increase protein pool exchange.')
protPoolIdx = getIndexes(model,'prot_pool_exchange','rxns');
oldProtPool = -model.lb(protPoolIdx);
tempModel = setParam(model,'lb',protPoolIdx,-1000);
tempModel = setParam(tempModel,'ub',bioRxnIdx,expGrowth);
sol = solveLP(tempModel);
if (abs(sol.f)-predGrowth)>1e-10 % There is improvement in growth rate
% Find new protein pool constraint
predGrowth = abs(sol.f);
tempModel = setParam(tempModel,'lb',bioRxnIdx,predGrowth);
tempModel = setParam(tempModel,'obj',protPoolIdx,1);
sol = solveLP(tempModel);
newProtPool = abs(sol.f);
model.lb(protPoolIdx) = -newProtPool;
fprintf(['Changing the lower bound of protein pool exchange from %s ',...
'to %s enabled a\ngrowth rate of %s. It can be helpful to set ', ...
'the lower bound of protein\npool exchange to -1000 before ',...
'running flexibilizeProtConcs.\n'],num2str(-oldProtPool), ...
num2str(-newProtPool),num2str(predGrowth));
changedProtPool = true;
else
fprintf(['Protein pool exchange not limiting. Inability to reach growth ',...
'rate is not related to\nenzyme constraints. Maximum growth rate ',...
'is %s.\n'], num2str(predGrowth))
end
% To return an usable model with these conditions, set the
% highest growth rate reached as the experimental value
expGrowth = predGrowth;
break
end
end
Expand All @@ -123,12 +176,12 @@
ecProtId = find(ismember(model.ec.enzymes,protFlex));
oldConcs = model.ec.concs(ecProtId);

if flexBreak == false
if flexBreak == false && ~isempty(protFlex)
% Not all flexibilized proteins require flexibilization in the end
% Test flux distribution with minimum prot_pool usage
modelTemp = setParam(model,'eq',params.bioRxn,expGrowth);
modelTemp = setParam(modelTemp,'obj','prot_pool_exchange',-1);
sol = solveLP(modelTemp,1,[],hs);
modelTemp = setParam(model,'var',params.bioRxn,expGrowth,0.5);
modelTemp = setParam(modelTemp,'obj','prot_pool_exchange',1);
sol = solveLP(modelTemp,1);
% Check which concentrations can remain unchanged
newConcs = abs(sol.x(protUsageIdx));
keepOld = newConcs<oldConcs;
Expand All @@ -155,3 +208,10 @@
flexProt.flexConcs = abs(model.lb(protUsageIdx));
flexProt.frequence = frequence(frequence>0);
end
if changedProtPool
flexProt.uniprotIDs(end+1) = {'prot_pool'};
flexProt.oldConcs(end+1) = oldProtPool;
flexProt.flexConcs(end+1) = newProtPool;
flexProt.frequence(end+1) = 1;
end
end
7 changes: 5 additions & 2 deletions src/geckomat/limit_proteins/getConcControlCoeffs.m
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,11 @@
[tempSol,hs] = solveLP(tempModel,0,[],hs);
tempGrowth = abs(tempSol.f);

% Calculate the coeff
controlCoeffs(i) = (tempGrowth-initialGrowth)/(prevConc-newConc);
% Calculate the coeff only if new growth rate is significantly
% higher than initial value
if (tempGrowth-initialGrowth)>1e-10
controlCoeffs(i) = (tempGrowth-initialGrowth)/(prevConc-newConc);
end
end

end
Expand Down
2 changes: 1 addition & 1 deletion src/geckomat/limit_proteins/updateProtPool.m
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@
else
error('The total measured protein mass exceeds the total protein content.')
end
newPtot = PdiffEnz / 1000 / params.f
newPtot = PdiffEnz / 1000 / params.f;
end

0 comments on commit 60ef6ea

Please sign in to comment.