Skip to content

Commit

Permalink
cherry-picking last commit of ptm stoich to clean commits after rebas…
Browse files Browse the repository at this point in the history
…ing to master and matching content
  • Loading branch information
pcruzparri committed Feb 16, 2025
1 parent 42e308f commit 0184816
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 68 deletions.
3 changes: 3 additions & 0 deletions mzLib/FlashLFQ/FlashLFQResults.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using Easy.Common.Extensions;
using MathNet.Numerics.Statistics;
using MzLibUtil;
using Proteomics;
using System;
using System.Collections.Generic;
using System.IO;
Expand All @@ -14,6 +16,7 @@ public class FlashLfqResults
public readonly Dictionary<string, Peptide> PeptideModifiedSequences;
public readonly Dictionary<string, ProteinGroup> ProteinGroups;
public readonly Dictionary<SpectraFileInfo, List<ChromatographicPeak>> Peaks;
public Dictionary<string, MzLibUtil.UtilProteinGroup> ModInfo { get; private set; }
private readonly HashSet<string> _peptideModifiedSequencesToQuantify;
public string PepResultString { get; set; }
public double MbrQValueThreshold { get; set; }
Expand Down
3 changes: 3 additions & 0 deletions mzLib/FlashLFQ/FlashLfqEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ public FlashLfqResults Run()
// do median-polish protein quantification
_results.CalculateProteinResultsMedianPolish(UseSharedPeptidesForProteinQuant);

// calculate ptm occupancy at the peptide level
_results.CalculatePTMOccupancy();

// do Bayesian protein fold-change analysis
if (BayesianProteinQuant)
{
Expand Down
15 changes: 14 additions & 1 deletion mzLib/FlashLFQ/Peptide.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Collections.Generic;
using Easy.Common.Extensions;
using System.Collections.Generic;
using System.Linq;
using System.Text;

Expand Down Expand Up @@ -67,6 +68,18 @@ public void SetIntensity(SpectraFileInfo fileInfo, double intensity)
}
}

/// <summary>
/// Sums the values of every entry in <c>Intensities</c>.
/// </summary>
/// <returns>
/// The sum of the <c>Value</c> of each entry in <c>Intensities</c>, or 0 when
/// <c>Intensities.IsNotNullOrEmpty()</c> is false.
/// </returns>
public double GetTotalIntensity()
{
// Guard first: Sum is only called when Intensities is neither null nor empty.
if (Intensities.IsNotNullOrEmpty())
{
return Intensities.Sum(i => i.Value);
}
else
{
// Nothing to sum, so report a total of zero.
return 0;

Check warning on line 79 in mzLib/FlashLFQ/Peptide.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/FlashLFQ/Peptide.cs#L78-L79

Added lines #L78 - L79 were not covered by tests
}
}

public DetectionType GetDetectionType(SpectraFileInfo fileInfo)
{
if (DetectionTypes.TryGetValue(fileInfo, out DetectionType detectionType))
Expand Down
17 changes: 5 additions & 12 deletions mzLib/MzLibUtil/PositionFrequencyAnalysis.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ public UtilProteinGroup(string name, Dictionary<string, UtilProtein> proteins =
}
}
public class PositionFrequencyAnalysis
{
{

public Dictionary<string, UtilProteinGroup> Occupancy { get; private set; }

/// <summary>
/// Calculates the occupancy of post-translational modifications at the peptide level.
/// </summary>
Expand All @@ -147,11 +150,7 @@ public class PositionFrequencyAnalysis
/// <returns> A nested dictionary whose key mappings are as follows: string ProteinGroup-> string Protein-> string BaseSequence-> int ModifiedAminoAcidIndex-> string ModificationName-> double Intensity
/// Note: Each BaseSequence dictionary contains a ModifiedAminoAcidIndex key of -1 that then contains a ModificationName key called "Total" that is used to track the total intensity observed for
/// all of the amino acids in that peptide.</returns>
///

public Dictionary<string, UtilProteinGroup> Occupancy { get; private set; }


///
public void ProteinGroupsOccupancyByPeptide(List<(string fullSeq, string baseSeq, List<string> proteinGroup, double intensity)> peptides, bool modOnNTerminus = true, bool modOnCTerminus = true, bool ignoreTerminusMod=false)
{
var proteinGroups = new Dictionary<string, UtilProteinGroup>();
Expand Down Expand Up @@ -228,11 +227,5 @@ public void ProteinGroupsOccupancyByProtein(Dictionary<string, string> proteinSe
{
throw new NotImplementedException();

Check warning on line 228 in mzLib/MzLibUtil/PositionFrequencyAnalysis.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/MzLibUtil/PositionFrequencyAnalysis.cs#L227-L228

Added lines #L227 - L228 were not covered by tests
}

/// <summary>
/// Sets the <c>OccupancyLevel</c> of one protein group in <see cref="Occupancy"/> to "protein" and
/// calls <c>PeptideToProteinPositions</c> on one peptide of one protein in that group.
/// </summary>
/// <param name="proteinGroupName">Key of the protein group in <see cref="Occupancy"/>.</param>
/// <param name="proteinName">Key of the protein inside the group's <c>Proteins</c> collection.</param>
/// <param name="peptide">Key of the peptide inside the protein's <c>Peptides</c> collection.</param>
/// <param name="OneBasedStartResidue">Value forwarded unchanged to <c>PeptideToProteinPositions</c>.</param>
/// <exception cref="KeyNotFoundException">
/// Thrown by the <see cref="Occupancy"/> dictionary indexer when <paramref name="proteinGroupName"/> is not present.
/// </exception>
public void ChangePeptideToProteinOccupancyIndex(string proteinGroupName, string proteinName, string peptide, int OneBasedStartResidue)
{
    // Look the group up once; both updates below act on this same instance.
    var proteinGroup = Occupancy[proteinGroupName];
    proteinGroup.OccupancyLevel = "protein";

    var peptideEntry = proteinGroup.Proteins[proteinName].Peptides[peptide];
    peptideEntry.PeptideToProteinPositions(OneBasedStartResidue);
}
}
}
60 changes: 8 additions & 52 deletions mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Text.RegularExpressions;
using Chemistry;
using Omics.Fragmentation.Peptide;
using MzLibUtil;

namespace Omics.SpectrumMatch
{
Expand Down Expand Up @@ -92,58 +93,15 @@ public static string RemoveParentheses(string baseSequence)
}

/// <summary>
/// Parses the full sequence to identify mods
/// Parses the full sequence to identify mods.
/// </summary>
/// <param name="fullSequence"> Full sequence of the peptide in question</param>
/// <param name="fullSeq"> Full sequence of the peptide in question</param>
/// <param name="modOnNTerminus"> If true, the index of modifications at the N-terminus will be 0 (zero-based indexing). Otherwise, it is the index of the first amino acid (one-based indexing).</param>
/// <param name="modOnCTerminus"> If true, the index of modifications at the C-terminus will be one more than the index of the last amino acid. Otherwise, it is the index of the last amino acid.</param>
/// <returns> Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod</returns>
public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
public static Dictionary<int, List<string>> ParseModifications(string fullSeq, bool modOnNTerminus = true, bool modOnCTerminus = true)
{
// use a regex to get all modifications
string pattern = @"\[(.+?)\]";
Regex regex = new(pattern);

// remove each match after adding to the dict. Otherwise, getting positions
// of the modifications will be rather difficult.
//int patternMatches = regex.Matches(fullSeq).Count;
Dictionary<int, List<string>> modDict = new();


// If there is a missed cleavage, then there will be a label on K and a Label on X modification.
// It'll be like [label]|[label] which complicates the positional stuff a little bit. Therefore,
// RemoveSpecialCharacters will remove the "|", to ease things later on.
RemoveSpecialCharacters(ref fullSeq);
MatchCollection matches = regex.Matches(fullSeq);
int captureLengthSum = 0;
foreach (Match match in matches)
{
GroupCollection group = match.Groups;
string val = group[1].Value;
int startIndex = group[0].Index;
int captureLength = group[0].Length;

List<string> modList = new List<string>();
modList.Add(val);

// The position of the amino acids is tracked by the positionToAddToDict variable. It takes the
// startIndex of the modification Match and removes the cumulative length of the modifications
// found (including the brackets). The difference will be the number of nonmodification characters,
// or the number of amino acids prior to the startIndex in the sequence.
int positionToAddToDict = startIndex - captureLengthSum;

// Check whether the key already exists.
// If the key already exists, append this modification to the list stored at that position;
// otherwise, add the position with its new modification list to the dict.
if (modDict.ContainsKey(positionToAddToDict))
{
modDict[positionToAddToDict].Add(val);
}
else
{
modDict.Add(positionToAddToDict, modList);
}
captureLengthSum += captureLength;
}
return modDict;
return fullSeq.ParseModifications(modOnNTerminus, modOnCTerminus);
}

/// <summary>
Expand All @@ -155,9 +113,7 @@ public static Dictionary<int, List<string>> ParseModifications(string fullSeq)
/// <returns></returns>
public static void RemoveSpecialCharacters(ref string fullSeq, string replacement = @"", string specialCharacter = @"\|")
{
// specialCharacter is used as a regex pattern; the default matches a literal "|".
// This handles the case where multiple modifications are reported on a missed-cleavage lysine (K).
Regex regexSpecialChar = new(specialCharacter);
// Replace every match in fullSeq with replacement and write the result back through the ref parameter.
fullSeq = regexSpecialChar.Replace(fullSeq, replacement);
// Forward the same three arguments to the MzLibUtil.ClassExtensions implementation.
// NOTE(review): the inline replacement above and this call presumably do overlapping work — confirm both are intended.
MzLibUtil.ClassExtensions.RemoveSpecialCharacters(ref fullSeq, replacement, specialCharacter);
}


Expand Down
4 changes: 2 additions & 2 deletions mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ public static void TestOutputToCustomDirectoryAndNameMzML()
{
// output to a different directory than the files were originally in
Parameters.OutputType = OutputType.MzML;
string customDestinationDirectory = Path.Combine(OutputDirectory, "NewTestingDirectory");
string customDestinationDirectory2 = Path.Combine(OutputDirectory, "NewTestingDirectory2");
string customDestinationDirectory = Path.Combine(OutputDirectory, "NewAveragedTestingDirectory");
string customDestinationDirectory2 = Path.Combine(OutputDirectory, "NewAveragedTestingDirectory2");
Directory.CreateDirectory(customDestinationDirectory);
string customName = "AveragedSpectra";

Expand Down
2 changes: 1 addition & 1 deletion mzLib/Test/FileReadingTests/TestPsmFromTsv.cs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ public static void TestParseModification()

// psm with two mods on the same amino acid
string fullSeq = "[Common Fixed:Carbamidomethyl on C]|[UniProt:N-acetylserine on S]KPRKIEEIKDFLLTARRKDAKSVKIKKNKDNVKFK";
modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq);
modDict = Omics.SpectrumMatch.SpectrumMatchFromTsv.ParseModifications(fullSeq, true, true);
Assert.That(modDict.Count == 1);
Assert.That(modDict.ContainsKey(0));
Assert.That(modDict[0].Count == 2);
Expand Down
1 change: 1 addition & 0 deletions mzLib/Test/TestMzLibUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Assert = NUnit.Framework.Legacy.ClassicAssert;
using MzLibUtil;
using Readers;
using System.Collections.Generic;

namespace Test
{
Expand Down
6 changes: 6 additions & 0 deletions mzLib/TestFlashLFQ/TestFlashLFQ.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,12 @@ public static void TestFlashLfqQoutputRealData()
var peaks = results.Peaks.Values.ToList();
var peptides = results.PeptideModifiedSequences.Values.ToList();
var proteins = results.ProteinGroups.Values.ToList();
var modInfo = results.ModInfo;

Assert.AreEqual(6989789.488346225, peptides[0].GetTotalIntensity(), 0.0000001);
Assert.AreEqual(726036.539062, peptides[4].GetTotalIntensity(), 0.000001);
Assert.AreEqual(726036.539062, modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].ModifiedAminoAcidPositions[4]["Common Variable:Oxidation on M"].Intensity, 0.000001);
Assert.AreEqual(modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].Intensity, modInfo["Q7KZF4"].Proteins["Q7KZF4"].Peptides["EYGMIYLGK"].ModifiedAminoAcidPositions[4]["Common Variable:Oxidation on M"].Intensity, 0.000001);

Assert.AreEqual(4, peaks[0].Count(m => m.IsMbrPeak == false));
Assert.AreEqual(5, peaks[1].Count(m => m.IsMbrPeak == false));
Expand Down

0 comments on commit 0184816

Please sign in to comment.