From c1a1f1379f8010500bdc23c935bec063b7fb5080 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Mon, 21 Nov 2022 11:13:55 +0100 Subject: [PATCH 1/2] fix non sulfered molecular detection and remove mz < mzmincorestructure --- brassinetGenouest/Readme.md | 31 +++++++++++++++++-- .../metabolomics/p2m2/MainDetection.scala | 5 +-- .../p2m2/builder/ScanLoader.scala | 30 +++++++++--------- .../export/CsvIonsIdentificationFile.scala | 6 ---- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/brassinetGenouest/Readme.md b/brassinetGenouest/Readme.md index 3ee9b48..37584ae 100644 --- a/brassinetGenouest/Readme.md +++ b/brassinetGenouest/Readme.md @@ -1,13 +1,38 @@ -### Genouest +# Genouest jobs +## Batch job + +check [slurm job](./slurm_genouest.sh) + +```shell +sbatch --mem=20G slurm_genouest.sh +``` +### Check job + +```shell +squeue -u ofilangi +``` + +## Interactive job + + +```shell srun --mem=20G --pty bash srun --mem=20G --cpus-per-task=8 --pty bash +``` -sbatch --mem=20G script.sh +### Set environment +```shell . /local/env/envconda.sh export PATH=$HOME/bin:$PATH conda activate /home/genouest/inra_umr1349/ofilangi/sbt_env +``` + +### Example with blanc dataset +```shell +FILES="/groups/arch_igepp/metabolomics/MassSpectrometerOutputFile/brassimet/202111/Racines/MzXML*Neg/Brassinet_Racine_blanc-2_Neg_01_8540.mzXML" +java -cp ../assembly/pack.jar fr.inrae.metabolomics.p2m2.MainDetection $FILES +``` -squeue -u ofilangi \ No newline at end of file diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala index 5b9eb7a..661fdec 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala @@ -24,7 +24,7 @@ object MainDetection extends App { endRT: Option[Double] = None, overrepresentedPeak: Int = 800, precisionMzh: Int = 1000, - toleranceMz: Double = 0.01, + toleranceMz: Double = 0.005, warmup: Double = 0.50, // (30 sec) outfile: Option[String] = None, verbose: Boolean = false, @@ -206,7 +206,8 @@ object MainDetection extends App { nbCarbonMax = confJson.numberCarbonMax(family), nbSulfurMin = confJson.numberSulfurMin(family), nbSulfurMax = confJson.numberSulfurMax(family), - config.toleranceMz, + minMzCoreStructure = confJson.minMzCoreStructure(family), + precisionDeltaM0M2 = config.toleranceMz, deltaMOM2 = confJson.deltaMp0Mp2(family)) /** diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index a09f3ff..6873ac6 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -6,7 +6,6 @@ import umich.ms.datatypes.spectrum.ISpectrum import umich.ms.fileio.filetypes.mzxml._ import java.io.File -import scala.Double.NaN import scala.jdk.CollectionConverters._ import scala.math.sqrt import scala.util.{Success, Try} @@ -150,17 +149,18 @@ case object ScanLoader { } def selectEligibleIons( - source: MZXMLFile, - index: MZXMLIndex, - start : Option[Double] = None, - end : Option[Double] = None, - noiseIntensity : Double, - nbCarbonMin: Double, - nbCarbonMax: Double, - nbSulfurMin : Double, - nbSulfurMax : Double, - precision: Double = 0.01, - deltaMOM2 : Double + source: MZXMLFile, + index: MZXMLIndex, + start : Option[Double] = None, + end : Option[Double] = None, + noiseIntensity : Double, + nbCarbonMin: Double, + nbCarbonMax: Double, + nbSulfurMin : Double, + nbSulfurMax : Double, + minMzCoreStructure : Double, + precisionDeltaM0M2: Double, + deltaMOM2 : Double ): Seq[PeakIdentification] = { println("\n== Search for isotopes sulfur == ") // the file using those numbers. We need the raw scan numbers (the numbers @@ -178,8 +178,8 @@ case object ScanLoader { // remove the first one to compute Delta M mzValues .zipWithIndex - .filter { case (_, idx) => - spectrum.getIntensities()(idx) > noiseIntensity + .filter { case (mz0, idx) => + (spectrum.getIntensities()(idx) > noiseIntensity) && (mz0>minMzCoreStructure) } .map { case (mz0, idx0) => val mz_ms_p2 = mz0 + deltaMOM2 @@ -190,7 +190,7 @@ case object ScanLoader { (mz0, idx0, mz1, idx1, mz_p2, idx2) } .filter { case (mz, _, _, _, mz2, _) => - ((mz - mz2).abs - deltaMOM2).abs < precision + ((mz - mz2).abs - deltaMOM2).abs < precisionDeltaM0M2 } /* criteria M1 of Isotope C are present at 1.1 and S are present 4.4 % */ .filter { case (_, idx0, _, idx1, _, _) => diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFile.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFile.scala index e212c00..e2197d3 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFile.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFile.scala @@ -30,7 +30,6 @@ case object CsvIonsIdentificationFile { bw.write(s"CHEBI;") //bw.write(s"BRACHEMDB;") bw.write(s"BRASSICA;") - bw.write("mz threshold;") bw.write("Nb (NL+DI);") bw.write(s"NL;") bw.write(s"DI;") @@ -72,11 +71,6 @@ case object CsvIonsIdentificationFile { }) + "[R="+ ChemicalUtils.correlation(m.formula,metabolitesIdentificationId.ion.peaks.map( p=> p.abundance)) + "]" } bw.write(namesAndR.mkString(",")+";") - if ( metabolitesIdentificationId.ion.peaks.head.mz < configJson.minMzCoreStructure(familyMetabolite) ) { - bw.write("*;") - } else - bw.write(";") - bw.write(s"${metabolitesIdentificationId.scoreIdentification};") neutralLosses From 9af8fe550c7c1d96eaf0be46d3551e402ef5a3dd Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Mon, 21 Nov 2022 13:34:49 +0100 Subject: [PATCH 2/2] fix test --- .../metabolomics/p2m2/builder/ScanLoaderTest.scala | 9 ++++++++- .../p2m2/diagnostic/DaughterIonsDiagTest.scala | 10 ++++++---- .../p2m2/export/CsvIonsIdentificationFileTest.scala | 2 ++ .../p2m2/output/IonsIdentificationTest.scala | 2 ++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala index 17549c2..5f6b6af 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala @@ -3,7 +3,6 @@ package fr.inrae.metabolomics.p2m2.builder import utest.{TestSuite, Tests, test} import java.io.File -import scala.Double.NaN object ScanLoaderTest extends TestSuite { val tests = Tests { @@ -28,6 +27,8 @@ object ScanLoaderTest extends TestSuite { nbCarbonMax=0.0, 2.0, 5.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 ) assert(v2.isEmpty) @@ -46,6 +47,8 @@ object ScanLoaderTest extends TestSuite { nbCarbonMax = 25.0, 2.0, 0.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 ) assert(v2.isEmpty) @@ -64,6 +67,8 @@ object ScanLoaderTest extends TestSuite { nbCarbonMax = 25.0, 2.0, 1.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 ) assert(v2.isEmpty) @@ -83,6 +88,8 @@ object ScanLoaderTest extends TestSuite { nbCarbonMax = 25.0, 2.0, 5.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 ) assert(v2.nonEmpty) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala index 0f4854c..9fd3824 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala @@ -9,7 +9,7 @@ object DaughterIonsDiagTest extends TestSuite { val tests : Tests = Tests { val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath)) val v2 = { - (ScanLoader.selectEligibleIons( + ScanLoader.selectEligibleIons( v._1, v._2, Some(11.5), // RT start @@ -19,8 +19,10 @@ object DaughterIonsDiagTest extends TestSuite { nbCarbonMax = 20.0, nbSulfurMin = 2.0, nbSulfurMax = 5.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 - )) + ) } test("test") { @@ -35,8 +37,8 @@ object DaughterIonsDiagTest extends TestSuite { ).foldLeft(Map[Int,Int]())( (acc : Map[Int,Int],v : Seq[Int]) => { v.map( p2 => acc.get(p2) match { - case Some(s) => (p2 -> (s + 1)) - case None => (p2 -> 1) + case Some(s) => p2 -> (s + 1) + case None => p2 -> 1 }).toMap } ).toSeq.sortBy(_._2) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFileTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFileTest.scala index 797c0fd..af8d1f8 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFileTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvIonsIdentificationFileTest.scala @@ -21,6 +21,8 @@ object CsvIonsIdentificationFileTest extends TestSuite { nbCarbonMax = 20.0, nbSulfurMin = 2.0, nbSulfurMax = 5.0, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996 ) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/output/IonsIdentificationTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/output/IonsIdentificationTest.scala index 4eafe0e..b15b46a 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/output/IonsIdentificationTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/output/IonsIdentificationTest.scala @@ -28,6 +28,8 @@ object IonsIdentificationTest extends TestSuite { nbCarbonMax = 20, nbSulfurMin = 2, nbSulfurMax = 5, + minMzCoreStructure = 0.01, + precisionDeltaM0M2 = 0.001, deltaMOM2 = 1.996, )) { assert(Try(read[PeakIdentification](write(elem))).isSuccess)