Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix detection of non-sulfured molecules and remove ions with mz < minMzCoreStructure #34

Merged
merged 2 commits into from
Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions brassinetGenouest/Readme.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,38 @@
### Genouest
# Genouest jobs

## Batch job

See the [Slurm job script](./slurm_genouest.sh).

```shell
sbatch --mem=20G slurm_genouest.sh
```
### Check job

```shell
squeue -u ofilangi
```

## Interactive job


```shell
srun --mem=20G --pty bash
srun --mem=20G --cpus-per-task=8 --pty bash
```

sbatch --mem=20G script.sh
### Set environment

```shell
. /local/env/envconda.sh
export PATH=$HOME/bin:$PATH
conda activate /home/genouest/inra_umr1349/ofilangi/sbt_env
```

### Example with blanc dataset

```shell
FILES="/groups/arch_igepp/metabolomics/MassSpectrometerOutputFile/brassimet/202111/Racines/MzXML*Neg/Brassinet_Racine_blanc-2_Neg_01_8540.mzXML"
java -cp ../assembly/pack.jar fr.inrae.metabolomics.p2m2.MainDetection $FILES
```

squeue -u ofilangi
5 changes: 3 additions & 2 deletions src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ object MainDetection extends App {
endRT: Option[Double] = None,
overrepresentedPeak: Int = 800,
precisionMzh: Int = 1000,
toleranceMz: Double = 0.01,
toleranceMz: Double = 0.005,
warmup: Double = 0.50, // (30 sec)
outfile: Option[String] = None,
verbose: Boolean = false,
Expand Down Expand Up @@ -206,7 +206,8 @@ object MainDetection extends App {
nbCarbonMax = confJson.numberCarbonMax(family),
nbSulfurMin = confJson.numberSulfurMin(family),
nbSulfurMax = confJson.numberSulfurMax(family),
config.toleranceMz,
minMzCoreStructure = confJson.minMzCoreStructure(family),
precisionDeltaM0M2 = config.toleranceMz,
deltaMOM2 = confJson.deltaMp0Mp2(family))

/**
Expand Down
30 changes: 15 additions & 15 deletions src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import umich.ms.datatypes.spectrum.ISpectrum
import umich.ms.fileio.filetypes.mzxml._

import java.io.File
import scala.Double.NaN
import scala.jdk.CollectionConverters._
import scala.math.sqrt
import scala.util.{Success, Try}
Expand Down Expand Up @@ -150,17 +149,18 @@ case object ScanLoader {
}

def selectEligibleIons(
source: MZXMLFile,
index: MZXMLIndex,
start : Option[Double] = None,
end : Option[Double] = None,
noiseIntensity : Double,
nbCarbonMin: Double,
nbCarbonMax: Double,
nbSulfurMin : Double,
nbSulfurMax : Double,
precision: Double = 0.01,
deltaMOM2 : Double
source: MZXMLFile,
index: MZXMLIndex,
start : Option[Double] = None,
end : Option[Double] = None,
noiseIntensity : Double,
nbCarbonMin: Double,
nbCarbonMax: Double,
nbSulfurMin : Double,
nbSulfurMax : Double,
minMzCoreStructure : Double,
precisionDeltaM0M2: Double,
deltaMOM2 : Double
): Seq[PeakIdentification] = {
println("\n== Search for isotopes sulfur == ")
// the file using those numbers. We need the raw scan numbers (the numbers
Expand All @@ -178,8 +178,8 @@ case object ScanLoader {
// remove the first one to compute Delta M
mzValues
.zipWithIndex
.filter { case (_, idx) =>
spectrum.getIntensities()(idx) > noiseIntensity
.filter { case (mz0, idx) =>
(spectrum.getIntensities()(idx) > noiseIntensity) && (mz0>minMzCoreStructure)
}
.map { case (mz0, idx0) =>
val mz_ms_p2 = mz0 + deltaMOM2
Expand All @@ -190,7 +190,7 @@ case object ScanLoader {
(mz0, idx0, mz1, idx1, mz_p2, idx2)
}
.filter { case (mz, _, _, _, mz2, _) =>
((mz - mz2).abs - deltaMOM2).abs < precision
((mz - mz2).abs - deltaMOM2).abs < precisionDeltaM0M2
}
/* criterion on M1: the C isotope is present at 1.1 % and the S isotope at 4.4 % */
.filter { case (_, idx0, _, idx1, _, _) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ case object CsvIonsIdentificationFile {
bw.write(s"CHEBI;")
//bw.write(s"BRACHEMDB;")
bw.write(s"BRASSICA;")
bw.write("mz threshold;")
bw.write("Nb (NL+DI);")
bw.write(s"NL;")
bw.write(s"DI;")
Expand Down Expand Up @@ -72,11 +71,6 @@ case object CsvIonsIdentificationFile {
}) + "[R="+ ChemicalUtils.correlation(m.formula,metabolitesIdentificationId.ion.peaks.map( p=> p.abundance)) + "]"
}
bw.write(namesAndR.mkString(",")+";")
if ( metabolitesIdentificationId.ion.peaks.head.mz < configJson.minMzCoreStructure(familyMetabolite) ) {
bw.write("*;")
} else
bw.write(";")

bw.write(s"${metabolitesIdentificationId.scoreIdentification};")

neutralLosses
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package fr.inrae.metabolomics.p2m2.builder
import utest.{TestSuite, Tests, test}

import java.io.File
import scala.Double.NaN

object ScanLoaderTest extends TestSuite {
val tests = Tests {
Expand All @@ -28,6 +27,8 @@ object ScanLoaderTest extends TestSuite {
nbCarbonMax=0.0,
2.0,
5.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
)
assert(v2.isEmpty)
Expand All @@ -46,6 +47,8 @@ object ScanLoaderTest extends TestSuite {
nbCarbonMax = 25.0,
2.0,
0.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
)
assert(v2.isEmpty)
Expand All @@ -64,6 +67,8 @@ object ScanLoaderTest extends TestSuite {
nbCarbonMax = 25.0,
2.0,
1.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
)
assert(v2.isEmpty)
Expand All @@ -83,6 +88,8 @@ object ScanLoaderTest extends TestSuite {
nbCarbonMax = 25.0,
2.0,
5.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
)
assert(v2.nonEmpty)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ object DaughterIonsDiagTest extends TestSuite {
val tests : Tests = Tests {
val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath))
val v2 = {
(ScanLoader.selectEligibleIons(
ScanLoader.selectEligibleIons(
v._1,
v._2,
Some(11.5), // RT start
Expand All @@ -19,8 +19,10 @@ object DaughterIonsDiagTest extends TestSuite {
nbCarbonMax = 20.0,
nbSulfurMin = 2.0,
nbSulfurMax = 5.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
))
)
}

test("test") {
Expand All @@ -35,8 +37,8 @@ object DaughterIonsDiagTest extends TestSuite {
).foldLeft(Map[Int,Int]())(
(acc : Map[Int,Int],v : Seq[Int]) => {
v.map( p2 => acc.get(p2) match {
case Some(s) => (p2 -> (s + 1))
case None => (p2 -> 1)
case Some(s) => p2 -> (s + 1)
case None => p2 -> 1
}).toMap
}
).toSeq.sortBy(_._2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ object CsvIonsIdentificationFileTest extends TestSuite {
nbCarbonMax = 20.0,
nbSulfurMin = 2.0,
nbSulfurMax = 5.0,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ object IonsIdentificationTest extends TestSuite {
nbCarbonMax = 20,
nbSulfurMin = 2,
nbSulfurMax = 5,
minMzCoreStructure = 0.01,
precisionDeltaM0M2 = 0.001,
deltaMOM2 = 1.996,
)) {
assert(Try(read[PeakIdentification](write(elem))).isSuccess)
Expand Down