-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathnanoProdWrapper.py
140 lines (129 loc) · 6.74 KB
/
nanoProdWrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Crab wrapper.
import os
import sys
import yaml
import FWCore.ParameterSet.Config as cms
from FWCore.ParameterSet.VarParsing import VarParsing
# Report the invocation on stderr.
print("Running nanoProdWrapper.py", file=sys.stderr)
print("Arguments: ", sys.argv, file=sys.stderr)

options = VarParsing('analysis')

# Short aliases for the VarParsing enumerations used in the table below.
_singleton = VarParsing.multiplicity.singleton
_list = VarParsing.multiplicity.list
_string = VarParsing.varType.string
_bool = VarParsing.varType.bool
_int = VarParsing.varType.int

# One entry per command-line option: (name, default, multiplicity, type, help).
# Entries are registered in the order listed.
_option_specs = [
    ('sampleType', '', _singleton, _string,
     "Indicates the sample type: data or mc"),
    ('era', '', _singleton, _string,
     "Indicates era: Run2_2016_HIPM, Run2_2016, Run2_2017, Run2_2018, "
     "Run3_2022, Run3_2022EE, Run3_2023, Run3_2023BPix"),
    ('mustProcessAllInputs', False, _singleton, _bool,
     "To sucessfully finish, all inputs must be processed."),
    ('keepIntermediateFiles', False, _singleton, _bool,
     "Do not delete temporary files."),
    ('maxRuntime', 20, _singleton, _int,
     "Maximal expected job runtime in hours."),
    ('maxFiles', -1, _singleton, _int,
     "Maximal number of files to process."),
    ('recoveryIndex', -1, _singleton, _int,
     "If task recovery index >= 0, it will be used as a suffix in output file names."),
    ('customise', '', _singleton, _string,
     "Production customization code (if any)"),
    ('customiseCmds', '', _singleton, _string,
     "Production customization commands (if any)"),
    ('writePSet', False, _singleton, _bool,
     "Dump configuration into PSet.py."),
    ('processEachRunSeparately', False, _singleton, _bool,
     "Run a separate cmsRun instance for each run."),
    ('copyInputsToLocal', True, _singleton, _bool,
     "Copy inputs (one at the time) to a job working directory before processing them."),
    ('inputDBS', 'global', _singleton, _string,
     "DBS instance"),
    ('inputPFNSprefix', '', _singleton, _string,
     "Custom pfns prefix for input files"),
    ('output', '', _list, _string,
     "Output descriptions. Possible formats:\n"
     "file\n"
     "file;output_pfn\n"
     "file;output_pfn;skim_cfg;skim_setup\n"
     "file;output_pfn;skim_cfg;skim_setup;skim_setup_failed\n"),
    ('datasetFiles', '', _singleton, _string,
     "Path to a JSON file with the dict of all files in the dataset.\n"
     "It is used to assing file ids to the outputs.\n"
     "If empty, indices of input files as specified in inputFiles are used."),
]
for _spec in _option_specs:
    options.register(*_spec)

options.parseArguments()
# Conditions strings used for simulated samples, keyed by era name.
cond_mc = {
    'Run2_2016_HIPM': 'auto:run2_mc_pre_vfp',
    'Run2_2016': 'auto:run2_mc',
    'Run2_2017': 'auto:phase1_2017_realistic',
    'Run2_2018': 'auto:phase1_2018_realistic',
    'Run3_2022': 'auto:phase1_2022_realistic',
    'Run3_2022EE': 'auto:phase1_2022_realistic_postEE',
    'Run3_2023': 'auto:phase1_2023_realistic',
    'Run3_2023BPix': 'auto:phase1_2023_realistic_postBPix',
}

# Derive the era string, the era modifier and (for data) the conditions string
# from the requested era. Any era that is not Run2* or Run3* is rejected.
if options.era.startswith('Run2'):
    cond_data = 'auto:run2_data'
    era_str = options.era
    era_mod = ',run2_nanoAOD_106Xv2'
elif options.era.startswith('Run3'):
    # Conditions strings used for Run3 data, keyed by era name.
    cond_data_run3 = {
        'Run3_2022': 'auto:run3_data',
        'Run3_2023': 'auto:run3_data',
    }
    if options.sampleType == 'data':
        # Fail with an explicit message instead of a bare KeyError when the
        # era has no data entry in the table above.
        if options.era not in cond_data_run3:
            raise RuntimeError(
                f'No data conditions are defined for era = "{options.era}"')
        cond_data = cond_data_run3[options.era]
    era_str = 'Run3'
    era_mod = ''
else:
    raise RuntimeError(f'Unknown era = "{options.era}"')

# Pick the conditions string that matches the sample type.
if options.sampleType == 'data':
    cond = cond_data
elif options.sampleType == 'mc':
    # Same explicit failure for eras that pass the prefix test above but have
    # no entry in cond_mc.
    if options.era not in cond_mc:
        raise RuntimeError(
            f'No MC conditions are defined for era = "{options.era}"')
    cond = cond_mc[options.era]
else:
    raise RuntimeError(f'Unknown sample type = "{options.sampleType}"')
# Create the 'NanoProd' process with a "PoolSource" reading the input files
# given on the command line.
process = cms.Process('NanoProd')
process.source = cms.Source(
    "PoolSource",
    fileNames=cms.untracked.vstring(options.inputFiles),
)
process.options = cms.untracked.PSet(
    wantSummary=cms.untracked.bool(False),
)

# Start from an input limit of -1 and replace it only when a positive
# maxEvents value was requested.
process.maxEvents = cms.untracked.PSet(
    input=cms.untracked.int32(-1),
)
if options.maxEvents > 0:
    process.maxEvents.input = options.maxEvents
# Validate every output description. Each one is a ';'-separated record:
#   file[;output_pfn[;skim_cfg;skim_setup[;skim_setup_failed]]]
# A RuntimeError is raised on the first malformed description.
for output_desc in options.output:
    output_fields = output_desc.split(';')
    if len(output_fields) not in (1, 2, 4, 5):
        raise RuntimeError(f'Invalid output format: {output_fields}')
    # Pad the omitted trailing fields with empty strings so that the
    # unpacking below always sees five values.
    output_fields += [''] * (5 - len(output_fields))
    file, output_pfn, skim_cfg, skim_setup, skim_setup_failed = output_fields

    if not file:
        raise RuntimeError('Empty output file name.')

    if skim_cfg:
        if not skim_setup:
            raise RuntimeError(f'skimCfg={skim_cfg}, but skimSetup is not specified.')
        # The setup names are cross-checked only when the configuration file
        # exists at this path; otherwise the check is skipped.
        if os.path.isfile(skim_cfg):
            with open(skim_cfg, 'r') as f:
                # An empty YAML document loads as None; treat it as having no
                # setups so the membership tests below raise the intended
                # RuntimeError rather than a TypeError.
                skim_config = yaml.safe_load(f) or {}
            if skim_setup not in skim_config:
                raise RuntimeError(f'Setup "{skim_setup}" not found in skimCfg={skim_cfg}.')
            # The failed-events setup is optional, but when given it must also
            # be present in the configuration.
            if skim_setup_failed and skim_setup_failed not in skim_config:
                raise RuntimeError(f"Setup {skim_setup_failed} not found in skimCfg={skim_cfg}.")
    elif skim_setup or skim_setup_failed:
        raise RuntimeError("Skim setup can not be specified without a skim configuration file.")
# Short aliases for the untracked parameter factories used below.
_ustring = cms.untracked.string
_ubool = cms.untracked.bool
_int32 = cms.untracked.int32
_uvstring = cms.untracked.vstring

# Collect the parsed options and the derived era/conditions strings into an
# untracked PSet attached to the process as `exParams`.
process.exParams = cms.untracked.PSet(
    sampleType=_ustring(options.sampleType),
    era=_ustring(era_str + era_mod),
    cond=_ustring(cond),
    customisationFunction=_ustring(options.customise),
    customisationCommands=_ustring(options.customiseCmds),
    mustProcessAllInputs=_ubool(options.mustProcessAllInputs),
    keepIntermediateFiles=_ubool(options.keepIntermediateFiles),
    jobModule=_ustring('crabJob_nanoProd.py'),
    output=_uvstring(options.output),
    datasetFiles=_ustring(options.datasetFiles),
    maxFiles=_int32(options.maxFiles),
    recoveryIndex=_int32(options.recoveryIndex),
    copyInputsToLocal=_ubool(options.copyInputsToLocal),
    inputDBS=_ustring(options.inputDBS),
    inputPFNSprefix=_ustring(options.inputPFNSprefix),
    processEachRunSeparately=_ubool(options.processEachRunSeparately),
)
# When requested, write the Python dump of the process to PSet.py in the
# current working directory.
if options.writePSet:
    with open('PSet.py', 'w') as pset_file:
        pset_file.write(process.dumpPython() + '\n')