1 | #! /usr/bin/env python
|
---|
2 |
|
---|
3 | # derive exclusions for the tested scenario from the signal regions of an
|
---|
4 | # analysis implemented using MadAnalysis 5 (MA5), using a simplified procedure
|
---|
5 | # and the CLs prescription
|
---|
6 |
|
---|
7 | # version 1.1 (July 9, 2014)
|
---|
8 | # made by Beranger Dumont
|
---|
9 | # based on toy MC code by Benjamin Fuks, Chris Wymant and Sam Bein
|
---|
10 | # v1.1: BF added an efficiency map calculator
|
---|
11 |
|
---|
12 | # takes as input:
|
---|
13 | # -- an XML analysis_name.info file that should be present in the same directory
|
---|
14 | # as the analysis code, i.e. Build/SampleAnalyzer/User/Analyzer/
|
---|
15 | # -- the SAF files for the cutflows of the tested scenario of interest in all
|
---|
16 | # signal regions, where the information on acceptance*efficiency can be found
|
---|
17 | # -- the cross section for the tested scenario (if not given as argument in
|
---|
18 | # command-line, it is looked for in the SAF file analysis_name.saf
|
---|
19 |
|
---|
20 | # returns the output on the screen and print basic results
|
---|
21 | # in the file analysis_name_[run number].out in
|
---|
22 | # Output/benchmark_point/
|
---|
23 |
|
---|
24 |
|
---|
25 | # path to the directory where the analysis code and output is present
|
---|
26 | # (this is the directory created when running MA5 in expert mode,
|
---|
27 | # containing the "Build", "Input" and "Output" directories)
|
---|
28 | analysis_path = "./"
|
---|
29 |
|
---|
30 | # The number of Poisson distributions we consider (each one effectively being
|
---|
31 | # a toy experiment with its own certain prediction for the background):
|
---|
32 | numtoyexperiments = 100000
|
---|
33 |
|
---|
34 | #
|
---|
35 | # the user is not supposed to modify the code below this line
|
---|
36 | #
|
---|
37 |
|
---|
38 | import os, sys
|
---|
39 | try:
|
---|
40 | from lxml import ET
|
---|
41 | except:
|
---|
42 | import xml.etree.ElementTree as ET
|
---|
43 | import scipy.stats
|
---|
44 |
|
---|
45 | def usage():
|
---|
46 | print 'Usage: ./exclusion_CLs.py analysis_name benchmark_point ' + \
|
---|
47 | '[run_number] [cross section in pb]'
|
---|
48 | print 'Example: ./exclusion_CLs.py cms_sus_13_011 T2tt_650_50.txt ' + \
|
---|
49 | '0 0.014'
|
---|
50 | print 'Default value of run number is 0'
|
---|
51 | print 'If the cross section is not given, it is taken from the MA5 output'
|
---|
52 |
|
---|
53 | def listdirectory(path):
|
---|
54 | lfile=[]
|
---|
55 | for root, dirs, files in os.walk(path):
|
---|
56 | for i in files:
|
---|
57 | lfile.append(os.path.join(root, i))
|
---|
58 | return lfile
|
---|
59 |
|
---|
60 | bestSR = ""
|
---|
61 |
|
---|
62 | def clean_name(str):
|
---|
63 | # based on CleanName() from
|
---|
64 | # ./tools/SampleAnalyzer/Process/Core/SampleAnalyzer.cpp
|
---|
65 | # in MA5 v1.1.11beta4
|
---|
66 | str = str.replace("/", "_slash_")
|
---|
67 | str = str.replace("->", "_to_")
|
---|
68 | str = str.replace(">=", "_greater_than_or_equal_to_")
|
---|
69 | str = str.replace(">", "_greater_than_")
|
---|
70 | str = str.replace("<=", "_smaller_than_or_equal_to_")
|
---|
71 | str = str.replace("<", "_smaller_than_")
|
---|
72 | str = str.replace(" ", "_")
|
---|
73 | str = str.replace(",", "_")
|
---|
74 | str = str.replace("+", "_")
|
---|
75 | str = str.replace("-", "_")
|
---|
76 | str = str.replace("(", "_lp_")
|
---|
77 | str = str.replace(")", "_rp_")
|
---|
78 | return str
|
---|
79 |
|
---|
80 | def CLs(NumObserved, ExpectedBG, BGError, SigHypothesis, NumToyExperiments):
|
---|
81 | # generate a set of expected-number-of-background-events, one for each toy
|
---|
82 | # experiment, distributed according to a Gaussian with the specified mean
|
---|
83 | # and uncertainty
|
---|
84 | ExpectedBGs = scipy.stats.norm.rvs(loc=ExpectedBG, \
|
---|
85 | scale=BGError, size=NumToyExperiments)
|
---|
86 |
|
---|
87 | # Ignore values in the tail of the Gaussian extending to negative numbers
|
---|
88 | ExpectedBGs = [value for value in ExpectedBGs if value > 0]
|
---|
89 |
|
---|
90 | # For each toy experiment, get the actual number of background events by
|
---|
91 | # taking one value from a Poisson distribution created using the expected
|
---|
92 | # number of events.
|
---|
93 | ToyBGs = scipy.stats.poisson.rvs(ExpectedBGs)
|
---|
94 | ToyBGs = map(float, ToyBGs)
|
---|
95 |
|
---|
96 | # The probability for the background alone to fluctutate as LOW as
|
---|
97 | # observed = the fraction of the toy experiments with backgrounds as low as
|
---|
98 | # observed = p_b.
|
---|
99 | # NB (1 - this p_b) corresponds to what is usually called p_b for CLs.
|
---|
100 | p_b = scipy.stats.percentileofscore(ToyBGs, NumObserved, kind='weak')*.01
|
---|
101 |
|
---|
102 | # Toy MC for background+signal
|
---|
103 | ExpectedBGandS = [expectedbg + SigHypothesis for expectedbg in ExpectedBGs]
|
---|
104 | ToyBplusS = scipy.stats.poisson.rvs(ExpectedBGandS)
|
---|
105 | ToyBplusS = map(float, ToyBplusS)
|
---|
106 |
|
---|
107 | # Calculate the fraction of these that are >= the number observed,
|
---|
108 | # giving p_(S+B). Divide by (1 - p_b) a la the CLs prescription.
|
---|
109 | p_SplusB = scipy.stats.percentileofscore(ToyBplusS, NumObserved, kind='weak')*.01
|
---|
110 |
|
---|
111 | return 1.-(p_SplusB / p_b) # 1 - CLs
|
---|
112 |
|
---|
113 | def exclusion_check(crosssection):
|
---|
114 | global bestSR
|
---|
115 |
|
---|
116 | if len(signalregions) > 1:
|
---|
117 | # if more than one signal region, decide which signal regions
|
---|
118 | # yields the best expected limit
|
---|
119 | limit = -1.
|
---|
120 | for SR in signalregions:
|
---|
121 | nsignal = crosssection * lumi * 1000. * signalregions[SR]["acceff"]
|
---|
122 | nb = signalregions[SR]["nb"]
|
---|
123 | deltanb = signalregions[SR]["deltanb"]
|
---|
124 |
|
---|
125 | limitSR = CLs(nb, nb, deltanb, nsignal, numtoyexperiments)
|
---|
126 | if limitSR > limit:
|
---|
127 | bestSR = SR
|
---|
128 | limit = limitSR
|
---|
129 | else:
|
---|
130 | bestSR = signalregions.keys()[0]
|
---|
131 |
|
---|
132 | nsignal = crosssection * lumi * 1000. * signalregions[bestSR]["acceff"]
|
---|
133 | nobs = int(signalregions[bestSR]["nobs"])
|
---|
134 | nb = signalregions[bestSR]["nb"]
|
---|
135 | deltanb = signalregions[bestSR]["deltanb"]
|
---|
136 | bestCLs=CLs(nobs, nb, deltanb, nsignal, numtoyexperiments)
|
---|
137 |
|
---|
138 | print 'The best expected signal region is "' + bestSR + '".'
|
---|
139 | print 'It has: nobs = ' + str(nobs) + ', nb = ' + str(nb) + ' \pm ' + \
|
---|
140 | str(deltanb) + ', nsignal = ' + str(round(nsignal,2)) + '.'
|
---|
141 |
|
---|
142 | return bestCLs
|
---|
143 |
|
---|
144 | def exclusion_check95(crosssection):
|
---|
145 | return exclusion_check(crosssection)-0.95
|
---|
146 |
|
---|
147 | if analysis_path[-1] == "/":
|
---|
148 | analysis_path = analysis_path[:-1]
|
---|
149 |
|
---|
150 | # at least one argument, check if asking for help
|
---|
151 | if len(sys.argv) < 3 or (sys.argv[1] == "-h" or sys.argv[1] == "--help"):
|
---|
152 | usage()
|
---|
153 | sys.exit()
|
---|
154 |
|
---|
155 | analysis_name = sys.argv[1]
|
---|
156 | bench_name = sys.argv[2]
|
---|
157 | if len(sys.argv) > 3:
|
---|
158 | run_number = sys.argv[3]
|
---|
159 | else:
|
---|
160 | run_number = "0"
|
---|
161 | if len(sys.argv) > 4:
|
---|
162 | try:
|
---|
163 | xsection = float(sys.argv[4])
|
---|
164 | except ValueError:
|
---|
165 | print 'Invalid cross section given as command-line argument: "' + \
|
---|
166 | sys.argv[4]+ '".'
|
---|
167 | sys.exit()
|
---|
168 | else:
|
---|
169 | xsection = 0
|
---|
170 |
|
---|
171 |
|
---|
172 |
|
---|
173 | ######################################
|
---|
174 | # first, read the XML .info file
|
---|
175 | # and fill the variable lumi
|
---|
176 | # and the dictionary signalregions
|
---|
177 | ######################################
|
---|
178 |
|
---|
179 | lumi = 0 # integrated luminosity, in fb^-1
|
---|
180 | signalregions = {}
|
---|
181 |
|
---|
182 | analysisinfo_path = analysis_path + "/Build/SampleAnalyzer/User/Analyzer/" + \
|
---|
183 | analysis_name + ".info"
|
---|
184 |
|
---|
185 | try:
|
---|
186 | info_input = open(analysisinfo_path)
|
---|
187 | except IOError as e:
|
---|
188 | print 'I/O error({0}): {1}'.format(e.errno, e.strerror)
|
---|
189 | print 'Cannot open the XML info file "' + analysisinfo_path + '".'
|
---|
190 | sys.exit()
|
---|
191 |
|
---|
192 | info_tree = ET.parse(info_input)
|
---|
193 | info_input.close()
|
---|
194 |
|
---|
195 | info_root = info_tree.getroot()
|
---|
196 | if info_root.tag != "analysis":
|
---|
197 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
198 | print 'Root tag should be <analysis>, not <' + info_root.tag + '>.'
|
---|
199 | sys.exit()
|
---|
200 | if info_root.attrib["id"] != analysis_name:
|
---|
201 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
202 | print 'Analysis id in root tag <analysis> should be "' + analysis_name + \
|
---|
203 | '", not "' + info_root.attrib["id"] + '".'
|
---|
204 | sys.exit()
|
---|
205 |
|
---|
206 |
|
---|
207 | for child in info_root:
|
---|
208 | # for <lumi> tag
|
---|
209 | if child.tag == "lumi":
|
---|
210 | if lumi != 0:
|
---|
211 | print 'Warning: redefinition of the luminosity in the ' + \
|
---|
212 | 'XML info file "' + analysisinfo_path + '".'
|
---|
213 |
|
---|
214 | try:
|
---|
215 | lumi = float(child.text)
|
---|
216 | except TypeError: # empty tag is of type NULL
|
---|
217 | lumi = 0
|
---|
218 | except ValueError:
|
---|
219 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
220 | print 'The value of the <lumi> tag is not a number.'
|
---|
221 | sys.exit()
|
---|
222 |
|
---|
223 | # for the <region> tags
|
---|
224 | # if no type is specified, assumed to be a signal region
|
---|
225 | if child.tag == "region" and \
|
---|
226 | ("type" not in child.attrib or child.attrib["type"] == "signal"):
|
---|
227 | if "id" not in child.attrib:
|
---|
228 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
229 | print 'Presence of <region> tags with no id attribute.'
|
---|
230 | sys.exit()
|
---|
231 |
|
---|
232 | regionid = child.attrib["id"]
|
---|
233 |
|
---|
234 | if regionid in signalregions:
|
---|
235 | # a <region> tag with the same id has already been defined
|
---|
236 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
237 | print 'A region with id="' + regionid + ' is defined ' + \
|
---|
238 | 'multiple times.'
|
---|
239 | sys.exit()
|
---|
240 |
|
---|
241 | signalregions[regionid] = {"acceff": 0, "syst":0, "stat":0} # initialize efficiency to 0
|
---|
242 | for rchild in child:
|
---|
243 | if rchild.tag in ["nobs", "nb", "deltanb"]:
|
---|
244 | ntag = rchild.tag
|
---|
245 | if ntag in signalregions[regionid]:
|
---|
246 | print 'Warning: redefinition of <' + ntag + '> in the ' + \
|
---|
247 | 'region "' + \
|
---|
248 | regionid + '" of the XML info file "' + \
|
---|
249 | analysisinfo_path + '".'
|
---|
250 |
|
---|
251 | try:
|
---|
252 | signalregions[regionid][ntag] = float(rchild.text)
|
---|
253 | except TypeError: # empty tag is of type NULL
|
---|
254 | signalregions[regionid][ntag] = 0
|
---|
255 | except ValueError:
|
---|
256 | print 'Invalid XML info file "' + analysisinfo_path + '".'
|
---|
257 | print 'The value of the <' + ntag + '> tag in region "' + \
|
---|
258 | regionid + '" is not a number.'
|
---|
259 | sys.exit()
|
---|
260 |
|
---|
261 | ######################################
|
---|
262 | # then, read the SAF files
|
---|
263 | # for the cutflows
|
---|
264 | # generated by MA5
|
---|
265 | ######################################
|
---|
266 |
|
---|
267 | analysisinfo_path = analysis_path + "/Output/" + \
|
---|
268 | bench_name + "/" + analysis_name + "_" + run_number + "/Cutflows"
|
---|
269 |
|
---|
270 | listdir = listdirectory(analysisinfo_path)
|
---|
271 |
|
---|
272 | if not listdir:
|
---|
273 | print 'The directory "' + analysisinfo_path + '" containing the SAF' + \
|
---|
274 | ' files for the cutflows cannot be listed.'
|
---|
275 | sys.exit()
|
---|
276 |
|
---|
277 | for file in listdir:
|
---|
278 | # signal region (as defined in the XML info file)
|
---|
279 | # associated with the cutflow file
|
---|
280 | assoc_SR = ""
|
---|
281 |
|
---|
282 | if file[-4:] != ".saf":
|
---|
283 | continue
|
---|
284 | SRname = file.split("/")[-1][:-4]
|
---|
285 |
|
---|
286 | for regionid in signalregions:
|
---|
287 | for sr in regionid.split(";"):
|
---|
288 | if clean_name(sr) == SRname:
|
---|
289 | assoc_SR = sr
|
---|
290 |
|
---|
291 | # if there is no signal region found associated with the SAF file
|
---|
292 | if assoc_SR == "":
|
---|
293 | print 'Warning: no region found associated with the SAF file "' + \
|
---|
294 | file + '"; will be skipped.'
|
---|
295 | continue
|
---|
296 |
|
---|
297 | # otherwise, read the acceptance times efficiency from the SAF file
|
---|
298 | try:
|
---|
299 | SAF_cutflow = open(file)
|
---|
300 | except IOError as e:
|
---|
301 | print 'I/O error({0}): {1}'.format(e.errno, e.strerror)
|
---|
302 | print 'Cannot open the XML info file "' + file + '".'
|
---|
303 | sys.exit()
|
---|
304 |
|
---|
305 | in_initialcounter = False
|
---|
306 | in_counter = False
|
---|
307 | initialnum = 0
|
---|
308 | finalnum = 0
|
---|
309 | for line in SAF_cutflow:
|
---|
310 | line = line.rstrip("\n").strip()
|
---|
311 | if line[:16] == "<InitialCounter>":
|
---|
312 | in_initialcounter = True
|
---|
313 | continue
|
---|
314 | if line[:17] == "</InitialCounter>":
|
---|
315 | in_initialcounter = False
|
---|
316 | continue
|
---|
317 | if line[:9] == "<Counter>":
|
---|
318 | in_counter = True
|
---|
319 | continue
|
---|
320 | if line[:10] == "</Counter>":
|
---|
321 | in_counter = False
|
---|
322 | continue
|
---|
323 |
|
---|
324 | if in_initialcounter and line[-14:] == "sum of weights":
|
---|
325 | try:
|
---|
326 | initialnum = float(line.split()[0])
|
---|
327 | except:
|
---|
328 | print 'Invalid SAF file "' + file + '".'
|
---|
329 | print 'The initial number of events cannot be read.'
|
---|
330 | sys.exit()
|
---|
331 |
|
---|
332 | if in_counter and line[-14:] == "sum of weights":
|
---|
333 | try:
|
---|
334 | finalnum = float(line.split()[0])
|
---|
335 | except:
|
---|
336 | print 'Invalid SAF file "' + file + '".'
|
---|
337 | print 'The number of events cannot be read.'
|
---|
338 | sys.exit()
|
---|
339 |
|
---|
340 | if initialnum == 0:
|
---|
341 | print 'Invalid SAF file "' + file + '".'
|
---|
342 | print 'The number of events cannot be read.'
|
---|
343 | sys.exit()
|
---|
344 |
|
---|
345 | for regionid in signalregions:
|
---|
346 | # for each SR id as defined in the XML info file...
|
---|
347 | for individualSR in regionid.split(";"):
|
---|
348 | # ...look for each individual SR...
|
---|
349 | if individualSR == assoc_SR:
|
---|
350 | # ... if it matches with the SAF file read, add the
|
---|
351 | # acceptance*efficiency to existing value
|
---|
352 | signalregions[regionid]["acceff"] += finalnum / initialnum
|
---|
353 | signalregions[regionid]["stat"] += finalnum
|
---|
354 |
|
---|
355 | ######################################
|
---|
356 | # then, get the cross section info
|
---|
357 | # if not given as command-line
|
---|
358 | # argument, look into SAF file
|
---|
359 | ######################################
|
---|
360 |
|
---|
361 | if xsection == 0: # not given as command-line argument
|
---|
362 | mainSAF_path = analysis_path + "/Output/" + \
|
---|
363 | bench_name + "/" + bench_name + ".saf"
|
---|
364 |
|
---|
365 | try:
|
---|
366 | mainSAF = open(mainSAF_path)
|
---|
367 | except IOError as e:
|
---|
368 | print 'I/O error({0}): {1}'.format(e.errno, e.strerror)
|
---|
369 | print 'Cannot open the XML info file "' + mainSAF_path + '".'
|
---|
370 | sys.exit()
|
---|
371 |
|
---|
372 | nline = 0
|
---|
373 | for line in mainSAF:
|
---|
374 | line = line.rstrip("\n").strip()
|
---|
375 | if line[:18] == "<SampleGlobalInfo>":
|
---|
376 | nline = 1
|
---|
377 | continue
|
---|
378 | if nline > 0:
|
---|
379 | nline += 1
|
---|
380 | if nline == 3:
|
---|
381 | try:
|
---|
382 | xsection = float(line.split()[0])
|
---|
383 | except:
|
---|
384 | print 'Invalid SAF file "' + mainSAF_path + '".'
|
---|
385 | print 'The cross section cannot be read.'
|
---|
386 | sys.exit()
|
---|
387 |
|
---|
388 | if xsection <= 0.:
|
---|
389 | print 'Invalid cross section of ' + str(xsection) + ' pb.'
|
---|
390 | print 'The cross section cannot be read from the SAF file "' + \
|
---|
391 | mainSAF_path + '".'
|
---|
392 | sys.exit()
|
---|
393 | break
|
---|
394 |
|
---|
395 | ######################################
|
---|
396 | # now, we have the information on the
|
---|
397 | # cross section (variable xsection),
|
---|
398 | # the luminosity (variable lumi),
|
---|
399 | # the number of background events,
|
---|
400 | # observed events, and the acceptance
|
---|
401 | # times efficiency (in the dictionary
|
---|
402 | # signalregions)
|
---|
403 | #
|
---|
404 | # we can proceed with the exclusion
|
---|
405 | ######################################
|
---|
406 |
|
---|
407 | if xsection > 0:
|
---|
408 | # first, decide which signal regions yields the best expected limit
|
---|
409 | final_limit = exclusion_check(xsection)
|
---|
410 |
|
---|
411 | print '\nThis signal is excluded at the ' + str(round(final_limit*100.,1)) + \
|
---|
412 | '% CL (CLs=' + str(round(1-final_limit,3)) + ').'
|
---|
413 | else:
|
---|
414 | # if a negative cross section is given as input,
|
---|
415 | # the code is looking for the cross section that is excluded at 95% CL
|
---|
416 | # using a root-finding algorithm
|
---|
417 |
|
---|
418 | print 'Negative cross section is given.'
|
---|
419 | print 'Will look for the cross section that is excluded at 95% CL'
|
---|
420 | print 'using a root-finding algorithm.\n'
|
---|
421 |
|
---|
422 | # need to tune the lower and upper bound (corresponding to a cross section
|
---|
423 | # that we know is not excluded or excluded, respectively)
|
---|
424 | lowerb = 1. # 1 pb
|
---|
425 | upperb = 1. # 1 pb
|
---|
426 | while exclusion_check95(lowerb) > 0.:
|
---|
427 | lowerb *= 0.1
|
---|
428 | while exclusion_check95(upperb) < 0.:
|
---|
429 | upperb *= 10.
|
---|
430 |
|
---|
431 | print '\nlower and upper bounds for the root-finding algorithm' + \
|
---|
432 | ' have been found: [%.2e %.2e] pb\n' % (lowerb, upperb)
|
---|
433 |
|
---|
434 | final_limit = scipy.optimize.brentq(exclusion_check95, lowerb, upperb)
|
---|
435 |
|
---|
436 | print '\nThe excluded cross section at 2 sigma is %.5E pb.' % final_limit
|
---|
437 |
|
---|
438 | # finally, write the results in an output file
|
---|
439 | output_path = analysis_path + "/Output/" + \
|
---|
440 | bench_name + "/" + analysis_name + "_" + run_number + ".out"
|
---|
441 |
|
---|
442 | try:
|
---|
443 | output = open(output_path, "w")
|
---|
444 | except IOError as e:
|
---|
445 | print 'I/O error({0}): {1}'.format(e.errno, e.strerror)
|
---|
446 | print 'Cannot create the output file "' + output_path + '".'
|
---|
447 | sys.exit()
|
---|
448 |
|
---|
449 | output.write(bestSR+'\n'+str(final_limit))
|
---|
450 | output.close()
|
---|
451 |
|
---|
452 | # Efficiencies
|
---|
453 | import math
|
---|
454 | out = open('efficiencies_'+analysis_name+'.dat','w')
|
---|
455 | Title=['Signal region', 'Efficiency', 'Stat. error', 'Syst. error', 'Total error', 'Best region']
|
---|
456 | out.write(Title[0].center(50) + Title[1].center(16) + Title[2].center(21) + Title[3].center(16) +\
|
---|
457 | Title[4].center(18)+ Title[5].rjust(10)+'\n')
|
---|
458 | for region in signalregions:
|
---|
459 | if signalregions[region]["stat"]!=0:
|
---|
460 | signalregions[region]["stat"] =1./math.sqrt(float(signalregions[region]["stat"]))
|
---|
461 | signalregions[region]["syst"] = float(signalregions[region]["syst"])
|
---|
462 | IsBest=False;
|
---|
463 | if region is bestSR:
|
---|
464 | IsBest=True
|
---|
465 | out.write(region.replace(' ','').ljust(50) + "%1.5f".center(15) %(signalregions[region]["acceff"]) +\
|
---|
466 | "%1.6f".center(15) %(signalregions[region]["stat"])+ \
|
---|
467 | "%1.6f".center(15) %(signalregions[region]["syst"])+ \
|
---|
468 | "%1.6f".center(15) %(math.sqrt(signalregions[region]["syst"]**2+signalregions[region]["stat"]**2))+\
|
---|
469 | str(int(IsBest)).rjust(8) +\
|
---|
470 | '\n')
|
---|
471 | out.close()
|
---|