1 | //FJSTARTHEADER
|
---|
2 | // $Id: BackgroundEstimatorBase.cc 4442 2020-05-05 07:50:11Z soyez $
|
---|
3 | //
|
---|
4 | // Copyright (c) 2005-2020, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
|
---|
5 | //
|
---|
6 | //----------------------------------------------------------------------
|
---|
7 | // This file is part of FastJet.
|
---|
8 | //
|
---|
9 | // FastJet is free software; you can redistribute it and/or modify
|
---|
10 | // it under the terms of the GNU General Public License as published by
|
---|
11 | // the Free Software Foundation; either version 2 of the License, or
|
---|
12 | // (at your option) any later version.
|
---|
13 | //
|
---|
14 | // The algorithms that underlie FastJet have required considerable
|
---|
15 | // development. They are described in the original FastJet paper,
|
---|
16 | // hep-ph/0512210 and in the manual, arXiv:1111.6097. If you use
|
---|
17 | // FastJet as part of work towards a scientific publication, please
|
---|
18 | // quote the version you use and include a citation to the manual and
|
---|
19 | // optionally also to hep-ph/0512210.
|
---|
20 | //
|
---|
21 | // FastJet is distributed in the hope that it will be useful,
|
---|
22 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
23 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
24 | // GNU General Public License for more details.
|
---|
25 | //
|
---|
26 | // You should have received a copy of the GNU General Public License
|
---|
27 | // along with FastJet. If not, see <http://www.gnu.org/licenses/>.
|
---|
28 | //----------------------------------------------------------------------
|
---|
29 | //FJENDHEADER
|
---|
30 |
|
---|
31 |
|
---|
32 | #include "fastjet/tools/BackgroundEstimatorBase.hh"
|
---|
33 |
|
---|
34 | using namespace std;
|
---|
35 |
|
---|
36 | FASTJET_BEGIN_NAMESPACE // defined in fastjet/internal/base.hh
|
---|
37 |
|
---|
// Definition of the static warning object of BackgroundEstimatorBase;
// _median_and_stddev(...) calls warn() on it when the estimated number
// of empty jets is strongly negative compared to the number of jets used.
38 | LimitedWarning BackgroundEstimatorBase::_warnings_empty_area;
|
---|
39 |
|
---|
40 | //----------------------------------------------------------------------
|
---|
41 | // given a quantity in a vector (e.g. pt_over_area) and knowledge
|
---|
42 | // about the number of empty jets, calculate the median and
|
---|
43 | // stand_dev_if_gaussian (roughly from the 16th percentile)
|
---|
44 | //
|
---|
45 | // If do_fj2_calculation is set to true then this performs FastJet
|
---|
46 | // 2.X estimation of the standard deviation, which has a spurious
|
---|
47 | // offset in the limit of a small number of jets.
|
---|
48 | void BackgroundEstimatorBase::_median_and_stddev(const vector<double> & quantity_vector,
|
---|
49 | double n_empty_jets,
|
---|
50 | double & median,
|
---|
51 | double & stand_dev_if_gaussian,
|
---|
52 | bool do_fj2_calculation) const {
|
---|
53 |
|
---|
54 | // this check is redundant (the code below behaves sensibly even
|
---|
55 | // with a zero size), but serves as a reminder of what happens if
|
---|
56 | // the quantity vector is zero-sized
|
---|
57 | if (quantity_vector.size() == 0) {
|
---|
58 | median = 0;
|
---|
59 | stand_dev_if_gaussian = 0;
|
---|
60 | return;
|
---|
61 | }
|
---|
62 |
|
---|
63 | vector<double> sorted_quantity_vector = quantity_vector;
|
---|
64 | sort(sorted_quantity_vector.begin(), sorted_quantity_vector.end());
|
---|
65 |
|
---|
66 | // empty area can sometimes be negative; with small ranges this can
|
---|
67 | // become pathological, so warn the user
|
---|
68 | int n_jets_used = sorted_quantity_vector.size();
|
---|
69 | if (n_empty_jets < -n_jets_used/4.0)
|
---|
70 | _warnings_empty_area.warn("BackgroundEstimatorBase::_median_and_stddev(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
|
---|
71 |
|
---|
72 | // now get the median & error, accounting for empty jets;
|
---|
73 | // define the fractions of distribution at median, median-1sigma
|
---|
74 | double posn[2] = {0.5, (1.0-0.6827)/2.0};
|
---|
75 | double res[2];
|
---|
76 | for (int i = 0; i < 2; i++) {
|
---|
77 | res[i] = _percentile(sorted_quantity_vector, posn[i], n_empty_jets,
|
---|
78 | do_fj2_calculation);
|
---|
79 | }
|
---|
80 |
|
---|
81 | median = res[0];
|
---|
82 | stand_dev_if_gaussian = res[0] - res[1];
|
---|
83 | }
|
---|
84 |
|
---|
85 |
|
---|
86 | //----------------------------------------------------------------------
|
---|
87 | // computes a percentile of a given _sorted_ vector of quantities
|
---|
88 | // - sorted_quantities the (sorted) vector contains the data sample
|
---|
89 | // - percentile the percentile (defined between 0 and 1) to compute
|
---|
90 | // - nempty an additional number of 0's
|
---|
91 | // (considered at the beginning of
|
---|
92 | // the quantity vector)
|
---|
93 | // - do_fj2_calculation carry out the calculation as it
|
---|
94 | // was done in fj2 (suffers from "edge effects")
|
---|
95 | double BackgroundEstimatorBase::_percentile(const vector<double> & sorted_quantities,
|
---|
96 | const double percentile,
|
---|
97 | const double nempty,
|
---|
98 | const bool do_fj2_calculation
|
---|
99 | ) const {
|
---|
100 | assert(percentile >= 0.0 && percentile <= 1.0);
|
---|
101 |
|
---|
102 | int quantities_size = sorted_quantities.size();
|
---|
103 | if (quantities_size == 0) return 0;
|
---|
104 |
|
---|
105 | double total_njets = quantities_size + nempty;
|
---|
106 | double percentile_pos;
|
---|
107 | if (do_fj2_calculation) {
|
---|
108 | percentile_pos = (total_njets-1)*percentile - nempty;
|
---|
109 | } else {
|
---|
110 | percentile_pos = (total_njets)*percentile - nempty - 0.5;
|
---|
111 | }
|
---|
112 |
|
---|
113 | double result;
|
---|
114 | if (percentile_pos >= 0 && quantities_size > 1) {
|
---|
115 | int int_percentile_pos = int(percentile_pos);
|
---|
116 |
|
---|
117 | // avoid potential overflow issues
|
---|
118 | if (int_percentile_pos+1 > quantities_size-1){
|
---|
119 | int_percentile_pos = quantities_size-2;
|
---|
120 | percentile_pos = quantities_size-1;
|
---|
121 | }
|
---|
122 |
|
---|
123 | result =
|
---|
124 | sorted_quantities[int_percentile_pos] * (int_percentile_pos+1-percentile_pos)
|
---|
125 | + sorted_quantities[int_percentile_pos+1] * (percentile_pos - int_percentile_pos);
|
---|
126 |
|
---|
127 |
|
---|
128 | } else if (percentile_pos > -0.5 && quantities_size >= 1
|
---|
129 | && !do_fj2_calculation) {
|
---|
130 | // in the LHS of this "bin", just keep a constant value (we could have
|
---|
131 | // interpolated to zero, but this might misbehave in cases where all jets
|
---|
132 | // are active, because it would go to zero too fast)
|
---|
133 | result = sorted_quantities[0];
|
---|
134 | } else {
|
---|
135 | result = 0.0;
|
---|
136 | }
|
---|
137 | return result;
|
---|
138 |
|
---|
139 |
|
---|
140 | }
|
---|
141 |
|
---|
142 |
|
---|
143 | FASTJET_END_NAMESPACE // defined in fastjet/internal/base.hh
|
---|