[d7d2da3] | 1 | //STARTHEADER
|
---|
| 2 | // $Id$
|
---|
| 3 | //
|
---|
| 4 | // Copyright (c) 2005-2011, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
|
---|
| 5 | //
|
---|
| 6 | //----------------------------------------------------------------------
|
---|
| 7 | // This file is part of FastJet.
|
---|
| 8 | //
|
---|
| 9 | // FastJet is free software; you can redistribute it and/or modify
|
---|
| 10 | // it under the terms of the GNU General Public License as published by
|
---|
| 11 | // the Free Software Foundation; either version 2 of the License, or
|
---|
| 12 | // (at your option) any later version.
|
---|
| 13 | //
|
---|
| 14 | // The algorithms that underlie FastJet have required considerable
|
---|
| 15 | // development and are described in hep-ph/0512210. If you use
|
---|
| 16 | // FastJet as part of work towards a scientific publication, please
|
---|
| 17 | // include a citation to the FastJet paper.
|
---|
| 18 | //
|
---|
| 19 | // FastJet is distributed in the hope that it will be useful,
|
---|
| 20 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 21 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 22 | // GNU General Public License for more details.
|
---|
| 23 | //
|
---|
| 24 | // You should have received a copy of the GNU General Public License
|
---|
| 25 | // along with FastJet. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 26 | //----------------------------------------------------------------------
|
---|
| 27 | //ENDHEADER
|
---|
| 28 |
|
---|
| 29 |
|
---|
| 30 | #include "fastjet/tools/BackgroundEstimatorBase.hh"
|
---|
| 31 |
|
---|
| 32 | using namespace std;
|
---|
| 33 |
|
---|
| 34 | FASTJET_BEGIN_NAMESPACE // defined in fastjet/internal/base.hh
|
---|
| 35 |
|
---|
| 36 | LimitedWarning BackgroundEstimatorBase::_warnings_empty_area;
|
---|
| 37 |
|
---|
| 38 | //----------------------------------------------------------------------
|
---|
| 39 | // given a quantity in a vector (e.g. pt_over_area) and knowledge
|
---|
| 40 | // about the number of empty jets, calculate the median and
|
---|
| 41 | // stand_dev_if_gaussian (roughly from the 16th percentile)
|
---|
| 42 | //
|
---|
| 43 | // If do_fj2_calculation is set to true then this performs FastJet
|
---|
| 44 | // 2.X estimation of the standard deviation, which has a spurious
|
---|
| 45 | // offset in the limit of a small number of jets.
|
---|
| 46 | void BackgroundEstimatorBase::_median_and_stddev(const vector<double> & quantity_vector,
|
---|
| 47 | double n_empty_jets,
|
---|
| 48 | double & median,
|
---|
| 49 | double & stand_dev_if_gaussian,
|
---|
| 50 | bool do_fj2_calculation) const {
|
---|
| 51 |
|
---|
| 52 | // this check is redundant (the code below behaves sensibly even
|
---|
| 53 | // with a zero size), but serves as a reminder of what happens if
|
---|
| 54 | // the quantity vector is zero-sized
|
---|
| 55 | if (quantity_vector.size() == 0) {
|
---|
| 56 | median = 0;
|
---|
| 57 | stand_dev_if_gaussian = 0;
|
---|
| 58 | return;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | vector<double> sorted_quantity_vector = quantity_vector;
|
---|
| 62 | sort(sorted_quantity_vector.begin(), sorted_quantity_vector.end());
|
---|
| 63 |
|
---|
| 64 | // empty area can sometimes be negative; with small ranges this can
|
---|
| 65 | // become pathological, so warn the user
|
---|
| 66 | int n_jets_used = sorted_quantity_vector.size();
|
---|
| 67 | if (n_empty_jets < -n_jets_used/4.0)
|
---|
| 68 | _warnings_empty_area.warn("BackgroundEstimatorBase::_median_and_stddev(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
|
---|
| 69 |
|
---|
| 70 | // now get the median & error, accounting for empty jets;
|
---|
| 71 | // define the fractions of distribution at median, median-1sigma
|
---|
| 72 | double posn[2] = {0.5, (1.0-0.6827)/2.0};
|
---|
| 73 | double res[2];
|
---|
| 74 | for (int i = 0; i < 2; i++) {
|
---|
| 75 | res[i] = _percentile(sorted_quantity_vector, posn[i], n_empty_jets,
|
---|
| 76 | do_fj2_calculation);
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | median = res[0];
|
---|
| 80 | stand_dev_if_gaussian = res[0] - res[1];
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 |
|
---|
| 84 | //----------------------------------------------------------------------
|
---|
| 85 | // computes a percentile of a given _sorted_ vector of quantities
|
---|
| 86 | // - sorted_quantities the (sorted) vector contains the data sample
|
---|
| 87 | // - percentile the percentile (defined between 0 and 1) to compute
|
---|
| 88 | // - nempty an additional number of 0's
|
---|
| 89 | // (considered at the beginning of
|
---|
| 90 | // the quantity vector)
|
---|
| 91 | // - do_fj2_calculation carry out the calculation as it
|
---|
| 92 | // was done in fj2 (suffers from "edge effects")
|
---|
| 93 | double BackgroundEstimatorBase::_percentile(const vector<double> & sorted_quantities,
|
---|
| 94 | const double percentile,
|
---|
| 95 | const double nempty,
|
---|
| 96 | const bool do_fj2_calculation
|
---|
| 97 | ) const {
|
---|
| 98 | assert(percentile >= 0.0 && percentile <= 1.0);
|
---|
| 99 |
|
---|
| 100 | int quantities_size = sorted_quantities.size();
|
---|
| 101 | if (quantities_size == 0) return 0;
|
---|
| 102 |
|
---|
| 103 | double total_njets = quantities_size + nempty;
|
---|
| 104 | double percentile_pos;
|
---|
| 105 | if (do_fj2_calculation) {
|
---|
| 106 | percentile_pos = (total_njets-1)*percentile - nempty;
|
---|
| 107 | } else {
|
---|
| 108 | percentile_pos = (total_njets)*percentile - nempty - 0.5;
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | double result;
|
---|
| 112 | if (percentile_pos >= 0 && quantities_size > 1) {
|
---|
| 113 | int int_percentile_pos = int(percentile_pos);
|
---|
| 114 |
|
---|
| 115 | // avoid potential overflow issues
|
---|
| 116 | if (int_percentile_pos+1 > quantities_size-1){
|
---|
| 117 | int_percentile_pos = quantities_size-2;
|
---|
| 118 | percentile_pos = quantities_size-1;
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | result =
|
---|
| 122 | sorted_quantities[int_percentile_pos] * (int_percentile_pos+1-percentile_pos)
|
---|
| 123 | + sorted_quantities[int_percentile_pos+1] * (percentile_pos - int_percentile_pos);
|
---|
| 124 |
|
---|
| 125 |
|
---|
| 126 | } else if (percentile_pos > -0.5 && quantities_size >= 1
|
---|
| 127 | && !do_fj2_calculation) {
|
---|
| 128 | // in the LHS of this "bin", just keep a constant value (we could have
|
---|
| 129 | // interpolated to zero, but this might misbehave in cases where all jets
|
---|
| 130 | // are active, because it would go to zero too fast)
|
---|
| 131 | result = sorted_quantities[0];
|
---|
| 132 | } else {
|
---|
| 133 | result = 0.0;
|
---|
| 134 | }
|
---|
| 135 | return result;
|
---|
| 136 |
|
---|
| 137 |
|
---|
| 138 | }
|
---|
| 139 |
|
---|
| 140 |
|
---|
| 141 | FASTJET_END_NAMESPACE // defined in fastjet/internal/base.hh
|
---|