Skip to content

Commit 293b56d

Browse files
committed
Fix peak-fits where one or more channels had a small, but non-zero number of counts.
This occurs primarily on background-subtracted spectra, which may have channel contents like 0.0007, where even a single such channel would mess up the whole fit. Changed so that any channel contents under 1.0 will have uncertainty 1.0. Not totally happy with this, but it hopefully does avoid this particular edge case.
1 parent 946af22 commit 293b56d

File tree

2 files changed

+48
-26
lines changed

2 files changed

+48
-26
lines changed

src/PeakFit.cpp

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ using namespace std;
7676

7777
using SpecUtils::Measurement;
7878

79+
// 20240911: The minimum uncertainty allowed for a gamma spectrum channel.
80+
// Background subtracted spectra can end up with tiny bins, like 0.0007,
81+
// which if we take its uncertainty to be its sqrt, a single bin like this will
82+
// totally mess up the whole ROI. So we'll impose a minimum uncertainty on
83+
// each channel.
84+
// However, if a spectrum is scaled, and not Poisson errored, this will totally
85+
// mess things up (even though it wouldn't be right in the first place).
86+
#define MIN_CHANNEL_UNCERT 1.0
87+
7988
template<class T>
8089
bool matrix_invert( const boost::numeric::ublas::matrix<T>& input,
8190
boost::numeric::ublas::matrix<T> &inverse )
@@ -1774,7 +1783,7 @@ double chi2_for_region( const PeakShrdVec &peaks,
17741783
const double ncontinuum = continuum->offset_integral(xbinlow, xbinup, data);
17751784

17761785
const double npeak = gauss_counts[i];
1777-
const double uncert = ndata > 0.0 ? sqrt(ndata) : 1.0;
1786+
const double uncert = ndata > MIN_CHANNEL_UNCERT ? sqrt(ndata) : 1.0;
17781787
const double chi = (ndata-ncontinuum-npeak)/uncert;
17791788

17801789
chi2 += chi*chi;
@@ -1803,7 +1812,7 @@ double chi2_for_region( const PeakShrdVec &peaks,
18031812
{
18041813
const double peakarea = i->second;
18051814
const double ndata = data->gamma_channel_content( i->first );
1806-
const double uncert = ndata > 0.0 ? sqrt(ndata) : 1.0;
1815+
const double uncert = ndata > MIN_CHANNEL_UNCERT ? sqrt(ndata) : 1.0;
18071816
const double chi = (ndata-peakarea)/uncert;
18081817
chi2 += chi*chi;
18091818
}//for( int bin = minbin; bin <= maxbin; ++bin )
@@ -2069,7 +2078,7 @@ double evaluate_chi2dof_for_range( const std::vector<PeakDef> &peaks,
20692078
if( y_pred < 0.0 )
20702079
y_pred = 0.0;
20712080

2072-
const double uncert = (y > 0.0f ? sqrt(y) : 1.0);
2081+
const double uncert = (y > MIN_CHANNEL_UNCERT ? sqrt(y) : 1.0);
20732082
chi2 += std::pow( (y_pred - y) / uncert, 2.0 );
20742083
}//for( int bin = 0; bin < nbin; ++bin )
20752084

@@ -3702,7 +3711,7 @@ bool check_lowres_single_peak_fit( const std::shared_ptr<const PeakDef> peak,
37023711
const double x = dataH->gamma_channel_lower( channel );
37033712
const double y = dataH->gamma_channel_content( channel );
37043713
const double y_pred = poly_coeffs[0] + poly_coeffs[1]*x;
3705-
const double uncert = (y > 0.0f ? std::sqrt(y) : 1.0);
3714+
const double uncert = (y > MIN_CHANNEL_UNCERT ? std::sqrt(y) : 1.0);
37063715
const double thichi2 = std::pow( (y_pred - y) / uncert, 2.0 );
37073716

37083717
linechi2 += thichi2;
@@ -4101,7 +4110,7 @@ PeakRejectionStatus check_lowres_multi_peak_fit( const vector<std::shared_ptr<co
41014110
const double x = dataH->gamma_channel_lower( channel );
41024111
const double y = dataH->gamma_channel_content( channel );
41034112
const double y_pred = poly_coeffs[0] + poly_coeffs[1]*x;
4104-
const double uncert = (y > 0.0f ? sqrt(y) : 1.0);
4113+
const double uncert = (y > MIN_CHANNEL_UNCERT ? sqrt(y) : 1.0);
41054114
linechi2 += std::pow( (y_pred - y) / uncert, 2.0 );
41064115
}//for( int bin = 0; bin < nbin; ++bin )
41074116

@@ -4624,7 +4633,7 @@ bool check_highres_single_peak_fit( const std::shared_ptr<const PeakDef> peak,
46244633
const double x = dataH->gamma_channel_lower( channel );
46254634
const double y = dataH->gamma_channel_content( channel );
46264635
const double y_pred = poly_coeffs[0] + poly_coeffs[1]*x;
4627-
const double uncert = (y > 0.0f ? std::sqrt(y) : 1.0);
4636+
const double uncert = (y > MIN_CHANNEL_UNCERT ? std::sqrt(y) : 1.0);
46284637
const double thichi2 = std::pow( (y_pred - y) / uncert, 2.0 );
46294638

46304639
linechi2 += thichi2;
@@ -6262,7 +6271,7 @@ double fit_to_polynomial( const float *x, const float *data, const size_t nbin,
62626271

62636272
for( size_t row = 0; row < nbin; ++row )
62646273
{
6265-
const double uncert = (data[row] > 0.0 ? sqrt( data[row] ) : 1.0);
6274+
const double uncert = (data[row] > MIN_CHANNEL_UNCERT ? sqrt( data[row] ) : 1.0);
62666275
b(row) = (data[row] > 0.0 ? sqrt( data[row] ) : 0.0);
62676276
for( int col = 0; col < poly_terms; ++col )
62686277
A(row,col) = std::pow( double(x[row]), double(col)) / uncert;
@@ -6292,7 +6301,7 @@ double fit_to_polynomial( const float *x, const float *data, const size_t nbin,
62926301
double y_pred = 0.0;
62936302
for( int i = 0; i < poly_terms; ++i )
62946303
y_pred += a(i) * std::pow( double(x[bin]), double(i) );
6295-
const double uncert = (data[bin] > 0.0 ? sqrt( data[bin] ) : 1.0);
6304+
const double uncert = (data[bin] > MIN_CHANNEL_UNCERT ? sqrt( data[bin] ) : 1.0);
62966305
chi2 += std::pow( (y_pred - data[bin]) / uncert, 2.0 );
62976306
}//for( int bin = 0; bin < nbin; ++bin )
62986307

@@ -6375,16 +6384,15 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
63756384
// cerr << "{" << means[i] << ", " << sigmas[i] << "}, ";
63766385
// cerr << endl << endl;
63776386

6378-
double step_roi_data_sum = 0.0, step_cumulative_data = 0.0;
6387+
double roi_data_sum = 0.0, step_cumulative_data = 0.0;
63796388

63806389
const double roi_lower = x[0];
63816390
const double roi_upper = x[nbin];
6382-
if( step_continuum )
6383-
{
6384-
for( size_t row = 0; row < nbin; ++row )
6385-
step_roi_data_sum += data[row];
6386-
}
63876391

6392+
for( size_t row = 0; row < nbin; ++row )
6393+
roi_data_sum += std::max( data[row], 0.0f );
6394+
6395+
const double avrg_data_val = roi_data_sum / nbin;
63886396

63896397
// For the RelACtAuto calcs, there can easily be 100 peaks, so for this case we'll calculate
63906398
// their contribution multithreaded.
@@ -6447,7 +6455,14 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
64476455
const double x1_rel = x1 - ref_energy;
64486456

64496457
//const double uncert = (dataval > 0.0 ? sqrt(dataval) : 1.0);
6450-
double uncert = (dataval > 0.0 ? sqrt(dataval) : 1.0);
6458+
// If we are background subtracting a spectrum, we can end up with bins with really
6459+
// small values, like 0.0007, of which even a single one would mess the whole fit up if
6460+
// we take its uncertainty to be its square-root, so in this case we will, fairly arbitrarily
6461+
// use an uncert of 1.
6462+
// However, there are also highly scaled spectra, whose all values are really small, so
6463+
// in this case we want to do something more reasonable.
6464+
// TODO: evaluate these threshold choices and their tradeoffs more thoroughly
6465+
double uncert = (dataval > MIN_CHANNEL_UNCERT ? sqrt(dataval) : 1.0);
64516466

64526467
if( step_continuum )
64536468
step_cumulative_data += dataval;
@@ -6511,13 +6526,13 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
65116526
{
65126527
// This logic mirrors that of PeakContinuum::offset_integral(...), and code
65136528
// If you change it in one place - change it in here, below, and in offset_integral.
6514-
const double frac_data = (step_cumulative_data - 0.5*data[row]) / step_roi_data_sum;
6529+
const double frac_data = (step_cumulative_data - 0.5*data[row]) / roi_data_sum;
65156530
const double contribution = frac_data * (x1 - x0);
65166531

65176532
A(row,col) = contribution / uncert;
65186533
}else if( step_continuum && (num_polynomial_terms == 4) )
65196534
{
6520-
const double frac_data = (step_cumulative_data - 0.5*data[row]) / step_roi_data_sum;
6535+
const double frac_data = (step_cumulative_data - 0.5*data[row]) / roi_data_sum;
65216536

65226537
double contrib = 0.0;
65236538
switch( col )
@@ -6577,7 +6592,7 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
65776592
for( size_t channel = 0; channel < nbin; ++channel )
65786593
{
65796594
const double dataval = data[channel];
6580-
const double uncert = (dataval > 0.0 ? sqrt(dataval) : 1.0);
6595+
const double uncert = (dataval > MIN_CHANNEL_UNCERT ? sqrt(dataval) : 1.0);
65816596
A(channel,npoly + i) = peak_areas[channel] / uncert;
65826597
}//for( size_t channel = 0; channel < nbin; ++channel )
65836598
}//for( size_t i = 0; i < npeaks; ++i )
@@ -6733,7 +6748,7 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
67336748
{
67346749
// This logic mirrors that of PeakContinuum::offset_integral(...) and above code in this
67356750
// function that defines the matrix, see above for comments
6736-
const double frac_data = (step_cumulative_data - 0.5*data[bin]) / step_roi_data_sum;
6751+
const double frac_data = (step_cumulative_data - 0.5*data[bin]) / roi_data_sum;
67376752
const double contribution = frac_data * (x1 - x0);
67386753

67396754
y_pred += a(col)*contribution;
@@ -6742,7 +6757,7 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
67426757
// This logic mirrors that of PeakContinuum::offset_integral(...) and above code in this
67436758
// function that defines the matrix, see above for comments
67446759

6745-
const double frac_data = (step_cumulative_data - 0.5*data[bin]) / step_roi_data_sum;
6760+
const double frac_data = (step_cumulative_data - 0.5*data[bin]) / roi_data_sum;
67466761

67476762
double contrib = 0.0;
67486763
switch( col )
@@ -6774,7 +6789,7 @@ double fit_amp_and_offset( const float *x, const float *data, const size_t nbin,
67746789
y_pred += fixedAmpPeaks[i].gauss_integral( x0, x1 );
67756790

67766791
// cerr << "bin " << bin << " predicted " << y_pred << " data=" << data[bin] << endl;
6777-
const double uncert = (data[bin] > 0.0 ? sqrt( data[bin] ) : 1.0);
6792+
const double uncert = (data[bin] > MIN_CHANNEL_UNCERT ? sqrt( data[bin] ) : 1.0);
67786793
chi2 += std::pow( (y_pred - data[bin]) / uncert, 2.0 );
67796794
}//for( int bin = 0; bin < nbin; ++bin )
67806795

@@ -8412,7 +8427,7 @@ std::vector<PeakDef> search_for_peaks( const std::shared_ptr<const Measurement>
84128427
y_pred += continuum_coeffs[col] * (1.0/exp) * (pow(x1cont,exp) - pow(x0cont,exp));
84138428
}//for( int order = 0; order < maxorder; ++order )
84148429

8415-
const double uncert = (data[bin] > 0.0 ? sqrt( data[bin] ) : 1.0);
8430+
const double uncert = (data[bin] > MIN_CHANNEL_UNCERT ? sqrt( data[bin] ) : 1.0);
84168431

84178432
if( y_pred < 0.0 )
84188433
y_pred = 0.0;

src/PeakFitChi2Fcn.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -552,10 +552,17 @@ double PeakFitChi2Fcn::chi2( const double *params ) const
552552
const double nfitpeak = peak_counts[i];
553553
const double ncontinuum = continuum->offset_integral(energies[i], energies[i+1], m_data);
554554

555-
if( ndata > 0.000001 )
556-
chi2 += pow( (ndata - ncontinuum - nfitpeak), 2.0 ) / ndata;
557-
else
558-
chi2 += fabs(nfitpeak + ncontinuum); //This is a bit ad-hoc - is there a better solution? //XXX untested
555+
const double uncert2 = ndata > 1.0 ? ndata : 1.0;
556+
557+
// 20240911: Changed to prevent edge-case of background subtracted spectra having bin
558+
// contents like 0.0007, where even one such channel would ruin the whole peak fit
559+
chi2 += pow( (ndata - ncontinuum - nfitpeak), 2.0 ) / uncert2;
560+
// Previous to 20240911, we used
561+
// if( ndata > 0.000001 )
562+
// chi2 += pow( (ndata - ncontinuum - nfitpeak), 2.0 ) / ndata;
563+
// else
564+
// chi2 += fabs(nfitpeak + ncontinuum); //This is a bit ad-hoc - is there a better solution? //XXX untested
565+
559566
}
560567
}else
561568
{

0 commit comments

Comments
 (0)