GeographicLib
1.21
|
00001 /** 00002 * \file Accumulator.hpp 00003 * \brief Header for GeographicLib::Accumulator class 00004 * 00005 * Copyright (c) Charles Karney (2010, 2011) <charles@karney.com> and licensed 00006 * under the MIT/X11 License. For more information, see 00007 * http://geographiclib.sourceforge.net/ 00008 **********************************************************************/ 00009 00010 #if !defined(GEOGRAPHICLIB_ACCUMULATOR_HPP) 00011 #define GEOGRAPHICLIB_ACCUMULATOR_HPP \ 00012 "$Id: 03b7f4fdb9974c822f98d5f5aab1094b5a9ac8f2 $" 00013 00014 #include <GeographicLib/Constants.hpp> 00015 00016 namespace GeographicLib { 00017 00018 /** 00019 * \brief An accumulator for sums. 00020 * 00021 * This allow many numbers of floating point type \e T to be added together 00022 * with twice the normal precision. Thus if \e T is double, the effective 00023 * precision of the sum is 106 bits or about 32 decimal places. The core 00024 * idea is the error free transformation of a sum, D. E. Knuth, TAOCP, Vol 2, 00025 * 4.2.2, Theorem B. 00026 * 00027 * The implementation follows J. R. Shewchuk, 00028 * <a href="http://dx.doi.org/10.1007/PL00009321"> Adaptive Precision 00029 * Floating-Point Arithmetic and Fast Robust Geometric Predicates</a>, 00030 * Discrete & Computational Geometry 18(3) 305-363 (1997). 00031 * 00032 * Approximate timings (summing a vector<double>) 00033 * - double: 2ns 00034 * - Accumulator<double>: 23ns 00035 * 00036 * In the documentation of the member functions, \e sum stands for the value 00037 * currently held in the accumulator. 00038 * 00039 * Example of use: 00040 * \include example-Accumulator.cpp 00041 **********************************************************************/ 00042 template<typename T = Math::real> 00043 class GEOGRAPHIC_EXPORT Accumulator { 00044 private: 00045 // _s + _t accumulates for the sum. 00046 T _s, _t; 00047 // Error free transformation of a sum. Note that t can be the same as one 00048 // of the first two arguments. 00049 static inline T sum(T u, T v, T& t) { 00050 volatile T s = u + v; 00051 volatile T up = s - v; 00052 volatile T vpp = s - up; 00053 up -= u; 00054 vpp -= v; 00055 t = -(up + vpp); 00056 // u + v = s + t 00057 // = round(u + v) + t 00058 return s; 00059 } 00060 // Same as sum, but requires abs(u) >= abs(v). This isn't currently used. 00061 static inline T fastsum(T u, T v, T& t) { 00062 volatile T s = u + v; 00063 volatile T vp = s - u; 00064 t = v - vp; 00065 return s; 00066 } 00067 void Add(T y) throw() { 00068 // Here's Shewchuk's solution... 00069 T u; // hold exact sum as [s, t, u] 00070 y = sum(y, _t, u); // Accumulate starting at least significant end 00071 _s = sum(y, _s, _t); 00072 // Start is _s, _t decreasing and non-adjacent. Sum is now (s + t + u) 00073 // exactly with s, t, u non-adjacent and in decreasing order (except for 00074 // possible zeros). The following code tries to normalize the result. 00075 // Ideally, we want _s = round(s+t+u) and _u = round(s+t+u - _s). The 00076 // following does an approximate job (and maintains the decreasing 00077 // non-adjacent property). Here are two "failures" using 3-bit floats: 00078 // 00079 // Case 1: _s is not equal to round(s+t+u) -- off by 1 ulp 00080 // [12, -1] - 8 -> [4, 0, -1] -> [4, -1] = 3 should be [3, 0] = 3 00081 // 00082 // Case 2: _s+_t is not as close to s+t+u as it shold be 00083 // [64, 5] + 4 -> [64, 8, 1] -> [64, 8] = 72 (off by 1) 00084 // should be [80, -7] = 73 (exact) 00085 // 00086 // "Fixing" these problems is probably not worth the expense. The 00087 // representation inevitably leads to small errors in the accumulated 00088 // values. The additional errors illustrated here amount to 1 ulp of the 00089 // less significant word during each addition to the Accumulator and an 00090 // additional possible error of 1 ulp in the reported sum. 00091 // 00092 // Incidentally, the "ideal" representation described above is not 00093 // canonical, because _s = round(_s + _t) may not be true. For example, 00094 // with 3-bit floats: 00095 // 00096 // [128, 16] + 1 -> [160, -16] -- 160 = round(145). 00097 // But [160, 0] - 16 -> [128, 16] -- 128 = round(144). 00098 // 00099 if (_s == 0) // This implies t == 0, 00100 _s = u; // so result is u 00101 else 00102 _t += u; // otherwise just accumulate u to t. 00103 } 00104 T Sum(T y) const throw() { 00105 Accumulator a(*this); 00106 a.Add(y); 00107 return a._s; 00108 } 00109 public: 00110 /** 00111 * Construct from a \e T. This is not declared explicit, so that you can 00112 * write <code>Accumulator<double> a = 5;</code>. 00113 * 00114 * @param[in] y set \e sum = \e y. 00115 **********************************************************************/ 00116 Accumulator(T y = T(0)) throw() : _s(y), _t(0) { 00117 STATIC_ASSERT(!std::numeric_limits<T>::is_integer, 00118 "Accumulator type is not floating point"); 00119 } 00120 /** 00121 * Set the accumulator to a number. 00122 * 00123 * @param[in] y set \e sum = \e y. 00124 **********************************************************************/ 00125 Accumulator& operator=(T y) throw() { _s = y; _t = 0; return *this; } 00126 /** 00127 * Return the value held in the accumulator. 00128 * 00129 * @return \e sum. 00130 **********************************************************************/ 00131 T operator()() const throw() { return _s; } 00132 /** 00133 * Return the result of adding a number to \e sum (but don't change \e sum). 00134 * 00135 * @param[in] y the number to be added to the sum. 00136 * @return \e sum + \e y. 00137 **********************************************************************/ 00138 T operator()(T y) const throw() { return Sum(y); } 00139 /** 00140 * Add a number to the accumulator. 00141 * 00142 * @param[in] y set \e sum += \e y. 00143 **********************************************************************/ 00144 Accumulator& operator+=(T y) throw() { Add(y); return *this; } 00145 /** 00146 * Subtract a number from the accumulator. 00147 * 00148 * @param[in] y set \e sum -= \e y. 00149 **********************************************************************/ 00150 Accumulator& operator-=(T y) throw() { Add(-y); return *this; } 00151 /** 00152 * Multiply accumulator by an integer. To avoid loss of accuracy, use only 00153 * integers such that \e n * \e T is exactly representable as a \e T (i.e., 00154 * +/- powers of two). Use \e n = -1 to negate \e sum. 00155 * 00156 * @param[in] n set \e sum *= \e n. 00157 **********************************************************************/ 00158 Accumulator& operator*=(int n) throw() { _s *= n; _t *= n; return *this; } 00159 /** 00160 * Test equality of an Accumulator with a number. 00161 **********************************************************************/ 00162 bool operator==(T y) const throw() { return _s == y; } 00163 /** 00164 * Test inequality of an Accumulator with a number. 00165 **********************************************************************/ 00166 bool operator!=(T y) const throw() { return _s != y; } 00167 /** 00168 * Less operator on an Accumulator and a number. 00169 **********************************************************************/ 00170 bool operator<(T y) const throw() { return _s < y; } 00171 /** 00172 * Less or equal operator on an Accumulator and a number. 00173 **********************************************************************/ 00174 bool operator<=(T y) const throw() { return _s <= y; } 00175 /** 00176 * Greater operator on an Accumulator and a number. 00177 **********************************************************************/ 00178 bool operator>(T y) const throw() { return _s > y; } 00179 /** 00180 * Greater or equal operator on an Accumulator and a number. 00181 **********************************************************************/ 00182 bool operator>=(T y) const throw() { return _s >= y; } 00183 }; 00184 00185 } // namespace GeographicLib 00186 00187 #endif // GEOGRAPHICLIB_ACCUMULATOR_HPP