General Utility Library for C++14  2.11
statistics.h
Go to the documentation of this file.
1 
23 #ifndef GUL14_STATISTICS_H_
24 #define GUL14_STATISTICS_H_
25 
26 #include <algorithm>
27 #include <cmath>
28 #include <limits>
29 #include <numeric>
30 #include <type_traits>
31 #include <vector>
32 
33 #include "gul14/internal.h"
34 #include "gul14/traits.h"
35 
36 namespace gul14 {
37 
44 using statistics_result_type = double;
45 
58 template <typename ElementT>
60 {
61  return [](ElementT const& el) -> ElementT const&
62  { return el; };
63 }
64 
76 template <typename DataT, typename = void, typename = std::enable_if_t<std::is_arithmetic<DataT>::value>>
77 struct MinMax {
78  DataT min{ std::numeric_limits<DataT>::max() };
79  DataT max{ std::numeric_limits<DataT>::lowest() };
80 };
81 
82 template <typename DataT>
83 struct MinMax<DataT, std::enable_if_t<std::is_floating_point<DataT>::value>> {
84  DataT min{ NAN };
85  DataT max{ NAN };
86 };
87 
104 template <typename DataT, typename = std::enable_if_t<std::is_floating_point<DataT>::value>>
106 public:
107  DataT sigma_{ NAN };
108  DataT mean_{ NAN };
109 
122  operator DataT() const noexcept {
123  return sigma_;
124  }
126  auto sigma() const noexcept -> DataT {
127  return sigma_;
128  }
130  auto mean() const noexcept -> DataT {
131  return mean_;
132  }
133 };
134 
136 
160 template <typename ResultT = statistics_result_type,
161  typename ContainerT,
162  typename ElementT = typename ContainerT::value_type,
163  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
164  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
165  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
166  >
167 auto mean(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
168 {
169  auto const sum = std::accumulate(
170  container.cbegin(), container.cend(),
171  ResultT{ },
172  [accessor] (ResultT const& accu, ElementT const& el) {
173  return accu + static_cast<ResultT>(accessor(el)); } );
174  return sum / static_cast<ResultT>(container.size());
175 }
176 
198 template <typename ResultT = statistics_result_type,
199  typename ContainerT,
200  typename ElementT = typename ContainerT::value_type,
201  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
202  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
203  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
204  >
205 auto rms(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
206 {
207  auto const sum = std::accumulate(
208  container.cbegin(), container.cend(),
209  ResultT{ },
210  [accessor] (ResultT const& accu, ElementT const& el) {
211  return accu + std::pow(static_cast<ResultT>(accessor(el)), 2); } );
212  return std::sqrt(sum / static_cast<ResultT>(container.size()));
213 }
214 
239 template <typename ResultT = statistics_result_type,
240  typename ContainerT,
241  typename ElementT = typename ContainerT::value_type,
242  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
243  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
244  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
245  >
246 auto median(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
247 {
248  auto const len = container.size();
249  if (len == 0)
250  return std::numeric_limits<ResultT>::quiet_NaN();
251 
252  // work with a copy of the data
253  // because nth_element() partially sorts the input data
254  auto data_copy = std::vector<DataT>{ };
255  data_copy.resize(len);
256  auto data_copy_it = data_copy.begin();
257  auto data_it = container.cbegin();
258  auto const data_end = container.cend();
259  for (; data_it != data_end; ++data_it) {
260  *(data_copy_it++) = accessor(*data_it);
261  }
262 
263  // What is the middle element?
264  auto middle = data_copy.begin() + (len / 2);
265  std::nth_element(data_copy.begin(), middle, data_copy.end());
266  auto median = static_cast<ResultT>(*middle);
267 
268  // If we have an even number of elements we need to do more:
269  // We calculate the mean value of the two 'middle' elements
270  if (0 == len % 2) {
271  std::nth_element(data_copy.begin(), middle - 1, data_copy.end());
272  median = (median / 2) + (static_cast<ResultT>(*(middle - 1)) / 2);
273  }
274 
275  return median;
276 }
277 
308 template <typename ContainerT,
309  typename ElementT = typename ContainerT::value_type,
310  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
311  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
312  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
313 >
314 auto maximum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
315 {
316  constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
317  std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::lowest();
318 
319  return std::accumulate(
320  container.cbegin(), container.cend(), initial_value,
321  [&accessor](DataT const& accu, ElementT const& el) -> DataT {
322  auto const val = accessor(el);
323  // Test portably for not-NAN (some compilers do not have std::isnan() for
324  // integral types)
325  if (val == val) {
326  if (not (val <= accu)) // inverted logic to handle NAN correctly
327  return val;
328  }
329  return accu; });
330 }
331 
362 template <typename ContainerT,
363  typename ElementT = typename ContainerT::value_type,
364  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
365  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
366  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
367 >
368 auto minimum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
369 {
370  constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
371  std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::max();
372 
373  return std::accumulate(
374  container.cbegin(), container.cend(), initial_value,
375  [&accessor](DataT const& accu, ElementT const& el) -> DataT {
376  auto const val = accessor(el);
377  // Test portably for not-NAN (some compilers do not have std::isnan() for
378  // integral types)
379  if (val == val) {
380  if (not (val >= accu)) // inverted logic to handle NAN correctly
381  return val;
382  }
383  return accu; });
384 }
385 
421 template <typename ContainerT,
422  typename ElementT = typename ContainerT::value_type,
423  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
424  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
425  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
426  >
427 auto min_max(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
428 {
429  using MinMaxT = MinMax<DataT>;
430  auto const sum = std::accumulate(
431  container.cbegin(), container.cend(),
432  MinMaxT{ },
433  [accessor] (MinMaxT const& accu, ElementT const& el) -> MinMaxT {
434  auto out{ accu };
435  auto const val = accessor(el);
436  // Test portably for not-NAN (some compilers do not have std::isnan() for
437  // integral types)
438  if (val == val) {
439  // (a >= NAN) and (a <= NAN) always false for all a
440  if (not (val >= out.min))
441  out.min = val;
442  if (not (val <= out.max))
443  out.max = val;
444  }
445  return out; });
446  return sum;
447 }
448 
469 template <typename ContainerT,
470  typename ElementT = typename ContainerT::value_type,
471  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
472  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
473  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
474  >
475 auto remove_outliers(ContainerT&& cont, std::size_t outliers,
476  Accessor accessor = ElementAccessor<ElementT>()) -> ContainerT&
477 {
478  while (outliers-- > 0 and cont.size() > 0) {
479  auto max_distant = std::max_element(cont.begin(), cont.end(),
480  [mean = mean(cont, accessor), accessor] (ElementT const& a, ElementT const& b)
481  { return std::abs(accessor(a) - mean) < std::abs(accessor(b) - mean); });
482  cont.erase(max_distant);
483  }
484  return cont;
485 }
486 
493 template <typename ContainerT,
494  typename ElementT = typename ContainerT::value_type,
495  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
496  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
497  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
498  >
499 auto remove_outliers(ContainerT const& cont, std::size_t outliers,
500  Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
501 {
502  auto c = std::vector<ElementT>(cont.size());
503  std::copy(cont.cbegin(), cont.cend(), c.begin());
504  return remove_outliers(std::move(c), outliers, accessor);
505 }
506 
545 template <typename ResultT = statistics_result_type,
546  typename ContainerT,
547  typename ElementT = typename ContainerT::value_type,
548  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
549  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
550  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
551  >
552 auto standard_deviation(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
553 {
554  auto const len = container.size();
555 
556  if (len == 0)
557  return { };
558 
559  auto mean_val = mean<ResultT>(container, accessor);
560 
561  if (len == 1)
562  return { std::numeric_limits<ResultT>::quiet_NaN(), mean_val };
563 
564  auto sum = std::accumulate(container.cbegin(), container.cend(),
565  ResultT{ },
566  [mean_val, accessor] (ResultT const& accu, ElementT const& el)
567  { return accu + std::pow(static_cast<ResultT>(accessor(el)) - mean_val, 2); });
568 
569  sum /= static_cast<ResultT>(container.size() - 1);
570 
571  return { std::sqrt(sum), mean_val };
572 }
573 
598 template <typename ResultT = statistics_result_type,
599  typename ContainerT,
600  typename ElementT = typename ContainerT::value_type,
601  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
602  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
603  typename OpClosure,
604  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
605  >
606 auto accumulate(ContainerT const& container, OpClosure op, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
607 {
608  auto const sum = std::accumulate(
609  container.cbegin(), container.cend(),
610  ResultT{ },
611  [accessor, op] (ResultT const& accu, ElementT const& el) {
612  return op(accu, accessor(el)); } );
613  return sum;
614 }
615 
616 namespace {
617 
618 // The following stuff is only there to have a two iterator interface
619 
620  template <typename IteratorT>
621  struct ContainerView {
622  IteratorT const& begin_;
623  IteratorT const& end_;
624  using value_type = std::decay_t<decltype(*begin_)>;
625 
626  ContainerView(IteratorT const& i1, IteratorT const& i2)
627  : begin_{ i1 }
628  , end_{ i2 }
629  {
630  }
631 
632  // Just implement the member functions that we use here
633 
634  auto cbegin() const noexcept -> IteratorT const&
635  {
636  return begin_;
637  }
638  auto cend() const noexcept -> IteratorT const&
639  {
640  return end_;
641  }
642 
643  auto size() const noexcept -> std::size_t
644  {
645  return std::distance(begin_, end_);
646  }
647  };
648 
649  template<typename IteratorT>
650  auto make_view(IteratorT const& cbegin, IteratorT const& cend) -> ContainerView<IteratorT> const
651  {
652  return ContainerView<IteratorT>{ cbegin, cend };
653  }
654 
655 } // namespace anonymous
656 
666 template <typename ResultT = statistics_result_type,
667  typename IteratorT,
668  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
669  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
670  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
671 auto mean(IteratorT const& begin, IteratorT const& end,
672  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
673 {
674  return mean<ResultT>(make_view(begin, end), accessor);
675 }
676 
686 template <typename ResultT = statistics_result_type,
687  typename IteratorT,
688  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
689  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
690  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
691 auto rms(IteratorT const& begin, IteratorT const& end,
692  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
693 {
694  return rms<ResultT>(make_view(begin, end), accessor);
695 }
696 
706 template <typename ResultT = statistics_result_type,
707  typename IteratorT,
708  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
709  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
710  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
711 auto median(IteratorT const& begin, IteratorT const& end,
712  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
713 {
714  return median<ResultT>(make_view(begin, end), accessor);
715 }
716 
726 template <typename IteratorT,
727  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
728  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
729  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
730  auto maximum(IteratorT const& begin, IteratorT const& end,
731  Accessor accessor = ElementAccessor<ElementT>()) -> DataT
732 {
733  return maximum(make_view(begin, end), accessor);
734 }
735 
745 template <typename IteratorT,
746  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
747  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
748  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
749  auto minimum(IteratorT const& begin, IteratorT const& end,
750  Accessor accessor = ElementAccessor<ElementT>()) -> DataT
751 {
752  return minimum(make_view(begin, end), accessor);
753 }
754 
764 template <typename IteratorT,
765  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
766  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
767  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
768 auto min_max(IteratorT const& begin, IteratorT const& end,
769  Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
770 {
771  return min_max(make_view(begin, end), accessor);
772 }
773 
785 template <typename IteratorT,
786  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
787  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
788  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
789 auto remove_outliers(IteratorT const& begin, IteratorT const& end,
790  std::size_t outliers, Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
791 {
792  return remove_outliers(make_view(begin, end), outliers, accessor);
793 }
794 
805 template <typename ResultT = statistics_result_type,
806  typename IteratorT,
807  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
808  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
809  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
810 auto standard_deviation(IteratorT const& begin, IteratorT const& end,
811  Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
812 {
813  return standard_deviation<ResultT>(make_view(begin, end), accessor);
814 }
815 
827 template <typename ResultT = statistics_result_type,
828  typename IteratorT,
829  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
830  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
831  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>,
832  typename OpClosure>
833 auto accumulate(IteratorT const& begin, IteratorT const& end, OpClosure op,
834  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
835 {
836  return accumulate<ResultT>(make_view(begin, end), op, accessor);
837 }
838 
839 } // namespace gul14
840 
841 #endif
842 
843 // vi:ts=4:sw=4:sts=4:et
A struct holding a standard deviation and a mean value.
Definition: statistics.h:105
constexpr auto abs(ValueT n) noexcept -> std::enable_if_t< std::is_unsigned< ValueT >::value, ValueT >
Compute the absolute value of a number.
Definition: num_util.h:53
DataT max
Maximum value.
Definition: statistics.h:79
auto mean(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the arithmetic mean value of all elements in a container.
Definition: statistics.h:167
auto mean(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:671
DataT mean_
The mean value.
Definition: statistics.h:108
double statistics_result_type
Type used to return statistic properties.
Definition: statistics.h:44
auto sigma() const noexcept -> DataT
Get the standard deviation value.
Definition: statistics.h:126
auto median(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Find the median of all elements in a container.
Definition: statistics.h:246
DataT min
Minimum value.
Definition: statistics.h:78
auto remove_outliers(IteratorT const &begin, IteratorT const &end, std::size_t outliers, Accessor accessor=ElementAccessor< ElementT >()) -> std::vector< ElementT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:789
auto ElementAccessor()
Return a mock element accessor for containers.
Definition: statistics.h:59
auto maximum(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:730
auto min_max(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> MinMax< DataT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:768
DataT sigma_
The standard deviation (sigma) value.
Definition: statistics.h:107
auto standard_deviation(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> StandardDeviationMean< ResultT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:810
auto maximum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the maximum element value in a container.
Definition: statistics.h:314
auto mean() const noexcept -> DataT
Get the arithmetic mean value.
Definition: statistics.h:130
auto rms(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:691
auto minimum(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:749
auto rms(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the root mean square of all elements in a container.
Definition: statistics.h:205
auto accumulate(IteratorT const &begin, IteratorT const &end, OpClosure op, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:833
auto median(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:711
Definition of macros used internally by GUL.
Namespace gul14 contains all functions and classes of the General Utility Library.
Definition: doxygen.h:26
Object that is designed to holds two values: minimum and maximum of something.
Definition: statistics.h:77
Some metaprogramming traits for the General Utility Library.