General Utility Library for C++14  2.7
statistics.h
Go to the documentation of this file.
1 
24 #ifndef GUL14_STATISTICS_H_
25 #define GUL14_STATISTICS_H_
26 
27 #include <algorithm>
28 #include <cmath>
29 #include <limits>
30 #include <numeric>
31 #include <type_traits>
32 #include <vector>
33 #include "gul14/internal.h"
34 #include "gul14/traits.h"
35 
36 namespace gul14 {
37 
38 using statistics_result_type = double;
39 
52 template <typename ElementT>
54 {
55  return [](ElementT const& el) -> ElementT const&
56  { return el; };
57 }
58 
70 template <typename DataT, typename = void, typename = std::enable_if_t<std::is_arithmetic<DataT>::value>>
71 struct MinMax {
72  DataT min{ std::numeric_limits<DataT>::max() };
73  DataT max{ std::numeric_limits<DataT>::lowest() };
74 };
75 
76 template <typename DataT>
77 struct MinMax<DataT, std::enable_if_t<std::is_floating_point<DataT>::value>> {
78  DataT min{ NAN };
79  DataT max{ NAN };
80 };
81 
98 template <typename DataT, typename = std::enable_if_t<std::is_floating_point<DataT>::value>>
100 public:
101  DataT sigma_{ NAN };
102  DataT mean_{ NAN };
103 
116  operator DataT() const noexcept {
117  return sigma_;
118  }
120  auto sigma() const noexcept -> DataT {
121  return sigma_;
122  }
124  auto mean() const noexcept -> DataT {
125  return mean_;
126  }
127 };
128 
130 
154 template <typename ResultT = statistics_result_type,
155  typename ContainerT,
156  typename ElementT = typename ContainerT::value_type,
157  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
158  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
159  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
160  >
161 auto mean(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
162 {
163  auto const sum = std::accumulate(
164  container.cbegin(), container.cend(),
165  ResultT{ },
166  [accessor] (ResultT const& accu, ElementT const& el) {
167  return static_cast<ResultT>(accu + accessor(el)); } );
168  return sum / static_cast<ResultT>(container.size());
169 }
170 
192 template <typename ResultT = statistics_result_type,
193  typename ContainerT,
194  typename ElementT = typename ContainerT::value_type,
195  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
196  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
197  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
198  >
199 auto rms(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
200 {
201  auto const sum = std::accumulate(
202  container.cbegin(), container.cend(),
203  ResultT{ },
204  [accessor] (ResultT const& accu, ElementT const& el) {
205  return accu + std::pow(static_cast<ResultT>(accessor(el)), 2); } );
206  return std::sqrt(sum / container.size());
207 }
208 
233 template <typename ResultT = statistics_result_type,
234  typename ContainerT,
235  typename ElementT = typename ContainerT::value_type,
236  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
237  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
238  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
239  >
240 auto median(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
241 {
242  auto const len = container.size();
243  if (len == 0)
244  return std::numeric_limits<ResultT>::quiet_NaN();
245 
246  // work with a copy of the data
247  // because nth_element() partially sorts the input data
248  auto data_copy = std::vector<DataT>{ };
249  data_copy.resize(len);
250  auto data_copy_it = data_copy.begin();
251  auto data_it = container.cbegin();
252  auto const data_end = container.cend();
253  for (; data_it != data_end; ++data_it) {
254  *(data_copy_it++) = accessor(*data_it);
255  }
256 
257  // What is the middle element?
258  auto middle = data_copy.begin() + (len / 2);
259  std::nth_element(data_copy.begin(), middle, data_copy.end());
260  auto median = static_cast<ResultT>(*middle);
261 
262  // If we have an even number of elements we need to do more:
263  // We calculate the mean value of the two 'middle' elements
264  if (0 == len % 2) {
265  std::nth_element(data_copy.begin(), middle - 1, data_copy.end());
266  median = (median / 2) + (static_cast<ResultT>(*(middle - 1)) / 2);
267  }
268 
269  return median;
270 }
271 
302 template <typename ContainerT,
303  typename ElementT = typename ContainerT::value_type,
304  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
305  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
306  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
307 >
308 auto maximum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
309 {
310  constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
311  std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::lowest();
312 
313  return std::accumulate(
314  container.cbegin(), container.cend(), initial_value,
315  [&accessor](DataT const& accu, ElementT const& el) -> DataT {
316  auto const val = accessor(el);
317  // Test portably for not-NAN (some compilers do not have std::isnan() for
318  // integral types)
319  if (val == val) {
320  if (not (val <= accu)) // inverted logic to handle NAN correctly
321  return val;
322  }
323  return accu; });
324 }
325 
356 template <typename ContainerT,
357  typename ElementT = typename ContainerT::value_type,
358  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
359  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
360  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
361 >
362 auto minimum(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> DataT
363 {
364  constexpr auto initial_value = std::numeric_limits<DataT>::has_quiet_NaN ?
365  std::numeric_limits<DataT>::quiet_NaN() : std::numeric_limits<DataT>::max();
366 
367  return std::accumulate(
368  container.cbegin(), container.cend(), initial_value,
369  [&accessor](DataT const& accu, ElementT const& el) -> DataT {
370  auto const val = accessor(el);
371  // Test portably for not-NAN (some compilers do not have std::isnan() for
372  // integral types)
373  if (val == val) {
374  if (not (val >= accu)) // inverted logic to handle NAN correctly
375  return val;
376  }
377  return accu; });
378 }
379 
415 template <typename ContainerT,
416  typename ElementT = typename ContainerT::value_type,
417  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
418  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
419  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
420  >
421 auto min_max(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
422 {
423  using MinMaxT = MinMax<DataT>;
424  auto const sum = std::accumulate(
425  container.cbegin(), container.cend(),
426  MinMaxT{ },
427  [accessor] (MinMaxT const& accu, ElementT const& el) -> MinMaxT {
428  auto out{ accu };
429  auto const val = accessor(el);
430  // Test portably for not-NAN (some compilers do not have std::isnan() for
431  // integral types)
432  if (val == val) {
433  // (a >= NAN) and (a <= NAN) always false for all a
434  if (not (val >= out.min))
435  out.min = val;
436  if (not (val <= out.max))
437  out.max = val;
438  }
439  return out; });
440  return sum;
441 }
442 
463 template <typename ContainerT,
464  typename ElementT = typename ContainerT::value_type,
465  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
466  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
467  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
468  >
469 auto remove_outliers(ContainerT&& cont, std::size_t outliers,
470  Accessor accessor = ElementAccessor<ElementT>()) -> ContainerT&
471 {
472  while (outliers-- > 0 and cont.size() > 0) {
473  auto max_distant = std::max_element(cont.begin(), cont.end(),
474  [mean = mean(cont, accessor), accessor] (ElementT const& a, ElementT const& b)
475  { return std::abs(accessor(a) - mean) < std::abs(accessor(b) - mean); });
476  cont.erase(max_distant);
477  }
478  return cont;
479 }
480 
487 template <typename ContainerT,
488  typename ElementT = typename ContainerT::value_type,
489  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
490  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
491  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
492  >
493 auto remove_outliers(ContainerT const& cont, std::size_t outliers,
494  Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
495 {
496  auto c = std::vector<ElementT>(cont.size());
497  std::copy(cont.cbegin(), cont.cend(), c.begin());
498  return remove_outliers(std::move(c), outliers, accessor);
499 }
500 
539 template <typename ResultT = statistics_result_type,
540  typename ContainerT,
541  typename ElementT = typename ContainerT::value_type,
542  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
543  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
544  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
545  >
546 auto standard_deviation(ContainerT const& container, Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
547 {
548  auto const len = container.size();
549 
550  if (len == 0)
551  return { };
552 
553  auto mean_val = mean<ResultT>(container, accessor);
554 
555  if (len == 1)
556  return { std::numeric_limits<ResultT>::quiet_NaN(), mean_val };
557 
558  auto sum = std::accumulate(container.cbegin(), container.cend(),
559  ResultT{ },
560  [mean_val, accessor] (ResultT const& accu, ElementT const& el)
561  { return accu + std::pow(accessor(el) - mean_val, 2); });
562 
563  sum /= container.size() - 1;
564 
565  return { std::sqrt(sum), mean_val };
566 }
567 
592 template <typename ResultT = statistics_result_type,
593  typename ContainerT,
594  typename ElementT = typename ContainerT::value_type,
595  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
596  typename DataT = typename std::decay_t<std::result_of_t<Accessor(ElementT)>>,
597  typename OpClosure,
598  typename = std::enable_if_t<IsContainerLike<ContainerT>::value>
599  >
600 auto accumulate(ContainerT const& container, OpClosure op, Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
601 {
602  auto const sum = std::accumulate(
603  container.cbegin(), container.cend(),
604  ResultT{ },
605  [accessor, op] (ResultT const& accu, ElementT const& el) {
606  return op(accu, accessor(el)); } );
607  return sum;
608 }
609 
610 namespace {
611 
612 // The following stuff is only there to have a two iterator interface
613 
614  template <typename IteratorT>
615  struct ContainerView {
616  IteratorT const& begin_;
617  IteratorT const& end_;
618  using value_type = std::decay_t<decltype(*begin_)>;
619 
620  ContainerView(IteratorT const& i1, IteratorT const& i2)
621  : begin_{ i1 }
622  , end_{ i2 }
623  {
624  }
625 
626  // Just implement the member functions that we use here
627 
628  auto cbegin() const noexcept -> IteratorT const&
629  {
630  return begin_;
631  }
632  auto cend() const noexcept -> IteratorT const&
633  {
634  return end_;
635  }
636 
637  auto size() const noexcept -> std::size_t
638  {
639  return std::distance(begin_, end_);
640  }
641  };
642 
643  template<typename IteratorT>
644  auto make_view(IteratorT const& cbegin, IteratorT const& cend) -> ContainerView<IteratorT> const
645  {
646  return ContainerView<IteratorT>{ cbegin, cend };
647  }
648 
649 } // namespace anonymous
650 
660 template <typename ResultT = statistics_result_type,
661  typename IteratorT,
662  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
663  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
664  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
665 auto mean(IteratorT const& begin, IteratorT const& end,
666  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
667 {
668  return mean<ResultT>(make_view(begin, end), accessor);
669 }
670 
680 template <typename ResultT = statistics_result_type,
681  typename IteratorT,
682  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
683  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
684  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
685 auto rms(IteratorT const& begin, IteratorT const& end,
686  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
687 {
688  return rms<ResultT>(make_view(begin, end), accessor);
689 }
690 
700 template <typename ResultT = statistics_result_type,
701  typename IteratorT,
702  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
703  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
704  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
705 auto median(IteratorT const& begin, IteratorT const& end,
706  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
707 {
708  return median<ResultT>(make_view(begin, end), accessor);
709 }
710 
720 template <typename IteratorT,
721  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
722  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
723  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
724  auto maximum(IteratorT const& begin, IteratorT const& end,
725  Accessor accessor = ElementAccessor<ElementT>()) -> DataT
726 {
727  return maximum(make_view(begin, end), accessor);
728 }
729 
739 template <typename IteratorT,
740  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
741  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
742  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
743  auto minimum(IteratorT const& begin, IteratorT const& end,
744  Accessor accessor = ElementAccessor<ElementT>()) -> DataT
745 {
746  return minimum(make_view(begin, end), accessor);
747 }
748 
758 template <typename IteratorT,
759  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
760  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
761  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
762 auto min_max(IteratorT const& begin, IteratorT const& end,
763  Accessor accessor = ElementAccessor<ElementT>()) -> MinMax<DataT>
764 {
765  return min_max(make_view(begin, end), accessor);
766 }
767 
779 template <typename IteratorT,
780  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
781  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
782  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
783 auto remove_outliers(IteratorT const& begin, IteratorT const& end,
784  std::size_t outliers, Accessor accessor = ElementAccessor<ElementT>()) -> std::vector<ElementT>
785 {
786  return remove_outliers(make_view(begin, end), outliers, accessor);
787 }
788 
799 template <typename ResultT = statistics_result_type,
800  typename IteratorT,
801  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
802  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
803  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>>
804 auto standard_deviation(IteratorT const& begin, IteratorT const& end,
805  Accessor accessor = ElementAccessor<ElementT>()) -> StandardDeviationMean<ResultT>
806 {
807  return standard_deviation<ResultT>(make_view(begin, end), accessor);
808 }
809 
821 template <typename ResultT = statistics_result_type,
822  typename IteratorT,
823  typename ElementT = std::decay_t<decltype(*std::declval<IteratorT>())>,
824  typename Accessor = std::result_of_t<decltype(ElementAccessor<ElementT>())(ElementT)>(*)(ElementT const&),
825  typename DataT = std::decay_t<std::result_of_t<Accessor(ElementT)>>,
826  typename OpClosure>
827 auto accumulate(IteratorT const& begin, IteratorT const& end, OpClosure op,
828  Accessor accessor = ElementAccessor<ElementT>()) -> ResultT
829 {
830  return accumulate<ResultT>(make_view(begin, end), op, accessor);
831 }
832 
833 } // namespace gul14
834 
835 #endif
836 
837 // vi:ts=4:sw=4:sts=4:et
gul14::abs
constexpr auto abs(ValueT n) noexcept -> std::enable_if_t< std::is_unsigned< ValueT >::value, ValueT >
Compute the absolute value of a number.
Definition: num_util.h:47
gul14::accumulate
auto accumulate(IteratorT const &begin, IteratorT const &end, OpClosure op, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:827
gul14::statistics_result_type
double statistics_result_type
Type used to return statistic properties.
Definition: statistics.h:38
gul14::StandardDeviationMean
A struct holding a standard deviation and a mean value.
Definition: statistics.h:99
gul14::rms
auto rms(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the root mean square of all elements in a container.
Definition: statistics.h:199
gul14::StandardDeviationMean::mean_
DataT mean_
The mean value.
Definition: statistics.h:102
gul14::maximum
auto maximum(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:724
gul14::rms
auto rms(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:685
gul14::remove_outliers
auto remove_outliers(IteratorT const &begin, IteratorT const &end, std::size_t outliers, Accessor accessor=ElementAccessor< ElementT >()) -> std::vector< ElementT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:783
gul14::median
auto median(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:705
gul14::mean
auto mean(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Calculate the arithmetic mean value of all elements in a container.
Definition: statistics.h:161
gul14::MinMax
Object that is designed to holds two values: minimum and maximum of something.
Definition: statistics.h:71
gul14::standard_deviation
auto standard_deviation(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> StandardDeviationMean< ResultT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:804
gul14::StandardDeviationMean::sigma
auto sigma() const noexcept -> DataT
Get the standard deviation value.
Definition: statistics.h:120
internal.h
Definition of macros used internally by GUL.
gul14::ElementAccessor
auto ElementAccessor()
Return a mock element accessor for containers.
Definition: statistics.h:53
gul14::StandardDeviationMean::sigma_
DataT sigma_
The standard deviation (sigma) value.
Definition: statistics.h:101
gul14
Namespace gul14 contains all functions and classes of the General Utility Library.
Definition: doxygen.h:26
gul14::min_max
auto min_max(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> MinMax< DataT >
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:762
gul14::median
auto median(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
Find the median of all elements in a container.
Definition: statistics.h:240
gul14::MinMax::max
DataT max
Maximum value.
Definition: statistics.h:73
traits.h
Some metaprogramming traits for the General Utility Library.
gul14::maximum
auto maximum(ContainerT const &container, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
Return the maximum element value in a container.
Definition: statistics.h:308
gul14::MinMax::min
DataT min
Minimum value.
Definition: statistics.h:72
gul14::minimum
auto minimum(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> DataT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:743
gul14::mean
auto mean(IteratorT const &begin, IteratorT const &end, Accessor accessor=ElementAccessor< ElementT >()) -> ResultT
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: statistics.h:665
gul14::StandardDeviationMean::mean
auto mean() const noexcept -> DataT
Get the arithmetic mean value.
Definition: statistics.h:124