Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
return self.variance ** 0.5
# Bind the public ``std_dev`` attribute to ``_calc_std_dev`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
std_dev = _StatsProperty('std_dev', _calc_std_dev)
def _calc_median_abs_dev(self):
    """\
    Median Absolute Deviation (MAD): the median of the absolute
    distances between each value and the median of the data. A robust
    measure of statistical dispersion, see
    http://en.wikipedia.org/wiki/Median_absolute_deviation

    >>> median_abs_dev(range(97))
    24.0
    """
    # ``median`` here is the module-level function defined elsewhere in
    # this file; it is only read, never rebound, so a plain global
    # lookup is sufficient.
    ordered = sorted(self.data)
    center = float(median(ordered))
    deviations = [abs(center - value) for value in ordered]
    return median(deviations)
# Bind the public ``median_abs_dev`` attribute to
# ``_calc_median_abs_dev`` through ``_StatsProperty`` (defined outside
# this excerpt; presumably a computed-on-access descriptor -- TODO confirm).
median_abs_dev = _StatsProperty('median_abs_dev', _calc_median_abs_dev)
mad = median_abs_dev # short alias for the same property, for convenience
def _calc_rel_std_dev(self):
    """\
    Relative standard deviation: ``self.std_dev`` divided by the
    absolute value of ``self.mean``. When the mean is zero the ratio
    is undefined and ``self.default`` is returned instead.
    http://en.wikipedia.org/wiki/Relative_standard_deviation

    >>> print('%1.3f' % rel_std_dev(range(97)))
    0.583
    """
    mean_magnitude = abs(self.mean)
    # Guard against division by zero: fall back to the default value.
    if not mean_magnitude:
        return self.default
    return self.std_dev / mean_magnitude
See the module docstring for more about statistical moments.
>>> skewness(range(97)) # symmetrical around 48.0
0.0
>>> left_skewed = skewness(list(range(97)) + list(range(10)))
>>> right_skewed = skewness(list(range(97)) + list(range(87, 97)))
>>> round(left_skewed, 3), round(right_skewed, 3)
(0.114, -0.114)
"""
data, s_dev = self.data, self.std_dev
if len(data) > 1 and s_dev > 0:
return (sum(self._get_pow_diffs(3)) /
float((len(data) - 1) * (s_dev ** 3)))
else:
return self.default
# Bind the public ``skewness`` attribute to ``_calc_skewness`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
skewness = _StatsProperty('skewness', _calc_skewness)
def _calc_kurtosis(self):
"""\
Indicates how much data is in the tails of the distribution. The
result is always positive, with the normal "bell-curve"
distribution having a kurtosis of 3.
http://en.wikipedia.org/wiki/Kurtosis
See the module docstring for more about statistical moments.
>>> kurtosis(range(9))
1.99125
With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more
centrally distributed than the normal curve.
"""
return len(self.data)
# Bind the public ``count`` attribute to ``_calc_count`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
count = _StatsProperty('count', _calc_count)
def _calc_mean(self):
    """
    Arithmetic mean ("average") of ``self.data``: the total of all the
    values divided by how many values there are.

    >>> mean(range(20))
    9.5
    >>> mean(list(range(19)) + [949])  # 949 is an arbitrary outlier
    56.0
    """
    # Starting the sum at 0.0 makes the total a float, so the division
    # below is true division even when every value is an integer.
    total = sum(self.data, 0.0)
    return total / len(self.data)
# Bind the public ``mean`` attribute to ``_calc_mean`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
mean = _StatsProperty('mean', _calc_mean)
def _calc_max(self):
    """
    Largest value found in ``self.data``.

    >>> Stats([2, 1, 3]).max
    3
    """
    # When the data is flagged as sorted, the final element is taken as
    # the maximum and the linear scan is skipped.
    return self.data[-1] if self._is_sorted else max(self.data)
# Bind the public ``max`` attribute to ``_calc_max`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
max = _StatsProperty('max', _calc_max)
def _calc_min(self):
"""
The minimum value present in the data.
if round(c1, precision) == 0:
if round(beta2, precision) == 3:
return 0 # Normal
else:
if beta2 < 3:
return 2 # Symmetric Beta
elif beta2 > 3:
return 7
elif round(c2, precision) == 0:
return 3 # Gamma
else:
k = c1 ** 2 / (4 * c0 * c2)
if k < 0:
return 1 # Beta
raise RuntimeError('missed a spot')
# Bind the public ``pearson_type`` attribute to ``_calc_pearson_type``
# through ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
pearson_type = _StatsProperty('pearson_type', _calc_pearson_type)
@staticmethod
def _get_quantile(sorted_data, q):
    """Return the value at quantile *q* of *sorted_data*.

    *q* is scaled onto the index range ``0 .. len(sorted_data) - 1``.
    If the scaled position is a whole number, the element at that
    index is returned unchanged; otherwise the result is a linear
    interpolation between the two neighbouring elements.

    No validation is performed here: *sorted_data* is used as given
    (it is not re-sorted) and *q* is not range-checked.
    """
    last_index = len(sorted_data) - 1
    # Dividing by 1.0 coerces q to float before scaling.
    position = q / 1.0 * last_index
    lower, upper = int(floor(position)), int(ceil(position))
    if lower != upper:
        # Weight each neighbour by its closeness to the exact position.
        return ((sorted_data[lower] * (upper - position)) +
                (sorted_data[upper] * (position - lower)))
    return sorted_data[lower]
def get_quantile(self, q):
"""Get a quantile from the dataset. Quantiles are floating point
values between ``0.0`` and ``1.0``, with ``0.0`` representing
the minimum value in the dataset and ``1.0`` representing the
maximum. ``0.5`` represents the median:
if self._is_sorted:
return self.data[-1]
return max(self.data)
# Bind the public ``max`` attribute to ``_calc_max`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
max = _StatsProperty('max', _calc_max)
def _calc_min(self):
    """
    Smallest value found in ``self.data``.

    >>> Stats([2, 1, 3]).min
    1
    """
    # When the data is flagged as sorted, the first element is taken as
    # the minimum and the linear scan is skipped.
    return self.data[0] if self._is_sorted else min(self.data)
# Bind the public ``min`` attribute to ``_calc_min`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
min = _StatsProperty('min', _calc_min)
def _calc_median(self):
    """
    Median of the data: the 0.5 quantile of the sorted values, i.e.
    the middle value, or the average of the two middle values when
    the count is even. It is generally less affected by outliers
    than the mean.

    >>> median([2, 1, 3])
    2
    >>> median(range(97))
    48
    >>> median(list(range(96)) + [1066])  # 1066 is an arbitrary outlier
    48
    """
    ordered = self._get_sorted_data()
    return self._get_quantile(ordered, 0.5)
# Bind the public ``median`` attribute to ``_calc_median`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
median = _StatsProperty('median', _calc_median)
56.0
"""
return sum(self.data, 0.0) / len(self.data)
# Bind the public ``mean`` attribute to ``_calc_mean`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
mean = _StatsProperty('mean', _calc_mean)
def _calc_max(self):
    """
    Maximum of ``self.data``.

    >>> Stats([2, 1, 3]).max
    3
    """
    if not self._is_sorted:
        # Order unknown: scan every value.
        return max(self.data)
    # Data flagged as sorted: the last element is taken as the maximum.
    return self.data[-1]
# Bind the public ``max`` attribute to ``_calc_max`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
max = _StatsProperty('max', _calc_max)
def _calc_min(self):
    """
    Minimum of ``self.data``.

    >>> Stats([2, 1, 3]).min
    1
    """
    if not self._is_sorted:
        # Order unknown: scan every value.
        return min(self.data)
    # Data flagged as sorted: the first element is taken as the minimum.
    return self.data[0]
# Bind the public ``min`` attribute to ``_calc_min`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
min = _StatsProperty('min', _calc_min)
def _calc_median(self):
"""
The median is either the middle value or the average of the two
"""The trimean is a robust measure of central tendency, like the
median, that takes the weighted average of the median and the
upper and lower quartiles.
>>> trimean([2, 1, 3])
2.0
>>> trimean(range(97))
48.0
>>> trimean(list(range(96)) + [1066]) # 1066 is an arbitrary outlier
48.0
"""
sorted_data = self._get_sorted_data()
gq = lambda q: self._get_quantile(sorted_data, q)
return (gq(0.25) + (2 * gq(0.5)) + gq(0.75)) / 4.0
# Bind the public ``trimean`` attribute to ``_calc_trimean`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
trimean = _StatsProperty('trimean', _calc_trimean)
def _calc_variance(self):
    """\
    Variance: the mean of the squared differences between each value
    and the mean of the data.

    >>> variance(range(97))
    784.0
    """
    # ``mean`` here is the module-level function defined elsewhere in
    # this file; it is only read, never rebound, so a plain global
    # lookup is sufficient.
    squared_diffs = self._get_pow_diffs(2)
    return mean(squared_diffs)
# Bind the public ``variance`` attribute to ``_calc_variance`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
variance = _StatsProperty('variance', _calc_variance)
def _calc_std_dev(self):
"""\
Standard deviation. Square root of the variance.
See the module docstring for more about statistical moments.
>>> kurtosis(range(9))
1.99125
With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more
centrally distributed than the normal curve.
"""
data, s_dev = self.data, self.std_dev
if len(data) > 1 and s_dev > 0:
return (sum(self._get_pow_diffs(4)) /
float((len(data) - 1) * (s_dev ** 4)))
else:
return 0.0
# Bind the public ``kurtosis`` attribute to ``_calc_kurtosis`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
kurtosis = _StatsProperty('kurtosis', _calc_kurtosis)
def _calc_pearson_type(self):
precision = self._pearson_precision
skewness = self.skewness
kurtosis = self.kurtosis
beta1 = skewness ** 2.0
beta2 = kurtosis * 1.0
# TODO: range checks?
c0 = (4 * beta2) - (3 * beta1)
c1 = skewness * (beta2 + 3)
c2 = (2 * beta2) - (3 * beta1) - 6
if round(c1, precision) == 0:
if round(beta2, precision) == 3:
return self._get_quantile(self._get_sorted_data(), 0.5)
# Bind the public ``median`` attribute to ``_calc_median`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
median = _StatsProperty('median', _calc_median)
def _calc_iqr(self):
    """Inter-quartile range (IQR): the 0.75 quantile minus the 0.25
    quantile, both obtained from ``self.get_quantile``. Like standard
    deviation it measures dispersion, but it is less influenced by
    outliers and therefore safer to compare between datasets.

    >>> iqr([1, 2, 3, 4, 5])
    2
    >>> iqr(range(1001))
    500
    """
    upper_quartile = self.get_quantile(0.75)
    lower_quartile = self.get_quantile(0.25)
    return upper_quartile - lower_quartile
# Bind the public ``iqr`` attribute to ``_calc_iqr`` through
# ``_StatsProperty`` (defined outside this excerpt; presumably a
# computed-on-access descriptor -- TODO confirm).
iqr = _StatsProperty('iqr', _calc_iqr)
def _calc_trimean(self):
"""The trimean is a robust measure of central tendency, like the
median, that takes the weighted average of the median and the
upper and lower quartiles.
>>> trimean([2, 1, 3])
2.0
>>> trimean(range(97))
48.0
>>> trimean(list(range(96)) + [1066]) # 1066 is an arbitrary outlier
48.0
"""
sorted_data = self._get_sorted_data()
gq = lambda q: self._get_quantile(sorted_data, q)