Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python
import math
from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import Quantiles
from agate.warns import warn_null_calculation
class Percentiles(Aggregation):
"""
Divide a column into 100 equal-size groups using the "CDF" method.
See `this explanation `_
of the various methods for computing percentiles.
"Zeroth" (min value) and "Hundredth" (max value) percentiles are included
for reference and intuitive indexing.
A reference implementation was provided by
`pycalcstats `_.
This aggregation can not be applied to a :class:`.TableSet`.
:param column_name:
The name of a column containing :class:`.Number` data.
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Boolean
class HasNulls(Aggregation):
    """
    Report whether a column holds at least one null (``None``) value.

    :param column_name:
        The name of the column to check.
    """
    def __init__(self, column_name):
        # Only the name is kept; the column is looked up when the
        # aggregation is run against a table.
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        Return the data type of the result: a :class:`.Boolean` instance.
        """
        result_type = Boolean()

        return result_type

    def run(self, table):
        """
        Return ``True`` if ``None`` is among the column's values,
        ``False`` otherwise.
        """
        column = table.columns[self._column_name]
        column_values = column.values()

        return None in column_values
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import max_precision
class MaxPrecision(Aggregation):
"""
Find the most decimal places present for any value in this column.
:param column_name:
The name of the column to be searched.
"""
def __init__(self, column_name):
self._column_name = column_name
def get_aggregate_data_type(self, table):
return Number()
def validate(self, table):
column = table.columns[self._column_name]
if not isinstance(column.data_type, Number):
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Number
from agate.exceptions import DataTypeError
class Sum(Aggregation):
"""
Calculate the sum of a column.
:param column_name:
The name of a column containing :class:`.Number` data.
"""
def __init__(self, column_name):
self._column_name = column_name
def get_aggregate_data_type(self, table):
return Number()
def validate(self, table):
column = table.columns[self._column_name]
if not isinstance(column.data_type, Number):
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Boolean
class Any(Aggregation):
    """
    Check whether at least one value in a column passes a test.

    When the column holds :class:`.Boolean` data the test may be left out.

    :param column_name:
        The name of the column to check.
    :param test:
        A function that takes a value and returns `True` or `False`.
    """
    def __init__(self, column_name, test=None):
        # Store the optional predicate and the target column name as given.
        self._test = test
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        Return the data type of the result: a :class:`.Boolean` instance.
        """
        result_type = Boolean()

        return result_type
#!/usr/bin/env python
from decimal import Decimal
from agate.aggregations.base import Aggregation
from agate.data_types import Number, Text
from agate.exceptions import DataTypeError
class MaxLength(Aggregation):
"""
Find the length of the longest string in a column.
:param column_name:
The name of a column containing :class:`.Text` data.
"""
def __init__(self, column_name):
self._column_name = column_name
def get_aggregate_data_type(self, table):
return Number()
def validate(self, table):
column = table.columns[self._column_name]
if not isinstance(column.data_type, Text):
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Date, DateTime, Number
from agate.exceptions import DataTypeError
class Min(Aggregation):
"""
Find the minimum value in a column.
This aggregation can be applied to columns containing :class:`.Date`,
:class:`.DateTime`, or :class:`.Number` data.
:param column_name:
The name of the column to be searched.
"""
def __init__(self, column_name):
self._column_name = column_name
def get_aggregate_data_type(self, table):
column = table.columns[self._column_name]
if (isinstance(column.data_type, Number) or
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.aggregations.mean import Mean
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.warns import warn_null_calculation
class Variance(Aggregation):
"""
Calculate the sample variance of a column.
For the population variance see :class:`.PopulationVariance`.
:param column_name:
The name of a column containing :class:`.Number` data.
"""
def __init__(self, column_name):
self._column_name = column_name
self._mean = Mean(column_name)
def get_aggregate_data_type(self, table):
return Number()
def validate(self, table):
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.data_types import Boolean
class All(Aggregation):
"""
Check if all values in a column pass a test.
:param column_name:
The name of the column to check.
:param test:
Either a single value that all values in the column are compared against
(for equality) or a function that takes a column value and returns
`True` or `False`.
"""
def __init__(self, column_name, test):
self._column_name = column_name
if callable(test):
self._test = test
else:
#!/usr/bin/env python
from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.aggregations.percentiles import Percentiles
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import Quantiles
from agate.warns import warn_null_calculation
class Deciles(Aggregation):
"""
Calculate the deciles of a column based on its percentiles.
Deciles will be equivalent to the 10th, 20th ... 90th percentiles.
"Zeroth" (min value) and "Tenth" (max value) deciles are included for
reference and intuitive indexing.
See :class:`Percentiles` for implementation details.
This aggregation can not be applied to a :class:`.TableSet`.
:param column_name:
The name of a column containing :class:`.Number` data.
"""
def __init__(self, column_name):