Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
This class calculates the Pareto proportion for a subscriber's interactions -
that fraction of their contacts who account for 80% of their interactions.
"""
from typing import List, Union
from flowmachine.features.subscriber.contact_balance import ContactBalance
from flowmachine.features.subscriber.subscriber_degree import SubscriberDegree
from flowmachine.features.subscriber.metaclasses import SubscriberFeature
from flowmachine.features.utilities.direction_enum import Direction
from flowmachine.utils import standardise_date
class ParetoInteractions(SubscriberFeature):
"""
Calculates the proportion of a subscriber's contacts who
account for some proportion of their interactions, ala the
Pareto Principle.
Returns two columns: subscriber, and pareto - the proportion
of that subscriber's contacts who account for the requested
proportion (0.8, by default) of their interactions in this time period.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : tuple of float, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
This method also allows one to get the subset of counterparts together
with subscribers by turning the `include_subscribers` flag to `True`.
Parameters
----------
include_subscribers: bool, default True
Whether to include the list of subscribers in the subset as well.
"""
return _ContactBalanceSubset(
contact_balance=self, include_subscribers=include_subscribers
)
class _ContactBalanceSubset(SubscriberFeature):
"""
This internal class returns the subset of counterparts. In some cases, we
are interested in obtaining information about the subset of subscribers
contacts.
This method also allows one to get the subset of counterparts together with
subscribers by turning the `include_subscribers` flag to `True`.
Parameters
----------
include_subscribers: bool, default False
Whether to include the list of subscribers in the subset as well.
"""
def __init__(self, contact_balance, include_subscribers=False):
    """
    Initialise the query by delegating to the parent class initialiser.

    Parameters
    ----------
    contact_balance
        NOTE(review): presumably the contact balance query whose
        counterparts are being subset - confirm against the caller.
    include_subscribers : bool, default False
        Whether to include the list of subscribers in the subset as well.

    Notes
    -----
    NOTE(review): in the lines visible here neither argument is stored on
    the instance before the parent initialiser runs - confirm that no
    assignments were lost.
    """
    super().__init__()
@property
def column_names(self) -> List[str]:
    """
    Names of the columns produced by this query, in output order.

    Returns
    -------
    list of str
        ``["subscriber", "msisdn_counterpart", "value"]``; a new list is
        built on every access.
    """
    names = ["subscriber", "msisdn_counterpart"]
    names.append("value")
    return names
def _make_query(self):
    """
    Build the SQL that aggregates the duration of outgoing events for each
    (subscriber, msisdn_counterpart) pair.

    The aggregate function name is read from ``self.statistic`` and the
    events are read from the SQL returned by
    ``self.unioned_query.get_query()``.

    Returns
    -------
    str
        SQL text yielding the columns subscriber, msisdn_counterpart and
        value.
    """
    aggregate = self.statistic
    events_sql = self.unioned_query.get_query()
    # The lines of the SQL literal are kept flush-left so that the emitted
    # text is unchanged.
    return f"""
SELECT subscriber, msisdn_counterpart, {aggregate}(duration) as value
FROM ({events_sql}) u
WHERE outgoing
GROUP BY subscriber, msisdn_counterpart
"""
class PairedPerLocationSubscriberCallDurations(SubscriberFeature):
"""
This class returns the total amount of time a subscriber spent calling
each other subscriber within the period, faceted by their respective
locations at the time.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
Either msisdn, or imei, the column that identifies the subscriber.
subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
If provided, string or list of string which are msisdn or imeis to limit
def column_names(self) -> List[str]:
    """
    Names of the output columns: subscriber, then the location id columns
    of ``self.spatial_unit``, then value.

    Returns
    -------
    list of str
        A new list built by concatenation on every call.
    """
    location_columns = self.spatial_unit.location_id_columns
    leading = ["subscriber"]
    trailing = ["value"]
    # Plain list concatenation, so the accepted operand types are unchanged.
    return leading + location_columns + trailing
def _make_query(self):
    """
    Build the SQL that aggregates event durations per subscriber and
    location.

    The location id columns of ``self.spatial_unit`` are used both in the
    select list and in the grouping. The filter clause is whatever
    ``make_where`` returns for ``self.direction.get_filter_clause()``.

    Returns
    -------
    str
        SQL text yielding subscriber, the location id columns and value.
    """
    location_list = ", ".join(self.spatial_unit.location_id_columns)
    direction_filter = self.direction.get_filter_clause()
    filter_sql = make_where(direction_filter)
    aggregate = self.statistic
    events_sql = self.unioned_query.get_query()
    # The lines of the SQL literal are kept flush-left so that the emitted
    # text is unchanged.
    return f"""
SELECT subscriber, {location_list}, {aggregate}(duration) as value
FROM ({events_sql}) u
{filter_sql}
GROUP BY subscriber, {location_list}
"""
class PairedSubscriberCallDurations(SubscriberFeature):
"""
This class returns the total amount of time a subscriber spent calling
each other subscriber within the period.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
Either msisdn, or imei, the column that identifies the subscriber.
subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
If provided, string or list of string which are msisdn or imeis to limit
results to; or, a query or table which has a column with a name matching
@property
def column_names(self) -> List[str]:
    """
    Names of the columns produced by this query, in output order.

    Returns
    -------
    list of str
        ``["subscriber", "value"]``; a new list is built on every access.
    """
    names = ["subscriber"]
    names.append("value")
    return names
def _make_query(self):
    """
    Build the SQL that aggregates event durations per subscriber.

    The aggregate function name is read from ``self.statistic``. The filter
    clause is whatever ``make_where`` returns for
    ``self.direction.get_filter_clause()``.

    Returns
    -------
    str
        SQL text yielding the columns subscriber and value.
    """
    direction_filter = self.direction.get_filter_clause()
    filter_sql = make_where(direction_filter)
    aggregate = self.statistic
    events_sql = self.unioned_query.get_query()
    # The lines of the SQL literal are kept flush-left so that the emitted
    # text is unchanged.
    return f"""
SELECT subscriber, {aggregate}(duration) as value FROM
({events_sql}) u
{filter_sql}
GROUP BY subscriber
"""
class PerLocationSubscriberCallDurations(SubscriberFeature):
"""
This class returns the total amount of time a subscriber spent calling
within the period, optionally limited to only calls they made, or received,
faceted by their location at the time.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
Either msisdn, or imei, the column that identifies the subscriber.
subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
If provided, string or list of string which are msisdn or imeis to limit
self.contact_reciprocal_query = contact_reciprocal
@property
def column_names(self):
    """
    Names of the columns produced by this query, in output order.

    Returns
    -------
    list of str
        ``["subscriber", "proportion"]``; a new list is built on every
        access.
    """
    names = ["subscriber"]
    names.append("proportion")
    return names
def _make_query(self):
    """
    Build the SQL that averages the ``reciprocal`` flag (cast to int) per
    subscriber.

    Rows are read from the SQL returned by
    ``self.contact_reciprocal_query.get_query()``.

    Returns
    -------
    str
        SQL text yielding the columns subscriber and proportion.
    """
    reciprocal_sql = self.contact_reciprocal_query.get_query()
    # The lines of the SQL literal are kept flush-left so that the emitted
    # text is unchanged.
    return f"""
SELECT subscriber, AVG(reciprocal::int) AS proportion
FROM ({reciprocal_sql}) R
GROUP BY subscriber
"""
class ProportionEventReciprocal(SubscriberFeature):
"""
This class calculates the proportion of events with a reciprocal contact
per subscriber. It is possible to fine-tune the period for which a
reciprocal contact must have happened.
A reciprocal contact is a contact who has initiated contact with the
subscriber and who also has been the counterpart of an initiated contact
by the subscriber.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
# -*- coding: utf-8 -*-
"""
Calculate metrics related to the distance between a caller and her/his counterparts.
"""
from typing import List, Union
from flowmachine.features.utilities.events_tables_union import EventsTablesUnion
from flowmachine.features.spatial.distance_matrix import DistanceMatrix
from flowmachine.features.subscriber.metaclasses import SubscriberFeature
from flowmachine.features.utilities.direction_enum import Direction
from flowmachine.utils import make_where, standardise_date
# Names of the aggregate statistics listed for this module.
# NOTE(review): presumably used to validate a `statistic` argument - confirm
# against the code that reads this set.
valid_stats = {
    "count",
    "sum",
    "avg",
    "max",
    "min",
    "median",
    "stddev",
    "variance",
}
class DistanceCounterparts(SubscriberFeature):
"""
This class returns metrics related to the distance between event
initiator and her/his counterparts.
It assumes that the ID column uniquely identifies the event initiator and
their counterparts' event. Choose only tables for which this assumption is
true. In some cases, asynchronous communication like SMS might not be
tagged with an ID that allows one to recover the counterpart event.
Distances are measured in km.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Calculates the proportion of calls that a
subscriber makes during night time. Nocturnal
hour definitions can be specified.
"""
from typing import Union
from flowmachine.features.utilities.events_tables_union import EventsTablesUnion
from flowmachine.features.subscriber.metaclasses import SubscriberFeature
from flowmachine.features.utilities.direction_enum import Direction
from flowmachine.utils import make_where, standardise_date
class NocturnalEvents(SubscriberFeature):
"""
Represents the percentage of events that a subscriber makes/receives which
began at night. The definition of night is configurable.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : tuple of ints, default (20, 4)
Hours that count as being nocturnal. e.g. (20,4)
will be the times after 8pm and before 4 am.
subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
Either msisdn, or imei, the column that identifies the subscriber.
subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
If provided, string or list of string which are msisdn or imeis to limit
results to; or, a query or table which has a column with a name matching
from abc import ABCMeta, abstractmethod
from typing import List, Union
from flowmachine.core import make_spatial_unit
from flowmachine.core.spatial_unit import AnySpatialUnit
from flowmachine.features.utilities.events_tables_union import EventsTablesUnion
from flowmachine.features.utilities.subscriber_locations import SubscriberLocations
from flowmachine.features.subscriber.contact_balance import ContactBalance
from flowmachine.features.subscriber.metaclasses import SubscriberFeature
from flowmachine.features.utilities.direction_enum import Direction
from flowmachine.utils import make_where, standardise_date
class BaseEntropy(SubscriberFeature, metaclass=ABCMeta):
    """
    Base query for calculating entropy of subscriber features.

    The generated SQL reads ``subscriber`` and ``relative_freq`` from the
    subquery text held in ``self._relative_freq_query`` and reduces it to
    one entropy value per subscriber.

    NOTE(review): ``_relative_freq_query`` is not defined in the lines
    visible here - presumably it is provided elsewhere in the class
    hierarchy; confirm.
    """

    @property
    def column_names(self) -> List[str]:
        """
        Names of the columns produced by this query, in output order.

        Returns
        -------
        list of str
            ``["subscriber", "entropy"]``; a new list is built on every
            access.
        """
        names = ["subscriber"]
        names.append("entropy")
        return names

    def _make_query(self):
        """
        Build the SQL computing ``-1 * SUM(relative_freq * LN(relative_freq))``
        for each subscriber.

        Returns
        -------
        str
            SQL text yielding the columns subscriber and entropy.
        """
        relative_freq_sql = self._relative_freq_query
        # The lines of the SQL literal are kept flush-left so that the
        # emitted text is unchanged.
        return f"""
SELECT
subscriber,
-1 * SUM( relative_freq * LN( relative_freq ) ) AS entropy
FROM ({relative_freq_sql}) u
GROUP BY subscriber
"""
# -*- coding: utf-8 -*-
"""
Classes for searching and dealing with reciprocal contacts.
"""
from typing import Union
from flowmachine.core.mixins.graph_mixin import GraphMixin
from flowmachine.features.subscriber.contact_balance import ContactBalance
from flowmachine.features.utilities.events_tables_union import EventsTablesUnion
from flowmachine.features.subscriber.metaclasses import SubscriberFeature
from flowmachine.features.utilities.direction_enum import Direction
from flowmachine.utils import make_where, standardise_date
class ContactReciprocal(GraphMixin, SubscriberFeature):
"""
This class classifies a subscriber's contacts as reciprocal or not. In
addition to that, it calculates the number of incoming and outgoing events
between the subscriber and her/his counterpart as well as the proportion
that those events represent in total incoming and outgoing events.
A reciprocal contact is a contact who has initiated contact with the
subscriber and who also has been the counterpart of an initiated contact
by the subscriber.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set