Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# -*- coding: utf-8 -*-
"""
Calculates the total number of events subscribers
have done over a certain time period.
"""
from typing import List
from ..utilities import EventsTablesUnion
from .metaclasses import SubscriberFeature
class TotalSubscriberEvents(SubscriberFeature):
"""
Class representing the number of calls made over a certain time
period. This can be subset to either texts or calls, and to incoming
or outgoing events.
Parameters
----------
start : str
iso format date range for the beginning of the time frame,
e.g. 2016-01-01 or 2016-01-01 14:03:01.
stop : str
As above.
direction : {'both','out','in'}, default 'both'
A string representing whether to include only outgoing events,
only incoming, or both.
event_type : str, default 'ALL'
# Statistic names, held as an unordered set of strings.
valid_stats = {
    "avg",
    "max",
    "median",
    "min",
    "stddev",
    "sum",
    "variance",
}
# Time bucket names, listed from the shortest span to the longest.
valid_time_buckets = [
    "second", "minute", "hour",
    "day", "week", "month",
    "quarter", "year", "century",
]
class DistanceSeries(SubscriberFeature):
"""
Per subscriber time series of distance in meters from some reference location.
For the time series, returns the first date/datetime within the time bucket for each
row, e.g. 1/1/1999 for a year bucket, 1/1/2026, 1/2/2026 and so on for a month bucket.
Notes
-----
The datetime column will contain dates for time buckets longer than an hour, and datetimes for
time buckets less than a day.
Parameters
----------
subscriber_locations : SubscriberLocations
A subscriber locations query with a lon-lat spatial unit to build the distance series against.
reference_location : BaseLocation or tuple of int, default (0, 0)
The set of home locations from which to calculate distance at each sighting, or a tuple
References
----------
[1] Veronique Lefebvre, https://docs.google.com/document/d/1BVOAM8bVacen0U0wXbxRmEhxdRbW8J_lyaOcUtDGhx8/edit
"""
from typing import List, Tuple, Union, Optional
from flowmachine.core import Query
from .metaclasses import SubscriberFeature
from flowmachine.utils import time_period_add, standardise_date
from ..utilities.sets import UniqueSubscribers
from functools import reduce
class TotalActivePeriodsSubscriber(SubscriberFeature):
"""
Breaks a time span into distinct time periods (currently integer number
of days). For each subscriber counts the total number of time periods in
which each subscriber was seen.
For instance, we might ask for a month's worth of data, break the
month down into ten 3-day chunks, and ask, for each subscriber, in how many
of these 3-day chunks the subscriber was present in the data.
Parameters
----------
start : str
iso-format date, start of the analysis.
total_periods : int
Total number of periods to break your time span into
period_length : int, default 1
# Statistic names, held as an unordered set of strings.
valid_stats = {
    "avg",
    "count",
    "max",
    "median",
    "min",
    "stddev",
    "sum",
    "variance",
}
# Names of handset characteristics, held as an unordered set of strings.
valid_characteristics = {
    "depth",
    "display_height",
    "display_width",
    "height",
    "weight",
    "width",
}
from ...core import Table
from .metaclasses import SubscriberFeature
from .subscriber_tacs import SubscriberHandsets
class HandsetStats(SubscriberFeature):
"""
This class calculates statistics associated with numeric fields of the TAC
table, such as width, height, etc.
A subscriber might use different phones for different periods of time. In
order to take the different usage periods into account we must calculate
weighted statistics of the desired characteristics. As such, here we
calculate the weighted characteristic, weighted by the number of seconds a
subscriber held that handset. Given that we only learn about changes in
handset when an event occurs, this average will be biased. For instance,
if a subscriber changes his handset straight after an event occurs, we will
be tracking the handset that the subscriber used to handle the event and
not the handset he actually switched to. We will only learn about the new
handset once a new event occurs.
We further assume that the handset held before the first observed event in
# -*- coding: utf-8 -*-
"""
Proportion of events that are outgoing and
incoming per subscriber.
"""
from typing import List
from ..utilities import EventsTablesUnion
from .metaclasses import SubscriberFeature
class ProportionEventOutgoing(SubscriberFeature):
"""
Find the proportion of interactions initiated by a
given subscriber.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : tuple of float, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
table : str, default 'all'
subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
Either msisdn, or imei, the column that identifies the subscriber.
subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
If provided, string or list of string which are msisdn or imeis to limit
elif self.method == "last":
query = """
SELECT DISTINCT ON(t.subscriber) t.subscriber as subscriber, tac
FROM ({}) t
ORDER BY t.subscriber, time DESC
""".format(
self.subscriber_tacs.get_query()
)
else:
raise ValueError(
f"Unsupported method. Valid values are: 'last', 'most-common'"
)
return query
class SubscriberHandsets(SubscriberFeature):
"""
Class representing all the handsets with which a subscriber has been associated.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
hours : 2-tuple of floats, default 'all'
Restrict the analysis to only a certain set
of hours within each day.
table : str, default 'all'
subscriber_identifier : str, default 'msisdn'
The focus of the analysis, usually either
'msisdn', 'imei'
# -*- coding: utf-8 -*-
"""
Number of call days over a certain period
of time that a given subscriber makes. This feature
represents the number of days that a subscriber
is connected to a given tower, or within a given location in a
specified time period.
"""
from typing import List
from .metaclasses import SubscriberFeature
from ..utilities.subscriber_locations import SubscriberLocations
class CallDays(SubscriberFeature):
"""
Class representing the number of call days over a certain
period of time. Call days represent the number of days that a
subscriber was connected to a tower in the given period of time.
Parameters
----------
subscriber_locations : SubscriberLocations
Locations of subscribers' interactions
See Also
--------
flowmachine.features.subscriber_locations
"""
def __init__(self, subscriber_locations: SubscriberLocations):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# -*- coding: utf-8 -*-
"""
Statistics for the distance between subscriber's own modal
location and its contacts' modal location.
"""
from .metaclasses import SubscriberFeature
# Statistic names, held as an unordered set of strings.
valid_stats = {
    "avg",
    "count",
    "max",
    "median",
    "min",
    "stddev",
    "sum",
    "variance",
}
class ContactReferenceLocationStats(SubscriberFeature):
"""
This class calculates statistics of the distance between a subscriber's reference point and its contacts' reference point.
Parameters
----------
contact_balance: flowmachine.features.ContactBalance
An instance of `ContactBalance` which lists the contacts of the
targeted subscribers along with the number of events between them.
contact_locations: flowmachine.core.Query
A flowmachine Query instance that contains a subscriber column. In
addition to that the query must have a spatial unit or the target
geometry column that contains the subscribers' reference locations.
statistic : {'count', 'sum', 'avg', 'max', 'min', 'median', 'mode', 'stddev', 'variance'}, default 'sum'
Aggregation statistic over the distances; defaults to sum.
geom_column:
The column containing the subscribers' reference locations. This is
subscriber_subset=subscriber_subset,
)
self.method = method
self.tacs = Table("infrastructure.tacs")
self.joined = self.subscriber_tac.join(self.tacs, "tac", "id", how="left")
super().__init__()
@property
def column_names(self) -> List[str]:
    """Return the ``column_names`` of the joined query stored on ``self.joined``."""
    joined_query = self.joined
    return joined_query.column_names
def _make_query(self):
    """
    Return whatever ``self.joined.get_query()`` produces (presumably the
    SQL text of the joined query -- confirm against ``get_query``).
    """
    joined_query = self.joined
    sql = joined_query.get_query()
    return sql
class SubscriberHandsetCharacteristic(SubscriberFeature):
"""
Class extracting a single characteristic from the handset.
Parameters
----------
start, stop : str
iso-format start and stop datetimes
characteristic: {
"brand",
"depth",
"display_colors",
"display_height",
"display_type",
"display_width",
"hardware_bluetooth",
"hardware_edge",
from flowmachine.utils import standardise_date
# Statistic names, held as an unordered set of strings.
valid_stats = {
    "avg",
    "count",
    "max",
    "median",
    "min",
    "mode",
    "stddev",
    "sum",
    "variance",
}
class TopUpBalance(SubscriberFeature):
"""
This class calculates statistics associated with top-up balances.
Top-up balance is a stock variable. As such, here we calculate the weighted
balance, weighted by the number of seconds a subscriber held that balance.
Given that we only learn about changes in balance when a top-up event
occurs, this average will be biased upwards. Unfortunately, we do not have
information about depletions to the balance caused by CDR events such as
calls, SMS, MDS, etc since this information is not provided by the MNOs.
For instance, if a subscriber with zero balance tops up a certain amount and
spends the whole balance right away, the subscriber's effective balance
during the whole period is 0 and so should be its average. However, because
we do not account for topup balance depletions its average balance is
biased upwards by the recharge amount.
However, given the nature of the data we take the conservative approach