Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
minimum_data (str): Minimum data before starting search. Default value is first time of index.
gap (str or int): Time between examples. Default value is window size.
If an integer, search will start on the first event after the minimum data.
drop_empty (bool): Whether to drop empty slices. Default value is True.
label_type (str): The label type can be "continuous" or "categorical". Default value is the inferred label type.
verbose (bool): Whether to render progress bar. Default value is True.
*args: Positional arguments for labeling function.
**kwargs: Keyword arguments for labeling function.
Returns:
lt (LabelTimes): Calculated labels with cutoff times.
"""
assert self.labeling_function, 'missing labeling function(s)'
self._check_example_count(num_examples_per_instance, gap)
self.window_size = self.window_size or len(df)
gap = to_offset(gap or self.window_size)
is_label_search = isinstance(num_examples_per_instance, dict)
search = (LabelSearch if is_label_search else ExampleSearch)(num_examples_per_instance)
records = self._run_search(
df=df,
search=search,
gap=gap,
min_data=minimum_data,
drop_empty=drop_empty,
verbose=verbose,
*args,
**kwargs,
)
lt = LabelTimes(
def _set_window_size(self, window_size):
    """Format the initial window size and store it on the instance.

    Args:
        window_size (str or int): Duration of each data slice. A value of
            None is stored unchanged; the default value for window size is
            all future data.
    """
    # Only a provided value goes through to_offset; None is kept as the
    # "not set" marker so later code can substitute its own default.
    self.window_size = None if window_size is None else to_offset(window_size)
Args:
df (DataFrame): Data frame to create slices on.
num_examples_per_instance (int): Number of examples per unique instance of target entity.
minimum_data (str): Minimum data before starting search. Default value is first time of index.
gap (str or int): Time between examples. Default value is window size.
If an integer, search will start on the first event after the minimum data.
drop_empty (bool): Whether to drop empty slices. Default value is True.
verbose (bool): Whether to print metadata about slice. Default value is False.
Returns:
ds (generator): Returns a generator of data slices.
"""
self._check_example_count(num_examples_per_instance, gap)
self.window_size = self.window_size or len(df)
gap = to_offset(gap or self.window_size)
groups = self.set_index(df).groupby(self.target_entity)
if num_examples_per_instance == -1:
num_examples_per_instance = float('inf')
for key, df in groups:
slices = self._slice(df=df, gap=gap, min_data=minimum_data, drop_empty=drop_empty)
for ds in slices:
ds.context.target_instance = key
if verbose: print(ds)
yield ds
if ds.context.slice_number >= num_examples_per_instance:
break