Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
`buffersize`, `tempdir` and `cache` arguments under the
:func:`petl.transform.sorts.sort` function.
See also :func:`petl.transform.dedup.unique` and
:func:`petl.transform.dedup.distinct`.
"""
return DuplicatesView(table, key=key, presorted=presorted,
buffersize=buffersize, tempdir=tempdir, cache=cache)
Table.duplicates = duplicates
class DuplicatesView(Table):
    """Table view constructed by :func:`duplicates`.

    Unless `presorted` is true, the source is first passed through
    :func:`sort` on `key`, forwarding `buffersize`, `tempdir` and `cache`.
    Iteration is delegated to :func:`iterduplicates`.
    """

    def __init__(self, source, key=None, presorted=False, buffersize=None,
                 tempdir=None, cache=True):
        # iterduplicates() assumes sorted input, so sort here unless the
        # caller states that the data are already ordered.
        if not presorted:
            source = sort(source, key, buffersize=buffersize,
                          tempdir=tempdir, cache=cache)
        self.source = source
        self.key = key

    def __iter__(self):
        rows, key = self.source, self.key
        return iterduplicates(rows, key)
def iterduplicates(source, key):
# assume source is sorted
def wrapper(*args, **kwargs):
    # Call the wrapped function; a Table result is re-wrapped in
    # InteractiveWrapper, anything else is handed back unchanged.
    outcome = f(*args, **kwargs)
    if not isinstance(outcome, Table):
        return outcome
    return InteractiveWrapper(outcome)
wrapper.__name__ = f.__name__
| 'B' | 2 | False |
+-----+----+-------+
| 'C' | 9 | None |
+-----+----+-------+
See also :func:`petl.transform.reshape.flatten`.
"""
return UnflattenView(*args, **kwargs)
Table.unflatten = unflatten
class UnflattenView(Table):
def __init__(self, *args, **kwargs):
    """Accept ``(input, period)`` or ``(table, field, period)``.

    With two positional arguments the first is stored as the input as
    given.  With three, the input is ``values(args[0], args[1])``.  In
    both cases the last positional argument is stored as the period.
    The optional `missing` keyword argument (default None) is stored too.

    Raises AssertionError for any other number of positional arguments.
    """
    if len(args) == 2:
        self.input = args[0]
        self.period = args[1]
    elif len(args) == 3:
        self.input = values(args[0], args[1])
        self.period = args[2]
    else:
        # Raised explicitly instead of ``assert False`` so that the check
        # still fires when Python runs with -O (which strips asserts);
        # the exception type seen by callers is unchanged.
        raise AssertionError('invalid arguments')
    self.missing = kwargs.get('missing', None)
def __iter__(self):
inpt = self.input
period = self.period
missing = self.missing
# convenience for working with sqlite3
if isinstance(dbo, string_types):
import sqlite3
dbo = sqlite3.connect(dbo)
needs_closing = True
try:
_todb(table, dbo, tablename, schema=schema, commit=commit,
truncate=False)
finally:
if needs_closing:
dbo.close()
Table.appenddb = appenddb
| foo | bar | baz | quux |
+=====+=====+========+=======+
| 1 | 3.0 | (9+3j) | 'aaa' |
+-----+-----+--------+-------+
| 2 | 1.3 | (7+2j) | None |
+-----+-----+--------+-------+
"""
return convertall(table, numparser(strict), **kwargs)
Table.convertnumbers = convertnumbers
class FieldConvertView(Table):
def __init__(self, source, converters=None, failonerror=None,
errorvalue=None, where=None, pass_row=False):
self.source = source
if converters is None:
self.converters = dict()
elif isinstance(converters, dict):
self.converters = converters
elif isinstance(converters, (tuple, list)):
self.converters = dict([(i, v) for i, v in enumerate(converters)])
else:
raise ArgumentError('unexpected converters: %r' % converters)
self.failonerror = (config.failonerror if failonerror is None
else failonerror)
self.errorvalue = errorvalue
self.where = where
| 2 | 0.56 | 0.56 |
+-----+------+------+
| 2 | 'c' | None |
+-----+------+------+
| 'c' | 'c' | 0.72 |
+-----+------+------+
Use the `missing` keyword argument to control which value is treated as
missing (`None` by default).
"""
return FillLeftView(table, missing=missing)
Table.fillleft = fillleft
class FillLeftView(Table):
    """Table view constructed by :func:`fillleft`.

    Stores the wrapped table and the `missing` value (None by default) and
    hands both to :func:`iterfillleft` on iteration.
    """

    def __init__(self, table, missing=None):
        self.table, self.missing = table, missing

    def __iter__(self):
        wrapped, placeholder = self.table, self.missing
        return iterfillleft(wrapped, placeholder)
def iterfillleft(table, missing):
it = iter(table)
hdr = next(it)
yield tuple(hdr)
| 24 | 42 | 'M' | 12 |
+----------+-----+-----+-----+
| 68 | 42 | 'F' | 34 |
+----------+-----+-----+-----+
| 112 | 42 | '-' | 56 |
+----------+-----+-----+-----+
"""
return AddFieldsView(table, field_defs, missing=missing)
Table.addfields = addfields
class AddFieldsView(Table):
    """Table view constructed by :func:`addfields`.

    Iteration is delegated to :func:`iteraddfields`.
    """

    def __init__(self, source, field_defs, missing=None):
        # stack() is applied so that rows are all the same length
        evened = stack(source, missing=missing)
        self.source = evened
        # the field definitions are kept exactly as supplied
        self.field_defs = field_defs

    def __iter__(self):
        rows, defs = self.source, self.field_defs
        return iteraddfields(rows, defs)
def iteraddfields(source, field_defs):
it = iter(source)
hdr = next(it)
flds = list(map(text_type, hdr))
| 'girl' | 19 | 24 | 9 |
+--------+-------+------+-----+
See also :func:`petl.transform.reshape.recast`.
"""
return PivotView(table, f1, f2, f3, aggfun, missing=missing,
presorted=presorted, buffersize=buffersize,
tempdir=tempdir, cache=cache)
Table.pivot = pivot
class PivotView(Table):
    """Table view constructed by :func:`pivot`.

    Unless `presorted` is true, the source is passed through :func:`sort`
    with key ``(f1, f2)``, forwarding `buffersize`, `tempdir` and `cache`.
    Iteration is delegated to :func:`iterpivot`.
    """

    def __init__(self, source, f1, f2, f3, aggfun, missing=None,
                 presorted=False, buffersize=None, tempdir=None, cache=True):
        if not presorted:
            # sort on the first two fields unless the caller says the
            # data are already ordered
            source = sort(source, key=(f1, f2), buffersize=buffersize,
                          tempdir=tempdir, cache=cache)
        self.source = source
        self.f1 = f1
        self.f2 = f2
        self.f3 = f3
        self.aggfun = aggfun
        self.missing = missing

    def __iter__(self):
        fields = (self.f1, self.f2, self.f3)
        return iterpivot(self.source, *fields + (self.aggfun, self.missing))
If `presorted` is True, it is assumed that the data are already sorted by
the given key, and the `buffersize`, `tempdir` and `cache` arguments are
ignored. Otherwise, the data are sorted; see also the discussion of the
`buffersize`, `tempdir` and `cache` arguments under the
:func:`petl.transform.sorts.sort` function.
See also :func:`petl.transform.dedup.unique` and
:func:`petl.transform.dedup.distinct`.
"""
return DuplicatesView(table, key=key, presorted=presorted,
buffersize=buffersize, tempdir=tempdir, cache=cache)
Table.duplicates = duplicates
class DuplicatesView(Table):
    """Table view constructed by :func:`duplicates`.

    Unless `presorted` is true, the source is first passed through
    :func:`sort` on `key`, forwarding `buffersize`, `tempdir` and `cache`.
    Iteration is delegated to :func:`iterduplicates`.
    """

    def __init__(self, source, key=None, presorted=False, buffersize=None,
                 tempdir=None, cache=True):
        # With presorted=True the data are taken to be sorted by the key
        # already and the sort-related arguments are ignored.
        if not presorted:
            source = sort(source, key, buffersize=buffersize,
                          tempdir=tempdir, cache=cache)
        self.source = source
        self.key = key

    def __iter__(self):
        rows, key = self.source, self.key
        return iterduplicates(rows, key)
+----------------------+----------------------+---------------------+
| 0.029797219438070344 | 0.21863797480360336 | 0.5053552881033624 |
+----------------------+----------------------+---------------------+
| 0.026535969683863625 | 0.1988376506866485 | 0.6498844377795232 |
+----------------------+----------------------+---------------------+
...
Note that the data are generated on the fly and are not stored in memory,
so this function can be used to simulate very large tables.
"""
return RandomTable(numflds, numrows, wait=wait, seed=seed)
class RandomTable(Table):
def __init__(self, numflds=5, numrows=100, wait=0, seed=None):
    """Store the table settings.

    `numflds`, `numrows` and `wait` are kept as given.  When `seed` is
    None the current ``datetime.datetime.now()`` value is stored as the
    seed; otherwise the supplied seed is stored.
    """
    self.numflds, self.numrows, self.wait = numflds, numrows, wait
    # no explicit seed: fall back to the current timestamp
    self.seed = datetime.datetime.now() if seed is None else seed
def __iter__(self):
nf = self.numflds
nr = self.numrows
seed = self.seed