Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
or streaming item-by-item.
Args:
filepath (str): Path to file on disk from which data will be read.
mode (str): Mode with which ``filepath`` is opened.
encoding (str): Name of the encoding used to decode or encode the data
in ``filepath``. Only applicable in text mode.
lines (bool): If False, all data is read in at once; otherwise, data is
read in one line at a time.
Yields:
object: Next JSON item; could be a dict, list, int, float, str,
depending on the value of ``lines``.
"""
_validate_read_mode(mode)
with open_sesame(filepath, mode=mode, encoding=encoding) as f:
if lines is False:
yield json.load(f)
else:
for line in f:
yield json.loads(line)
def write_file(content, filepath, mode='wt', encoding=None,
               make_dirs=False):
    """
    Write ``content`` to a file on disk at ``filepath``.

    Files with appropriate extensions are compressed with gzip or bz2
    automatically; intermediate folders not found on disk may also be
    created automatically.

    Args:
        content: Data to be written to disk.
        filepath (str): Path to the file to which ``content`` is written.
        mode (str): Mode with which ``filepath`` is opened.
        encoding (str): Name of the encoding used to encode the data in
            ``filepath``. Only applicable in text mode.
        make_dirs (bool): If True, automatically create any intermediate
            folders in ``filepath`` not already found on disk.

    See Also:
        :func:`open_sesame() `
    """
    with open_sesame(
            filepath, mode=mode, encoding=encoding,
            make_dirs=make_dirs) as fout:
        fout.write(content)
Args:
filepath (str): /path/to/file on disk from which json objects will be streamed,
where all json objects are mashed together, end-to-end, on a single line;
for example::
{"title": "Harrison Bergeron", "text": "The year was 2081, and everybody was finally equal."}{"title": "2BR02B", "text": "Everything was perfectly swell."}
mode (str, optional)
encoding (str, optional)
buffersize (int, optional): number of bytes to read in as a chunk
Yields:
dict: Next valid JSON object, converted to native Python equivalent.
"""
with open_sesame(filepath, mode=mode, encoding=encoding) as f:
buffer = ''
for chunk in iter(functools.partial(f.read, buffersize), ''):
buffer += chunk
while buffer:
try:
result, index = JSON_DECODER.raw_decode(buffer)
yield result
buffer = buffer[index:]
# not enough data to decode => read another chunk
except ValueError:
break
Yields:
List[obj]: Next row, whose elements are strings and/or floats.
If ``fieldnames`` is None or 'infer' doesn't detect a header row.
*or*
Dict[str, obj]: Next row, as an ordered dictionary of (key, value) pairs,
where keys are column names and values are the corresponding strings
and/or floats. If ``fieldnames`` is a list of column names or 'infer'
detects a header row.
See Also:
https://docs.python.org/3/library/csv.html#csv.reader
"""
has_header = False
with open_sesame(filepath, mode="rt", encoding=encoding, newline="") as f:
if dialect == "infer" or fieldnames == "infer":
sniffer = compat.csv.Sniffer()
# add pipes to the list of preferred delimiters, and put spaces last
sniffer.preferred = [",", "\t", "|", ";", ":", " "]
# sample = "".join(f.readline() for _ in range(5)) # f.read(1024)
sample = f.read(1024)
if dialect == "infer":
dialect = sniffer.sniff(sample)
if fieldnames == "infer":
has_header = sniffer.has_header(sample)
f.seek(0)
if has_header is True:
csv_reader = compat.csv.DictReader(
f,
fieldnames=None,
dialect=dialect,
def read_spacy_docs(filepath):
    """
    Stream ``spacy.Doc`` s from disk at ``filepath``, where they were
    previously serialized via pickle.

    Args:
        filepath (str): /path/to/file on disk from which spacy docs
            will be streamed

    Yields:
        Next deserialized ``spacy.Doc``.

    Note:
        Unpickling executes arbitrary code; only read files that come
        from a trusted source.
    """
    with open_sesame(filepath, mode='rb') as fin:
        # the unpickled payload is itself iterable; re-yield its items
        # one at a time so callers can stream them lazily
        for doc in compat.pickle.load(fin):
            yield doc
]
mode (str, optional)
encoding (str, optional)
prefix (str, optional): if '', the entire JSON object will be read in at once;
if 'item', each item in a top-level array will be read in successively;
if 'item.text', each array item's 'text' value will be read in successively
Yields:
Next matching JSON object; could be a dict, list, int, float, str,
depending on the value of ``prefix``.
Note:
Refer to ``ijson`` at https://pypi.python.org/pypi/ijson/ for usage details.
"""
with open_sesame(filepath, mode=mode, encoding=encoding) as f:
if prefix == '':
yield json.load(f)
else:
for item in ijson.items(f, prefix):
yield item
Args:
filepath (str): /path/to/file on disk from which rows will be streamed
encoding (str)
dialect (str): a grouping of formatting parameters that determine how
the tabular data is parsed when reading/writing; if 'infer', the
first 1024 bytes of the file is analyzed, producing a best guess for
the correct dialect
delimiter (str): 1-character string used to separate fields in a row
Yields:
List[obj]: Next row, whose elements are strings and/or numbers.
.. seealso:: https://docs.python.org/3/library/csv.html#csv.reader
"""
with open_sesame(filepath, mode='rt', encoding=encoding, newline='') as f:
if dialect == 'infer':
sniffer = compat.csv.Sniffer()
# add pipes to the list of preferred delimiters, and put spaces last
sniffer.preferred = [',', '\t', '|', ';', ':', ' ']
dialect = sniffer.sniff(f.read(1024))
f.seek(0)
for row in compat.csv.reader(f, dialect=dialect, delimiter=delimiter):
yield row
encoding (str)
make_dirs (bool)
dialect (str): a grouping of formatting parameters that determine how
the tabular data is parsed when reading/writing
delimiter (str): 1-character string used to separate fields in a row
See Also:
https://docs.python.org/3/library/csv.html#csv.writer
Note:
Here, CSV is used as a catch-all term for *any* delimited file
format, and ``delimiter=','`` is merely the function's default value.
Other common delimited formats are TSV (tab-separated-value, with
``delimiter='\\t'``) and PSV (pipe-separated-value, with ``delimiter='|'``.
"""
with open_sesame(filepath, mode='wt', encoding=encoding, newline='') as f:
csv_writer = compat.csv.writer(f, dialect=dialect, delimiter=delimiter)
csv_writer.writerows(rows)
Args:
filepath (str): Path to file on disk from which data will be read.
mode (str): Mode with which ``filepath`` is opened.
encoding (str): Name of the encoding used to decode or encode the data
in ``filepath``. Only applicable in text mode.
lines (bool): If False, all data is read in at once; otherwise, data is
read in one line at a time.
Yields:
str: Next line of text to read in.
If ``lines`` is False, wrap this output in :func:`next()` to conveniently
access the full text.
"""
_validate_read_mode(mode)
with open_sesame(filepath, mode=mode, encoding=encoding) as f:
if lines is False:
yield f.read()
else:
for line in f:
yield line
def read_file(filepath, mode='rt', encoding=None):
    """
    Read and return the full contents of a file on disk.

    Files compressed with gzip, bz2, or lzma are handled automatically.

    Args:
        filepath (str): Path to the file on disk to be read.
        mode (str): Mode with which ``filepath`` is opened.
        encoding (str): Name of the encoding used to decode the data in
            ``filepath``. Only applicable in text mode.

    Returns:
        The file's entire contents; ``str`` or ``bytes`` depending on
        ``mode``.
    """
    with open_sesame(filepath, mode=mode, encoding=encoding) as fin:
        contents = fin.read()
    return contents