Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
verbose: bool
Whether or not to show detection progress.
Returns
-------
rows: generator
Returns file as a generator over rows as dictionaries.
Raises
------
NoDetectionResult
When the dialect detection fails.
"""
if encoding is None:
encoding = get_encoding(filename)
with open(filename, "r", newline="", encoding=encoding) as fid:
if dialect is None:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data, verbose=verbose)
fid.seek(0)
r = DictReader(fid, dialect=dialect)
for row in r:
yield row
dialect = det.detect(sample)
except clevercsv.Error:
raise DetectionError
if dialect is None:
return None
return dict(
delimiter=dialect.delimiter,
quotechar=dialect.quotechar,
escapechar=dialect.escapechar,
)
if __name__ == "__main__":
    # Command-line entry point: the first argument is the file to inspect.
    if len(sys.argv) <= 1:
        # No file given: print a usage hint on stderr instead of stdout.
        print(f"Usage: {sys.argv[0]} filename", file=sys.stderr)
    else:
        filename = sys.argv[1]
        # Look up the encoding first so it can be handed to the detector.
        encoding = clevercsv.utils.get_encoding(filename)
        print(detector(filename, encoding))
Note that using less than the entire file will speed up detection, but
can reduce the accuracy of the detected dialect.
**kwargs:
Additional keyword arguments for the ``pandas.read_csv`` function. You
can specify the file encoding here if needed, and it will be used
during dialect detection.
"""
if not (os.path.exists(filename) and os.path.isfile(filename)):
raise ValueError("Filename must be a regular file")
pd = import_optional_dependency("pandas")
# Use provided encoding or detect it, and record it for pandas
enc = kwargs.get("encoding") or get_encoding(filename)
kwargs["encoding"] = enc
with open(filename, "r", newline="", encoding=enc) as fid:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data)
csv_dialect = dialect.to_csv_dialect()
# This is used to catch pandas' warnings when a dialect is supplied.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="^Conflicting values for .*",
category=pd.errors.ParserWarning,
)
df = pd.read_csv(filename, *args, dialect=csv_dialect, **kwargs)
return df
verbose : bool
Enable verbose mode during detection.
method : str
Dialect detection method to use. Either 'normal' for normal form
detection, 'consistency' for the consistency measure, or 'auto' for
first normal and then consistency.
Returns
-------
dialect : SimpleDialect
The detected dialect as a :class:`SimpleDialect`, or None if detection
failed.
"""
enc = encoding or get_encoding(filename)
with open(filename, "r", newline="", encoding=enc) as fp:
data = fp.read(num_chars) if num_chars else fp.read()
dialect = Detector().detect(data, verbose=verbose, method=method)
return dialect
verbose: bool
Whether or not to show detection progress.
Returns
-------
rows: generator
Returns file as a generator over rows.
Raises
------
NoDetectionResult
When the dialect detection fails.
"""
if encoding is None:
encoding = get_encoding(filename)
with open(filename, "r", newline="", encoding=encoding) as fid:
if dialect is None:
data = fid.read(num_chars) if num_chars else fid.read()
dialect = Detector().detect(data, verbose=verbose)
if dialect is None:
raise NoDetectionResult()
fid.seek(0)
r = reader(fid, dialect)
yield from r