Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_null_scan_column(self):
    """Scanning ``None`` must leave the column without any PII types."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan(None, scanners)

    self.assertFalse(column.has_pii())
    expected = {"pii_types": [], "name": "col"}
    self.assertEqual(expected, column.get_dict())
def test_positive_scan_column(self):
    """Scanning "Jonathan Smith" must mark the column as ``PiiTypes.PERSON``."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan("Jonathan Smith", scanners)

    self.assertTrue(column.has_pii())
    expected = {"pii_types": [PiiTypes.PERSON], "name": "col"}
    self.assertEqual(expected, column.get_dict())
def test_negative_scan_column(self):
    """Scanning the value "abc" must not report any PII types."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan("abc", scanners)

    self.assertFalse(column.has_pii())
    expected = {"pii_types": [], "name": "col"}
    self.assertEqual(expected, column.get_dict())
def setUp(self):
    """Instantiate a ``RegexScanner`` and store it on ``self.parser``."""
    regex_scanner = RegexScanner()
    self.parser = regex_scanner
def scan(self, generator):
    """Scan the rows produced by ``generator`` and record the PII types found.

    ``generator`` is called with the keyword arguments ``column_list``
    (this object's children), ``schema_name`` (``self._schema``) and
    ``table_name`` (this object itself), and its result is iterated row by
    row. The values of each row are paired positionally with the child
    columns and passed to each column's ``scan`` together with one shared
    list of scanners. Once all rows are consumed, every PII type reported
    by the children is added to ``self._pii``.
    """
    # Hand the name to the logger as an argument so the message is only
    # interpolated when DEBUG logging is actually enabled.
    self.logger.debug("Scanning table name %s", self.get_name())

    # The same scanner instances are reused for every value of every row.
    scanners = [RegexScanner(), NERScanner()]
    for row in generator(
        column_list=self.get_children(), schema_name=self._schema, table_name=self
    ):
        # zip() stops at the shorter input, so surplus values or columns
        # in a row are silently ignored.
        for col, val in zip(self.get_children(), row):
            col.scan(val, scanners)

    # Plain loops instead of a list comprehension used only for its side
    # effect, which would build and discard a list of None values.
    for col in self.get_children():
        for pii_type in col.get_pii_types():
            self._pii.add(pii_type)

    self.logger.debug("%s has %s", self.get_name(), self.get_pii_types_str())
% (os.path.abspath(self._path), mime_type)
)
else:
for root, subdirs, files in os.walk(self._path):
for filename in files:
file_path = os.path.join(root, filename)
mime_type = magic.from_file(file_path, mime=True)
logging.debug(
"\t- full path: %s, mime_type: %s" % (file_path, mime_type)
)
self._files.append(File(file_path, mime_type))
context = {
"tokenizer": Tokenizer(),
"regex": RegexScanner(),
"ner": NERScanner(),
}
for f in self._files:
f.scan(context)