Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Prepare ``self.explorer`` with a hand-built database hierarchy.

    The explorer is a ``MockExplorer`` created from a ``Namespace`` with host
    "mock_connection", empty include/exclude filters and no catalog. Its
    ``_database`` is replaced by a database "database" that contains schema
    "testSchema", which holds table "t1" with columns "c1" and "c2"; column
    "c2" is pre-tagged with ``PiiTypes.LOCATION``.
    """
    options = Namespace(
        host="mock_connection",
        include_schema=(),
        exclude_schema=(),
        include_table=(),
        exclude_table=(),
        catalog=None,
    )
    self.explorer = MockExplorer(options)

    plain_column = Column("c1")
    location_column = Column("c2")
    location_column._pii = [PiiTypes.LOCATION]

    # NOTE(review): the table is constructed against schema "s1" but is then
    # attached to "testSchema" below -- kept as-is; confirm this is intended.
    table = Table(Schema("s1"), "t1")
    for column in (plain_column, location_column):
        table.add_child(column)

    test_schema = Schema("testSchema")
    test_schema.add_child(table)

    explorer = self.explorer
    explorer._database = Database("database")
    explorer._database.add_child(test_schema)
def test_location(self):
    """Check that scanning "Jonathan is in Bangalore" reports ``PiiTypes.LOCATION``."""
    types = self.parser.scan("Jonathan is in Bangalore")
    # assertIn shows the contents of ``types`` when the check fails, whereas
    # assertTrue(... in ...) would only report "False is not true".
    self.assertIn(PiiTypes.LOCATION, types)
def setUp(self):
    """Create ``self.schema`` ("testSchema") holding table "t1" with two columns.

    Column "c1" is left as constructed; column "c2" has its ``_pii`` set to
    ``[PiiTypes.LOCATION]``.
    """
    plain_column = Column("c1")
    location_column = Column("c2")
    location_column._pii = [PiiTypes.LOCATION]

    self.schema = Schema("testSchema")
    table = Table(self.schema, "t1")
    for column in (plain_column, location_column):
        table.add_child(column)
    self.schema.add_child(table)
def get_full_pii_table():
    """Return a table "full_pii" in which every column carries PII types.

    Column "a" is tagged with ``PiiTypes.PHONE``; column "b" is tagged with
    ``PiiTypes.ADDRESS`` and then ``PiiTypes.LOCATION``.
    """
    table = Table("test_store", "full_pii")

    phone_column = Column("a")
    phone_column.add_pii_type(PiiTypes.PHONE)

    place_column = Column("b")
    for pii_type in (PiiTypes.ADDRESS, PiiTypes.LOCATION):
        place_column.add_pii_type(pii_type)

    for column in (phone_column, place_column):
        table.add_child(column)
    return table
# NOTE(review): these statements use ``self`` but no enclosing ``def`` line is
# visible in this view; presumably they are the body of a test method -- confirm.
# Build table "full_pii" under schema "public" with columns "name" and "location".
schema = Schema("public")
table = Table(schema, "full_pii")
table.add_child(Column("name"))
table.add_child(Column("location"))
# Scan the table using ``self.data_generator`` (defined outside this view).
table.scan(self.data_generator)
# After the scan, the table and both of its columns are expected to report PII.
self.assertTrue(table.has_pii())
cols = table.get_children()
self.assertTrue(cols[0].has_pii())
self.assertTrue(cols[1].has_pii())
# The dictionary form is expected to list PERSON for "name" and LOCATION for "location".
self.assertEqual(
{
"columns": [
{"name": "name", "pii_types": [PiiTypes.PERSON]},
{"name": "location", "pii_types": [PiiTypes.LOCATION]},
],
"has_pii": True,
"name": "full_pii",
},
table.get_dict(),
)
def scan(self, text):
    """Return a list of the PiiTypes detected in ``text``.

    The text is passed through ``self.nlp`` and each entity label in the
    result's ``ents`` is mapped as follows: "PERSON" -> ``PiiTypes.PERSON``,
    "GPE" -> ``PiiTypes.LOCATION``, "DATE" -> ``PiiTypes.BIRTH_DATE``. Other
    labels are ignored. Each type appears at most once; the list is built
    from a set, so its order is not guaranteed.
    """
    logging.debug("Processing '%s'", text)
    found = set()
    for entity in self.nlp(text).ents:
        label = entity.label_
        logging.debug("Found %s", label)
        if label == "PERSON":
            found.add(PiiTypes.PERSON)
        elif label == "GPE":
            found.add(PiiTypes.LOCATION)
        elif label == "DATE":
            found.add(PiiTypes.BIRTH_DATE)

    summary = ",".join(map(str, found))
    logging.debug("PiiTypes are %s", summary)
    return list(found)