Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def scan(self):
    """Append two fixed, pre-tagged File entries to ``self._files``.

    The first is ``/tmp/1`` (``text/plain``) tagged ``PiiTypes.BIRTH_DATE``;
    the second is ``/tmp/2`` (``application/pdf``) tagged
    ``PiiTypes.UNSUPPORTED``. Nothing is returned.
    """
    fixtures = (
        ("/tmp/1", "text/plain", PiiTypes.BIRTH_DATE),
        ("/tmp/2", "application/pdf", PiiTypes.UNSUPPORTED),
    )
    created = []
    for path, mime_type, pii_type in fixtures:
        entry = File(path, mime_type)
        entry._pii.add(pii_type)
        created.append(entry)
    # Register the entries only after both were built and tagged.
    self._files.extend(created)
def test_address(self):
    """Each address-related name must be reported as PiiTypes.ADDRESS."""
    address_names = ("address", "city", "state", "country", "zipcode", "postal")
    for name in address_names:
        # subTest labels a failure with the offending name; assertIn shows
        # both operands instead of a bare "False is not true".
        with self.subTest(name=name):
            self.assertIn(PiiTypes.ADDRESS, self.parser.scan(name))
def get_full_pii_table():
    """Build the ``test_store.full_pii`` table in which both columns carry PII.

    Column ``a`` is tagged PHONE; column ``b`` is tagged ADDRESS and LOCATION.

    Returns:
        The populated Table with columns ``a`` and ``b`` added as children.
    """
    table = Table("test_store", "full_pii")

    col_a = Column("a")
    col_a.add_pii_type(PiiTypes.PHONE)

    col_b = Column("b")
    for pii_type in (PiiTypes.ADDRESS, PiiTypes.LOCATION):
        col_b.add_pii_type(pii_type)

    for column in (col_a, col_b):
        table.add_child(column)
    return table
def test_gender(self):
    """The name "gender" must be reported as PiiTypes.GENDER."""
    # assertIn prints the missing member and the scanned result on failure,
    # which assertTrue(x in y) cannot do.
    self.assertIn(PiiTypes.GENDER, self.parser.scan("gender"))
def get_partial_pii_table():
    """Build the ``test_store.partial_pii`` table with one PII type per column.

    Column ``a`` is tagged PHONE and column ``b`` is tagged ADDRESS.

    Returns:
        The populated Table with columns ``a`` and ``b`` added as children.
    """
    table = Table("test_store", "partial_pii")

    col_a = Column("a")
    col_a.add_pii_type(PiiTypes.PHONE)

    col_b = Column("b")
    col_b.add_pii_type(PiiTypes.ADDRESS)

    for column in (col_a, col_b):
        table.add_child(column)
    return table
if ent.label_ == "PERSON":
types.add(PiiTypes.PERSON)
if ent.label_ == "GPE":
types.add(PiiTypes.LOCATION)
if ent.label_ == "DATE":
types.add(PiiTypes.BIRTH_DATE)
logging.debug("PiiTypes are %s", ",".join(str(x) for x in list(types)))
return list(types)
class ColumnNameScanner(Scanner):
    """Scanner that maps a name to PII types via keyword regexes.

    Each entry in ``regex`` pairs a PiiTypes member with a case-insensitive
    pattern; ``scan`` reports every member whose pattern matches the text.
    """

    # One pattern per PII type. Every key must appear exactly once: a repeated
    # key in a dict literal silently overwrites the earlier entry.
    regex = {
        PiiTypes.PERSON: re.compile(
            "^.*(firstname|fname|lastname|lname|"
            "fullname|maidenname|_name|"
            "nickname|name_suffix|name).*$",
            re.IGNORECASE,
        ),
        PiiTypes.EMAIL: re.compile("^.*(email|e-mail|mail).*$", re.IGNORECASE),
        PiiTypes.BIRTH_DATE: re.compile(
            "^.*(date_of_birth|dateofbirth|dob|"
            "birthday|date_of_death|dateofdeath).*$",
            re.IGNORECASE,
        ),
        PiiTypes.GENDER: re.compile("^.*(gender).*$", re.IGNORECASE),
        PiiTypes.NATIONALITY: re.compile("^.*(nationality).*$", re.IGNORECASE),
        PiiTypes.ADDRESS: re.compile(
            "^.*(address|city|state|county|country|"
            "zipcode|postal|zone|borough).*$",
            re.IGNORECASE,
        ),
        PiiTypes.USER_NAME: re.compile("^.*user(id|name|).*$", re.IGNORECASE),
        PiiTypes.PASSWORD: re.compile("^.*pass.*$", re.IGNORECASE),
        PiiTypes.SSN: re.compile("^.*(ssn|social).*$", re.IGNORECASE),
    }

    def scan(self, text):
        """Return the PII types whose pattern matches ``text``.

        Args:
            text: The string to test against every pattern in ``regex``.

        Returns:
            A list of the matching PiiTypes members, built from a set, so it
            has no duplicates and no guaranteed order. Empty when nothing
            matches.
        """
        types = {
            pii_type
            for pii_type, pattern in self.regex.items()
            if pattern.match(text) is not None
        }
        logging.debug("PiiTypes are %s", ",".join(str(x) for x in types))
        return list(types)
def scan(self, text):
    """Run CommonRegex over ``text`` and list the PII types it detects.

    Returns a list holding, in this order and only when the corresponding
    CommonRegex result is non-empty: PHONE, EMAIL, CREDIT_CARD, ADDRESS.
    """
    parsed = CommonRegex(text)
    # pylint: disable=no-member
    detections = (
        (parsed.phones, PiiTypes.PHONE),
        (parsed.emails, PiiTypes.EMAIL),
        (parsed.credit_cards, PiiTypes.CREDIT_CARD),
        (parsed.street_addresses, PiiTypes.ADDRESS),
    )
    return [pii_type for matches, pii_type in detections if matches]