Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def ingest(self, file_path, entity):
    """Ingest a document as a ``Pages`` entity via a PDF conversion.

    The entity schema is set to ``Pages``, OOXML metadata is extracted
    from ``file_path``, and the document is converted with
    ``document_to_pdf``; the resulting path is then handed to
    ``pdf_alternative_extract`` together with the entity.
    """
    entity.schema = model.get('Pages')
    self.ooxml_extract_metadata(file_path, entity)
    converted = self.document_to_pdf(file_path, entity)
    self.pdf_alternative_extract(entity, converted)
def make_entity(self, schema, parent=None):
    """Build a new entity of the given schema and link it to ``parent``.

    ``schema`` is resolved through ``model.get``. The name of the current
    job's dataset is used as the ``key_prefix`` of the new entity, which is
    passed to ``make_child`` along with ``parent`` before being returned.
    """
    resolved = model.get(schema)
    dataset_name = self.stage.job.dataset.name
    child = model.make_entity(resolved, key_prefix=dataset_name)
    self.make_child(parent, child)
    return child
# Suggest properties of a schema whose name or label contains the 'prefix'
# request argument (compared in lower case).
# NOTE(review): this excerpt has lost its indentation and is truncated -- the
# jsonify(...) call at the end is not closed in the visible source.
def suggest_property():
# Lower-case and trim the prefix so the comparisons below ignore case.
prefix = request.args.get('prefix', '').lower().strip()
tag_request(prefix=prefix)
# Falls back to Entity.THING when no 'schema' argument is supplied.
schema = request.args.get('schema', Entity.THING)
matches = []
for prop in model.get(schema).properties.values():
# NOTE(review): this value is overwritten by the next line, so the assignment
# has no effect. An empty prefix still matches everything because '' is a
# substring of every string.
match = not len(prefix)
match = prefix in prop.name.lower()
match = match or prefix in prop.label.lower()
if match:
matches.append({
# The property name is used for both 'id' and 'quid'.
'id': prop.name,
'quid': prop.name,
'name': prop.label,
# Every match is given the same fixed score.
'r:score': 100,
'n:type': {
'id': '/properties/property',
'name': 'Property'
}
})
return jsonify({
"code": "/api/status/ok",
# Ingest an Excel workbook: the entity becomes a 'Workbook' and a 'Table'
# entity is created for each sheet name in the workbook.
# NOTE(review): this excerpt has lost its indentation and is truncated -- the
# body of the final `except` clause is not visible.
def ingest(self, file_path, entity):
entity.schema = model.get('Workbook')
self.ooxml_extract_metadata(file_path, entity)
# Any failure while opening the workbook is re-raised as a ProcessingException.
try:
book = load_workbook(file_path, read_only=True)
except Exception as err:
raise ProcessingException('Invalid Excel file: %s' % err)
try:
for name in book.sheetnames:
table = self.manager.make_entity('Table', parent=entity)
# The table id is built from the workbook entity id and the sheet name.
table.make_id(entity.id, name)
table.set('title', name)
log.debug('Sheet: %s', name)
self.emit_row_tuples(table, self.generate_rows(book[name]))
# A table is only emitted when it has a 'csvHash' value.
if table.has('csvHash'):
self.manager.emit_entity(table)
except Exception as err:
def model(self):
    """Return whatever ``model.get`` yields for this object's ``schema``."""
    lookup = model.get
    return lookup(self.schema)
def ingest(self, file_path, entity):
    """Ingest a directory by crawling its contents.

    An entity whose schema equals ``Document`` is switched to ``Folder``.
    The path is crawled with ``entity`` as parent only when it is not
    ``None`` and refers to a directory; otherwise nothing else happens.
    """
    if entity.schema == model.get('Document'):
        entity.schema = model.get('Folder')

    if file_path is not None and file_path.is_dir():
        self.crawl(self.manager, file_path, parent=entity)
# Build a 'LegalEntity' and a linked 'Sanction' from a group of rows.
# NOTE(review): this excerpt has lost its indentation, so the extent of the
# `for row in rows` body cannot be read off the text -- presumably everything
# from the 'Group Type' check onwards runs once per row; confirm against the
# original file. The excerpt may also end before the function does.
def parse_entry(emitter, group, rows):
entity = emitter.make('LegalEntity')
entity.make_id(group)
sanction = emitter.make('Sanction')
# The sanction id is derived from the entity id plus a fixed suffix.
sanction.make_id(entity.id, 'Sanction')
sanction.add('entity', entity)
sanction.add('authority', 'HM Treasury Financial sanctions targets')
sanction.add('country', 'gb')
for row in rows:
# A 'Group Type' of 'Individual' switches the entity schema to 'Person'.
if row.pop('Group Type') == 'Individual':
entity.schema = model.get('Person')
# 'Alias Type' is removed from the row and its value is discarded.
row.pop('Alias Type', None)
# 'Name 1' is recorded as firstName and 'Name 6' as lastName; all six
# name parts are passed to jointext to form the full name.
name1 = row.pop('Name 1')
entity.add('firstName', name1, quiet=True)
name2 = row.pop('Name 2')
name3 = row.pop('Name 3')
name4 = row.pop('Name 4')
name5 = row.pop('Name 5')
name6 = row.pop('Name 6')
entity.add('lastName', name6, quiet=True)
name = jointext(name1, name2, name3, name4, name5, name6)
# The joined name is stored as 'name' unless the entity already has one,
# in which case it is stored as 'alias'.
if not entity.has('name'):
entity.add('name', name)
else:
entity.add('alias', name)
entity.add('title', row.pop('Title'), quiet=True)
sanction.add('program', row.pop('Regime'))
def ingest(self, file_path, entity):
    """Ingest an RFC822-style message file as an ``Email`` entity.

    The file is opened in binary mode and parsed with the ``email``
    package. Headers are extracted, message ids are resolved, and every
    part yielded by ``walk()`` is passed to ``parse_part``.

    Raises:
        ProcessingException: if parsing raises ``MessageError``,
            ``ValueError`` or ``IndexError``; the original error is
            chained as the cause.
    """
    entity.schema = model.get('Email')
    try:
        with open(file_path, 'rb') as stream:
            message = email.message_from_binary_file(stream, policy=default)
    except (MessageError, ValueError, IndexError) as exc:
        raise ProcessingException('Cannot parse email: %s' % exc) from exc

    self.extract_msg_headers(entity, message)
    self.resolve_message_ids(entity)
    for segment in message.walk():
        self.parse_part(entity, segment)
def _iter_value_entities(type_, value):
    """Yield ``(entity_id, prop)`` pairs for indexed entities holding ``value``.

    A term query on ``type_.group`` is scanned over the read index for the
    schemata of ``type_``. For each hit, every property of the hit's schema
    whose type is ``type_`` is checked: the pair is yielded when ``value``
    is contained in the normalized set of that property's stored values.
    """
    es_query = {
        'query': {'term': {type_.group: value}},
        '_source': {'includes': ['schema', 'properties']},
    }
    type_schemata = model.get_type_schemata(type_)
    read_index = entities_read_index(schema=type_schemata)
    for hit in scan(es, index=read_index, query=es_query):
        hit_id = hit.get('_id')
        document = hit.get('_source')
        stored = document.get('properties')
        hit_schema = model.get(document.get('schema'))
        for prop in hit_schema.properties.values():
            # Only properties of the requested type can hold the value.
            if prop.type != type_:
                continue
            normalized = type_.normalize_set(stored.get(prop.name))
            if value in normalized:
                yield hit_id, prop
def ingest(self, file_path, entity):
    """Ingest an HTML file as a ``HyperText`` entity.

    The file is read through ``read_file_decoded``, its content is run
    through ``extract_html_content``, and the result is added to the
    entity as ``bodyText``.
    """
    entity.schema = model.get('HyperText')
    markup = self.read_file_decoded(entity, file_path)
    body_text = self.extract_html_content(entity, markup)
    entity.add('bodyText', body_text)