# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a method body — the enclosing `def` is outside this
# view and indentation appears to have been stripped; code left byte-identical.
# Event generator for streaming results.
events = self.event_stream(
query_string=self._config.query,
return_fields=[self._config.field]
)
# Build an LSH index and per-event MinHash signatures over the configured
# field, splitting field values on the configured delimiters.
lsh, minhashes = similarity.new_lsh_index(
events, field=self._config.field,
delimiters=self._config.delimiters, num_perm=self._config.num_perm,
threshold=self._config.threshold)
# One MinHash per event, so the mapping size is the number of events scored.
total_num_events = len(minhashes)
for key, minhash in minhashes.items():
# Each key identifies one event: (event_id, event_type, index_name).
event_id, event_type, index_name = key
event_dict = dict(_id=event_id, _type=event_type, _index=index_name)
event = interface.Event(event_dict, self.datastore)
# Similarity score for this event against the LSH index (per the
# SimilarityScorer docstring this is based on Jaccard distance).
score = similarity.calculate_score(lsh, minhash, total_num_events)
attributes_to_add = {'similarity_score': score}
event.add_attributes(attributes_to_add)
# Commit the event to the datastore.
event.commit()
# Human-readable summary returned to the analyzer framework.
msg = 'Similarity scorer processed {0:d} events for data_type {1:s}'
return msg.format(total_num_events, self._config.data_type)
"""The sketch analyzer for chained events."""
from __future__ import unicode_literals
import collections
import uuid
from timesketch.lib import emojis
from timesketch.lib.analyzers import interface
from timesketch.lib.analyzers import manager
from timesketch.lib.analyzers import chain_plugins # pylint: disable=unused-import
from timesketch.lib.analyzers.chain_plugins import manager as chain_manager
class ChainSketchPlugin(interface.BaseSketchAnalyzer):
    """Sketch analyzer for chained events.

    The purpose of the chain analyzer is to chain together events that can
    be described as linked, either by sharing some common entities, or
    one event being a derivative of another event. An example of this
    would be that a browser downloads an executable, which then later gets
    executed. The signs of execution could lie in multiple events, from
    different sources, but they are all linked or chained together. This
    could help an analyst see the connection between these separate but
    chained events. Another example could be a document written and then
    compressed into a ZIP file, which would then be exfilled through some
    means. If the document and the ZIP file are chained together it could be
    easier for the analyst to track the meaning of an exfil event involving
    the compressed file.
    """
    # NOTE(review): the class body (NAME, methods, etc.) is not visible in
    # this chunk — it appears to continue elsewhere in the file.
Dictionary with configuration parameters.
"""
# NOTE(review): fragment — the enclosing `def` and the opening of its
# docstring are outside this view; code left byte-identical.
# Look up a registered per-data_type configuration, if one exists.
config_dict = self.CONFIG_REGISTRY.get(self._data_type)
# If there is no config for this data_type, use default config and set
# the query based on the data_type.
if not config_dict:
config_dict = self.DEFAULT_CONFIG
config_dict['query'] = 'data_type:"{0}"'.format(self._data_type)
# Always stamp the index name and data_type onto the returned config.
# NOTE(review): when the registry hit path is taken, DEFAULT_CONFIG is not
# copied here — if DEFAULT_CONFIG is a shared dict it gets mutated in place;
# verify at the (unseen) definition site.
config_dict['index_name'] = self._index_name
config_dict['data_type'] = self._data_type
return config_dict
class SimilarityScorer(interface.BaseSketchAnalyzer):
"""Score events based on Jaccard distance."""
# Analyzer registration name.
NAME = 'similarity_scorer'
# This analyzer does not depend on any other analyzers having run first.
DEPENDENCIES = frozenset()
def __init__(self, index_name, sketch_id, data_type=None):
"""Initializes a similarity scorer.
Args:
index_name: Elasticsearch index name.
sketch_id: Sketch ID.
data_type: Optional name of the data_type to score.
"""
self._config = None
# Only build a scorer config when a data_type is supplied; otherwise
# self._config stays None (unconfigured).
if data_type:
self._config = SimilarityScorerConfig(index_name, data_type)
# NOTE(review): no super().__init__(index_name, sketch_id) call is visible
# here — __init__ likely continues past this chunk; confirm in full file.