def filters_from_filtered_tasks(filtered_tasks):
    filters = []
    if filtered_tasks:
        for t in filtered_tasks:
            spec = t.split(":")
            if len(spec) == 1:
                filters.append(track.TaskNameFilter(spec[0]))
            elif len(spec) == 2:
                if spec[0] == "type":
                    filters.append(track.TaskOpTypeFilter(spec[1]))
                else:
                    raise exceptions.SystemSetupError(
                        "Invalid format for filtered tasks: [%s]. Expected [type] but got [%s]." % (t, spec[0]))
            else:
                raise exceptions.SystemSetupError("Invalid format for filtered tasks: [%s]" % t)
    return filters
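
A minimal usage sketch of the parser above, assuming the filter classes and the exceptions module come from Rally's imports as used in the snippet:

# Hypothetical usage (not from the original source): filter specs as they might
# arrive from a command line option.
specs = ["index-append", "type:search"]
filters = filters_from_filtered_tasks(specs)
# "index-append" -> track.TaskNameFilter("index-append")  (match tasks by name)
# "type:search"  -> track.TaskOpTypeFilter("search")      (match tasks by operation type)
# Any other prefix, e.g. "op:search", raises exceptions.SystemSetupError.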

                number_of_documents=2000000,
                compressed_size_in_bytes=123502,
                uncompressed_size_in_bytes=148039748
            ),
            # The type used for documents being percolated:
            track.Type(
                name="content",
                mapping_file_name="document-mapping.json"
            )
        ])
    ],
    challenges=[
        track.Challenge(
            name="append-no-conflicts",
            description="Append documents without any ID conflicts",
            benchmark={
                track.BenchmarkPhase.index: track.IndexBenchmarkSettings(index_settings=percolatorIndexSettings),
                track.BenchmarkPhase.stats: track.LatencyBenchmarkSettings(warmup_iteration_count=100, iteration_count=100),
                track.BenchmarkPhase.search: track.LatencyBenchmarkSettings(
                    warmup_iteration_count=100, iteration_count=100,
                    queries=[
                        PercolatorQuery(content="president bush"),
                        PercolatorQuery(content="saddam hussein"),
                        PercolatorQuery(content="hurricane katrina"),
                        PercolatorQuery(content="google"),
                        PercolatorQueryNoScoring(content="google"),
                        PercolatorQueryWithHighlighting(),
                        PercolatorQuery(content="ignore me"),
                        PercolatorQueryNoScoring(content="ignore me")
                    ])
            }
        ),
        track.Challenge(
            name="append-fast-no-conflicts",
            description="append-only, using 4 GB heap, and these settings: <pre>%s</pre>" % track.benchmarkFastSettings,
            benchmark={
                track.BenchmarkPhase.index: track.IndexBenchmarkSettings(index_settings=track.benchmarkFastSettings)
            }
        ),
        track.Challenge(
            name="append-fast-with-conflicts",
            description="the same as fast, except we pass in an ID (worst case random UUID) for each document and 25% of the time the ID "
                        "already exists in the index.",
            benchmark={
                track.BenchmarkPhase.index: track.IndexBenchmarkSettings(index_settings=track.benchmarkFastSettings,
                                                                         id_conflicts=track.IndexIdConflict.SequentialConflicts)
            }
description="This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk "
"request against Elasticsearch",
source_root_url="http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geonames",
index_name=GEO_NAMES_INDEX_NAME,
type_name=GEO_NAMES_TYPE_NAME,
number_of_documents=8647880,
compressed_size_in_bytes=197857614,
uncompressed_size_in_bytes=2790927196,
document_file_name="documents.json.bz2",
mapping_file_name="mappings.json",
challenges=[
track.Challenge(
name="append-no-conflicts",
description="Append documents without any ID conflicts",
benchmark={
track.BenchmarkPhase.index: track.IndexBenchmarkSettings(index_settings=track.greenNodeSettings),
track.BenchmarkPhase.stats: track.LatencyBenchmarkSettings(warmup_iteration_count=100, iteration_count=100),
track.BenchmarkPhase.search: track.LatencyBenchmarkSettings(warmup_iteration_count=1000, iteration_count=1000,
queries=[
DefaultQuery(),
TermQuery(),
PhraseQuery(),
CountryAggQuery(use_request_cache=False),
CountryAggQuery(suffix="_cached", use_request_cache=True),
ScrollQuery()
])
}
),
track.Challenge(
name="append-fast-no-conflicts",
description="append-only, using 4 GB heap, and these settings: <pre>%s</pre>" % track.benchmarkFastSettings,
benchmark={
mapping_file_name="mappings.json",
document_file_name="documents-%s.json.bz2" % date,
number_of_documents=num_docs,
compressed_size_in_bytes=compressed,
uncompressed_size_in_bytes=uncompressed)]))
return indices

loggingTrackSpec = track.Track(
    name="logging",
    short_description="Logging benchmark",
    description="This benchmark indexes HTTP server log data from the 1998 world cup.",
    source_root_url="http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/logging",
    indices=create_indices(),
    challenges=[
        track.Challenge(
            name="append-no-conflicts",
            description="Append documents without any ID conflicts",
            benchmark={
                track.BenchmarkPhase.index: track.IndexBenchmarkSettings(index_settings=track.greenNodeSettings),
                track.BenchmarkPhase.stats: track.LatencyBenchmarkSettings(warmup_iteration_count=100, iteration_count=100),
                track.BenchmarkPhase.search: track.LatencyBenchmarkSettings(
                    warmup_iteration_count=1000, iteration_count=1000,
                    queries=[
                        DefaultQuery(),
                        TermQuery(),
                        RangeQuery(),
                        HourlyAggQuery(),
                        ScrollQuery()
                    ])
            }
        ),
        track.Challenge(

def parse_task(self, task_spec, ops, challenge_name, default_warmup_iterations=None, default_iterations=None,
               default_warmup_time_period=None, default_time_period=None, completed_by_name=None):
    op_spec = task_spec["operation"]
    if isinstance(op_spec, str) and op_spec in ops:
        op = ops[op_spec]
    else:
        # may as well be an inline operation
        op = self.parse_operation(op_spec, error_ctx="inline operation in challenge %s" % challenge_name)

    schedule = self._r(task_spec, "schedule", error_ctx=op.name, mandatory=False, default_value="deterministic")

    task = track.Task(name=self._r(task_spec, "name", error_ctx=op.name, mandatory=False, default_value=op.name),
                      operation=op,
                      meta_data=self._r(task_spec, "meta", error_ctx=op.name, mandatory=False),
                      warmup_iterations=self._r(task_spec, "warmup-iterations", error_ctx=op.name, mandatory=False,
                                                default_value=default_warmup_iterations),
                      iterations=self._r(task_spec, "iterations", error_ctx=op.name, mandatory=False, default_value=default_iterations),
                      warmup_time_period=self._r(task_spec, "warmup-time-period", error_ctx=op.name, mandatory=False,
                                                 default_value=default_warmup_time_period),
                      time_period=self._r(task_spec, "time-period", error_ctx=op.name, mandatory=False,
                                          default_value=default_time_period),
                      clients=self._r(task_spec, "clients", error_ctx=op.name, mandatory=False, default_value=1),
                      # this will work because op_name must always be set, i.e. it is never `None`.
                      completes_parent=(op.name == completed_by_name),
                      schedule=schedule,
                      # this is to provide scheduler-specific parameters for custom schedulers.
                      params=task_spec)
    if task.warmup_iterations is not None and task.time_period is not None:

    param_source = self._r(op_spec, "param-source", error_ctx=error_ctx, mandatory=False)
    # just pass through all parameters by default
    params = op_spec
    try:
        op = track.OperationType.from_hyphenated_string(op_type_name)
        if "include-in-reporting" not in params:
            params["include-in-reporting"] = not op.admin_op
        op_type = op.name
        self.logger.debug("Using built-in operation type [%s] for operation [%s].", op_type, op_name)
    except KeyError:
        self.logger.info("Using user-provided operation type [%s] for operation [%s].", op_type_name, op_name)
        op_type = op_type_name

    try:
        return track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=params, param_source=param_source)
    except exceptions.InvalidSyntax as e:
        raise TrackSyntaxError("Invalid operation [%s]: %s" % (op_name, str(e)))
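
To make the lookup above concrete, here is an illustrative sketch of the kind of inline operation spec this parser consumes. Only the "param-source" key is read explicitly in this excerpt; the other keys and the surrounding variables (op_name, op_type_name, meta_data) are assumed to have been read from the same dict earlier in the method.

# Illustrative only: an inline operation spec roughly as it could appear in a track.
# Key names other than "param-source" are assumptions based on the variables used above.
op_spec = {
    "name": "default-search",             # would become op_name
    "operation-type": "search",           # would become op_type_name
    "body": {"query": {"match_all": {}}}
}
# If the operation type resolves to a built-in OperationType, the parser injects
# "include-in-reporting" (True for non-admin operations) unless the spec already sets it;
# an unknown operation type raises KeyError and is passed through verbatim instead.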

        },
        "highlight": {
            "fields": {
                "body": {}
            }
        }
    }''')
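
For orientation, a hedged sketch of what the full request body around this highlight section could look like. The percolate query itself is standard Elasticsearch syntax, but the field names ("query", "body") and the document content are assumptions based on the highlight section above and the "content" type defined in the track below.

# Hypothetical complete body for a percolate search with highlighting (assumed field names):
body = '''
{
  "query": {
    "percolate": {
      "field": "query",
      "document": {
        "body": "hurricane katrina"
      }
    }
  },
  "highlight": {
    "fields": {
      "body": {}
    }
  }
}'''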

percolatorTrackSpec = track.Track(
    name="percolator",
    short_description="Percolator benchmark based on 2M AOL queries",
    description="This benchmark indexes 2M AOL queries and uses the percolate query to match them",
    source_root_url="http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/percolator",
    indices=[
        track.Index(name="queries", types=[
            # The type for the percolator queries:
            track.Type(
                name="percolator",
                mapping_file_name="queries-mapping.json",
                document_file_name="queries.json.bz2",
                number_of_documents=2000000,
                compressed_size_in_bytes=123502,
                uncompressed_size_in_bytes=148039748
            ),
            # The type used for documents being percolated:
            track.Type(
                name="content",
                mapping_file_name="document-mapping.json"
            )
        ])
    ],

        # Note that starting with ES 2.0, the initial call to search() already returns the first result page,
        # so we have to retrieve one page less
        for i in range(self.PAGES - 1):
            hit_count = len(r["hits"]["hits"])
            if hit_count == 0:
                # done
                break
            r = es.scroll(scroll_id=self.scroll_id, scroll="10s")

    def close(self, es):
        if self.scroll_id:
            es.clear_scroll(scroll_id=self.scroll_id)
            self.scroll_id = None
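
The loop above only makes sense together with the initial search call that opens the scroll context, which this excerpt omits. Below is a self-contained sketch of the same pattern with the elasticsearch-py client, independent of Rally's query classes; the index name, page budget, and page size are illustrative assumptions.

from elasticsearch import Elasticsearch

es = Elasticsearch()          # assumes a node on localhost:9200
PAGES = 25                    # assumed page budget, mirroring self.PAGES above
PAGE_SIZE = 1000              # assumed page size

# The initial search already returns the first page plus a scroll id ...
r = es.search(index="geonames", sort="_doc", scroll="10s", size=PAGE_SIZE)
scroll_id = r["_scroll_id"]

# ... so only PAGES - 1 additional scroll calls are needed.
for _ in range(PAGES - 1):
    if not r["hits"]["hits"]:
        break
    r = es.scroll(scroll_id=scroll_id, scroll="10s")
    scroll_id = r["_scroll_id"]

# Free the server-side scroll context when done.
es.clear_scroll(scroll_id=scroll_id)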

geonamesTrackSpec = track.Track(
    name="geonames",
    short_description="Standard benchmark in Rally (8.6M POIs from Geonames)",
    description="This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk "
                "request against Elasticsearch",
    source_root_url="http://benchmarks.elasticsearch.org.s3.amazonaws.com/corpora/geonames",
    index_name=GEO_NAMES_INDEX_NAME,
    type_name=GEO_NAMES_TYPE_NAME,
    number_of_documents=8647880,
    compressed_size_in_bytes=197857614,
    uncompressed_size_in_bytes=2790927196,
    document_file_name="documents.json.bz2",
    mapping_file_name="mappings.json",
    challenges=[
        track.Challenge(
            name="append-no-conflicts",
            description="Append documents without any ID conflicts",

def create_indices():
    indices = []
    for index in logging_indices:
        if index:
            date, uncompressed, compressed, num_docs = index
            indices.append(track.Index(name=LOGGING_INDEX_PREFIX + date, types=[
                track.Type(
                    name=LOGGING_TYPE_NAME,
                    mapping_file_name="mappings.json",
                    document_file_name="documents-%s.json.bz2" % date,
                    number_of_documents=num_docs,
                    compressed_size_in_bytes=compressed,
                    uncompressed_size_in_bytes=uncompressed)]))
    return indices
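
A sketch of the input create_indices() expects. The tuple layout follows the unpacking in the loop above, while the concrete values and the LOGGING_INDEX_PREFIX / LOGGING_TYPE_NAME constants are illustrative assumptions, not the values from the original track.

# Hypothetical module-level data, matching how the function unpacks each entry.
LOGGING_INDEX_PREFIX = "logs-"
LOGGING_TYPE_NAME = "type"

logging_indices = [
    # (date, uncompressed_size_in_bytes, compressed_size_in_bytes, number_of_documents)
    ("26041998", 1234567890, 123456789, 10000000),
    None,  # falsy entries are skipped by the `if index:` guard
]

indices = create_indices()
# -> [track.Index(name="logs-26041998", types=[track.Type(name="type", ...)])]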