from funcy import first, last


def _reverse_inputs_and_indices(encoded_sentence_forward, output_index_list_forward):
    """Reverse sequence of character codes and list of output indices."""
    if len(encoded_sentence_forward) >= 2:  # sentence should at least have start, end characters
        start_sentence_value = first(encoded_sentence_forward)
        end_sentence_value = last(encoded_sentence_forward)
        encoded_sentence_length = len(encoded_sentence_forward)
        # Reverse all character codes in the sentence without affecting the first and last
        # elements (those are the special start_sentence_value and end_sentence_value)
        encoded_sentence_back = [start_sentence_value]
        encoded_sentence_back.extend(encoded_sentence_forward[-2:0:-1])  # skip start and end
        encoded_sentence_back.append(end_sentence_value)
    else:
        encoded_sentence_back = []
    # Compute backward output indices
    if len(output_index_list_forward) == 0:
        locations_before_tokens = []
    else:
        locations_before_tokens = [0] + output_index_list_forward[:-1]
    # (snippet truncated here: the original goes on to build the backward index list)
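The slice in the middle is the subtle part: a quick sketch with made-up character codes, where 1 and 2 stand in for the start/end markers:

from funcy import first, last

codes = [1, 10, 20, 30, 2]  # illustrative values: 1 = start marker, 2 = end marker
flipped = [first(codes)] + codes[-2:0:-1] + [last(codes)]
assert flipped == [1, 30, 20, 10, 2]  # interior reversed, markers stay in place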
from funcy import first

# SeriesAnnotation, is_samples_concordant and _fleiss_kappa are project-specific
# (a Django model and helper functions) and are assumed to be importable here.


def update_canonical(canonical_pk):
    canonical = SeriesAnnotation.objects.select_for_update().get(pk=canonical_pk)
    raw_annos = canonical.raw_annotations.prefetch_related('sample_annotations') \
                         .filter(is_active=True).order_by('pk')
    # Disable if no raw sources
    canonical.is_active = bool(raw_annos)

    kappas = [a.best_kappa for a in raw_annos if a.best_kappa]
    best_cohens_kappa = max(kappas) if kappas else None

    # Update canonical sample annotations: prefer an annotation that agrees with
    # another, then the best-scoring one, then simply the first
    source = first(a for a in raw_annos if a.agrees_with_id) \
        or first(a for a in raw_annos if a.best_kappa is not None
                 and a.best_kappa == best_cohens_kappa and a.best_kappa > 0) \
        or first(raw_annos)
    if source and not is_samples_concordant(canonical, source):
        canonical.sample_annotations.all().delete()
        canonical.fill_samples([(s.sample_id, s.annotation)
                                for s in source.sample_annotations.all()])

    # Update canonical stats
    if source:
        canonical.column = source.column
        canonical.regex = source.regex
    # Calculate Fleiss' kappa for all existing annotations/validations
    canonical.fleiss_kappa = _fleiss_kappa([a.sample_annotations.all() for a in raw_annos]) \
        if raw_annos else None
    canonical.best_cohens_kappa = best_cohens_kappa
    canonical.annotations = raw_annos.count()
    canonical.authors = len(set(a.created_by_id for a in raw_annos))
    canonical.save()
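One caveat worth a sketch: select_for_update() raises TransactionManagementError unless a transaction is open, so the caller must hold one. A minimal example assuming standard Django (the wrapper name is invented):

from django.db import transaction

def update_canonical_locked(canonical_pk):
    # select_for_update() needs an open transaction to take the row lock
    with transaction.atomic():
        update_canonical(canonical_pk)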
import funcy as fy

# TableDesignSemanticError and validate_semantics_of_table_or_ctas belong to the
# surrounding project; the def line below is reconstructed, as the snippet begins
# mid-function.


def validate_semantics_of_upstream_table(table_design):
    validate_semantics_of_table_or_ctas(table_design)
    if "depends_on" in table_design:
        raise TableDesignSemanticError("upstream table '%s' has dependencies listed" % table_design["name"])

    constraints = table_design.get("constraints", [])
    constraint_types_in_design = [constraint_type for constraint in constraints for constraint_type in constraint]
    for constraint_type in constraint_types_in_design:
        if constraint_type in ("natural_key", "surrogate_key"):
            raise TableDesignSemanticError(
                "upstream table '%s' has unexpected %s constraint" % (table_design["name"], constraint_type)
            )

    [split_by_name] = table_design.get("extract_settings", {}).get("split_by", [None])
    if split_by_name:
        split_by_column = fy.first(fy.where(table_design["columns"], name=split_by_name))
        if split_by_column.get("skipped", False):
            raise TableDesignSemanticError("split-by column must not be skipped")
        if not split_by_column.get("not_null", False):
            raise TableDesignSemanticError("split-by column must have not-null constraint")
        if split_by_column["type"] not in ("int", "long", "date", "timestamp"):
            raise TableDesignSemanticError(
                "type of split-by column must be int, long, date or timestamp, not '{}'".format(split_by_column["type"])
            )
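To make the extract_settings handling concrete, here is a made-up design fragment that passes all of the checks above (the shape is inferred from this code, not from the project's real schema):

table_design = {
    "name": "www.orders",
    "columns": [
        {"name": "order_id", "type": "long", "not_null": True},
        {"name": "note", "type": "string", "skipped": True},
    ],
    "extract_settings": {"split_by": ["order_id"]},
}
# The single-element unpacking pulls out the one allowed split-by column name:
[split_by_name] = table_design.get("extract_settings", {}).get("split_by", [None])
assert split_by_name == "order_id"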
from funcy import first, last

# NOTE: the original def line and the opening of this docstring were truncated in the
# snippet; the name and signature below are reconstructed from the body and docstring.


def _spans_to_document_spans(span_lists, token_lists, document):
    """Map sentence-relative token spans to spans in the original document.

    Args:
        span_lists: list of list of (start, end) int pairs, token spans relative to each sentence.
        token_lists: list of list of str. sentences and tokens in document.
        document: str. raw text of predicted document.

    Returns:
        document_span_lists: A list of lists of tuples of int (start, end)
    """
    sentence_lengths = [last(span_list)[-1] for span_list in span_lists]
    sentence_starts = []
    offset = 0
    # We have to base our locations on the original document to deal with weird sentences,
    # e.g. "Yuliya loves cats. Ray loves dogs.", or the case where a sentence is split
    # mid-word because it exceeds the maximum sentence length.
    # Find each sentence's first token in the remaining document text to get its offset.
    for length, token_list in zip(sentence_lengths, token_lists):
        next_start = document[offset:].find(first(token_list))
        offset = offset + next_start
        sentence_starts.append(offset)
        offset = offset + length
    # Shift the sentence-relative spans so they line up with the original text
    document_span_lists = []
    for start, span_list in zip(sentence_starts, span_lists):
        document_span_lists.append(
            [(span_start + start, span_end + start) for (span_start, span_end) in span_list]
        )
    return document_span_lists
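A small check of the offset logic, using the reconstructed signature above and invented data:

doc = "Yuliya loves cats. Ray loves dogs."
tokens = [["Yuliya", "loves", "cats", "."], ["Ray", "loves", "dogs", "."]]
spans = [[(0, 6), (7, 12), (13, 17), (17, 18)], [(0, 3), (4, 9), (10, 14), (14, 15)]]
result = _spans_to_document_spans(spans, tokens, doc)
assert result[1][0] == (19, 22)  # "Ray" starts at index 19 of the full document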
import funcy


def get_only_element_from_collection(one_element_collection):
    """Assert that the collection has exactly one element, then return that element."""
    if len(one_element_collection) != 1:
        raise AssertionError(
            "Expected a collection with exactly one element, but got: {}".format(
                one_element_collection
            )
        )
    return funcy.first(one_element_collection)
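Usage is straightforward; anything other than a one-element collection fails loudly:

assert get_only_element_from_collection([42]) == 42
get_only_element_from_collection([1, 2])  # raises AssertionError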
from funcy import first


def _shift_spans_to_start_at_zero(spans):
    """Shift all spans in the sentence by the same amount so the first token starts at zero.

    Args:
        spans: list of lists of character-level spans, one span per token, one list per sentence

    Returns:
        list of list of spans shifted so that first token in each sentence starts at zero
    """
    adjusted_spans = []
    for span_list in spans:
        if len(span_list) > 0:
            offset = first(span_list)[0]
            adjusted_spans.append([(span[0] - offset, span[1] - offset) for span in span_list])
        else:
            adjusted_spans.append([])
    return adjusted_spans
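For example, a sentence whose first token starts at offset 19 is pulled back to zero, and empty sentences pass through untouched:

assert _shift_spans_to_start_at_zero([[(19, 22), (23, 28)], []]) == [[(0, 3), (4, 9)], []]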
from funcy import first

def account_from_auths():
    # `op` is taken from the enclosing scope (an operation dict)
    return first(op.get('required_auths', op.get('required_posting_auths')))
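Note that op.get('required_auths', ...) only falls back when the key is missing entirely; an empty required_auths list is returned as-is, and funcy's first([]) yields None. A made-up op dict to illustrate:

op = {'required_auths': [], 'required_posting_auths': ['alice']}
# The key exists, so the posting auths are never consulted
assert first(op.get('required_auths', op.get('required_posting_auths'))) is None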
import funcy as fy
from typing import List, Optional

# Method of an extractor class in the surrounding project; RelationDescription,
# self.max_partitions and self.maximize_partitions come from that context.
def build_sqoop_partition_options(
    self, relation: RelationDescription, partition_key: Optional[str], table_size: int
) -> List[str]:
    """
    Build the partitioning-related arguments for Sqoop.
    """
    if partition_key:
        column = fy.first(fy.where(relation.table_design["columns"], name=partition_key))
        if column["type"] in ("date", "timestamp"):
            quoted_key_arg = """CAST(DATE_PART('epoch', "{}") AS BIGINT)""".format(partition_key)
        else:
            quoted_key_arg = '"{}"'.format(partition_key)

        if relation.num_partitions:
            # num_partitions explicitly set in the design file overrides the dynamic determination.
            num_mappers = min(relation.num_partitions, self.max_partitions)
        else:
            num_mappers = self.maximize_partitions(table_size)

        if num_mappers > 1:
            return ["--split-by", quoted_key_arg, "--num-mappers", str(num_mappers)]

    # Use 1 mapper if either there is no partition key or the partitioner returns only one partition
    return ["--num-mappers", "1"]