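import numpy as np

# `custom_layers.flatten` is used below but not defined in this snippet.
# Judging from its call sites (and from `util.flatten` in the reference
# e2e-coref code), a minimal equivalent flattens one level of nesting:
#
#     def flatten(l):
#         return [item for sublist in l for item in sublist]
#
# (The snippet also relies on a `custom_layers` module imported elsewhere.)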
def tensorize_example(self, example, is_training):
    """
    Takes a dictionary from the observation and transforms it into a set of
    numpy tensors for the tensorflow placeholders.
    Args:
        example: dict from observation
        is_training: bool flag, passed through as one of the returned values
    Returns: word_emb, char_index, text_len, speaker_ids, genre, is_training,
        gold_starts, gold_ends, cluster_ids; all are numpy tensors for the
        placeholders (is_training stays a bool).
    If the document contains more sentences than the "max_training_sentences"
    parameter allows, the method returns the result of 'truncate_example'
    instead.
    """
    clusters = example["clusters"]
    # Map every gold mention span to the id of the cluster it belongs to.
    gold_mentions = sorted(tuple(m) for m in custom_layers.flatten(clusters))
    gold_mention_map = {m: i for i, m in enumerate(gold_mentions)}
    cluster_ids = np.zeros(len(gold_mentions))
    for cluster_id, cluster in enumerate(clusters):
        for mention in cluster:
            cluster_ids[gold_mention_map[tuple(mention)]] = cluster_id
    sentences = example["sentences"]
    num_words = sum(len(s) for s in sentences)
    speakers = custom_layers.flatten(example["speakers"])
    assert num_words == len(speakers)
    max_sentence_length = max(len(s) for s in sentences)
    # The character tensor must be at least as wide as the largest CNN filter.
    max_word_length = max(max(max(len(w) for w in s) for s in sentences),
                          max(self.filter_widths))
    char_index = np.zeros([len(sentences), max_sentence_length, max_word_length])
    text_len = np.array([len(s) for s in sentences])
    doc_key = example["doc_key"]
    # Sentences may arrive as tuples; convert them to lists so the optional
    # lowercasing below can modify words in place.
    sentences = [list(s) for s in sentences]
    if self.emb_lowercase:
        for i, sentence in enumerate(sentences):
            for j, word in enumerate(sentence):
                sentences[i][j] = word.lower()
    for i, sentence in enumerate(sentences):
        for j, word in enumerate(sentence):
            # The snippet breaks off inside this loop. In the reference
            # e2e-coref implementation the loop fills `char_index` with
            # per-character vocabulary ids; `self.char_dict` is an assumption
            # borrowed from that code:
            char_index[i, j, :len(word)] = [self.char_dict[c] for c in word]
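
# A minimal usage sketch. The field values below are invented for
# illustration; only the field layout follows the code above.
if __name__ == "__main__":
    example = {
        # one entity with two mentions, each a [start, end] pair of word
        # indices over the flattened document
        "clusters": [[[0, 2], [4, 4]]],
        "sentences": [["The", "big", "dog", "barked"], ["It", "ran"]],
        "speakers": [["spk1"] * 4, ["spk1"] * 2],  # one speaker per word
        "doc_key": "nw_doc1",
    }
    # `model` would be an instance of the class defining tensorize_example;
    # the returned numpy tensors are then fed to the corresponding
    # tensorflow placeholders:
    # tensors = model.tensorize_example(example, is_training=True)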