Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment: builds one tf.train.SequenceExample holding clip-level and
# segment-level metadata for a single `row`, then yields it. The enclosing
# generator's signature and whatever supplies `row` and `video_dir` are
# outside this excerpt.
metadata = tf.train.SequenceExample()
# The clip file is "<row id>.mp4" inside video_dir.
filepath = os.path.join(video_dir, "%s.mp4" % row["id"])
# row["actions"] is a ";"-separated list of action entries.
actions = row["actions"].split(";")
# Parallel lists: position k in each list describes the k-th parsed action.
action_indices = []
action_strings = []
action_start_times = []
action_end_times = []
for action in actions:
# Skip empty entries (an empty "actions" field splits to [""]).
if not action:
continue
# Each entry must have exactly three space-separated fields; any other
# count raises ValueError on unpacking.
string, start, end = action.split(" ")
# The numeric label index is the label string with its first character
# dropped.
action_indices.append(int(string[1:]))
action_strings.append(bytes23(string))
# Start/end are parsed as floats, scaled by SECONDS_TO_MICROSECONDS and
# truncated to int (presumably seconds -> microseconds, going by the
# constant's name; its value is not visible here).
action_start_times.append(int(float(start) * SECONDS_TO_MICROSECONDS))
action_end_times.append(int(float(end) * SECONDS_TO_MICROSECONDS))
# Clip-level fields: id, data path, and a clip spanning 0 .. row["length"]
# (scaled the same way as the segment times).
ms.set_example_id(bytes23(row["id"]), metadata)
ms.set_clip_data_path(bytes23(filepath), metadata)
ms.set_clip_start_timestamp(0, metadata)
ms.set_clip_end_timestamp(
int(float(row["length"]) * SECONDS_TO_MICROSECONDS), metadata)
# Segment-level fields: one start, end, label string and label index per
# parsed action; the lists are empty when the row has no actions.
ms.set_segment_start_timestamp(action_start_times, metadata)
ms.set_segment_end_timestamp(action_end_times, metadata)
ms.set_segment_label_string(action_strings, metadata)
ms.set_segment_label_index(action_indices, metadata)
yield metadata
# Fragment: the `if` that pairs with the `else` below is above this excerpt.
# Both branches bind `urlretrieve`, from urllib.request or directly from
# urllib (presumably a Python 3 / Python 2 switch; confirm against the
# hidden condition).
urlretrieve = urllib.request.urlretrieve
else:
urlretrieve = urllib.urlretrieve
# For each split: parse its CSV text, fetch any video not yet on disk, and
# build one SequenceExample per fixed-length window of every video.
for split in SPLITS:
# SPLITS[split] is CSV text; it is split into lines for DictReader.
reader = csv.DictReader(SPLITS[split].split("\n"))
all_metadata = []
for row in reader:
url = row["url"]
# The local file name is the last path component of the URL.
basename = url.split("/")[-1]
local_path = os.path.join(self.path_to_data, basename)
# Download only when the file is not already present.
if not tf.io.gfile.exists(local_path):
urlretrieve(url, local_path)
# Window starts run from 0 up to (but excluding) int(row["duration"]) in
# steps of SECONDS_PER_EXAMPLE.
for start_time in range(0, int(row["duration"]), SECONDS_PER_EXAMPLE):
metadata = tf.train.SequenceExample()
# The example id is "<basename>_<start_time>".
ms.set_example_id(bytes23(basename + "_" + str(start_time)),
metadata)
ms.set_clip_data_path(bytes23(local_path), metadata)
# Clip bounds are start_time and start_time + SECONDS_PER_EXAMPLE, both
# scaled by MICROSECONDS_PER_SECOND.
# NOTE(review): the end is not clamped to row["duration"], so the final
# window's end can exceed the duration when it is not a multiple of
# SECONDS_PER_EXAMPLE.
ms.set_clip_start_timestamp(start_time * MICROSECONDS_PER_SECOND,
metadata)
ms.set_clip_end_timestamp(
(start_time + SECONDS_PER_EXAMPLE) * MICROSECONDS_PER_SECOND,
metadata)
# Every window of a video carries that video's label index and string.
ms.set_clip_label_index((int(row["label index"]),), metadata)
ms.set_clip_label_string((bytes23(row["label string"]),),
metadata)
all_metadata.append(metadata)
# A fixed seed makes the shuffle order repeatable; note that this re-seeds
# the module-level `random` generator.
random.seed(47)
random.shuffle(all_metadata)
# One TFRecordWriter per shard name, NUM_SHARDS names per split.
shard_names = [self._indexed_shard(split, i) for i in range(NUM_SHARDS)]
writers = [tf.io.TFRecordWriter(shard_name) for shard_name in shard_names]
# The body of this `with` continues beyond the end of the excerpt.
with _close_on_exit(writers) as writers:
"""
# Fragment: body of a generator whose signature and docstring opening are
# above this excerpt. It reads the annotations CSV selected by `key` and
# yields one tf.train.SequenceExample per data row.
# download_output unpacks into (annotations_files, label_map).
annotations_files, label_map = download_output
with open(annotations_files[key], "r") as annotations:
reader = csv.reader(annotations)
for i, csv_row in enumerate(reader):
if i == 0: # the first row is the header
continue
# Rename the row with a consistent set of names.
# Five-column rows carry a leading label_name; a row of any other length is
# zipped against the four unlabeled column names.
if len(csv_row) == 5:
row = dict(zip(["label_name", "video", "start", "end", "split"],
csv_row))
else:
row = dict(zip(["video", "start", "end", "split"],
csv_row))
metadata = tf.train.SequenceExample()
# The example id is "<video>_<start>".
ms.set_example_id(bytes23(row["video"] + "_" + row["start"]),
metadata)
ms.set_clip_media_id(bytes23(row["video"]), metadata)
ms.set_clip_alternative_media_id(bytes23(row["split"]), metadata)
# The data path is set only when a format string was supplied; the string
# is filled from the row's fields by name.
if video_path_format_string:
filepath = video_path_format_string.format(**row)
ms.set_clip_data_path(bytes23(filepath), metadata)
# Start and end must consist only of digits.
# NOTE(review): `assert` is stripped under `python -O`, so this validation
# disappears in optimized runs.
assert row["start"].isdigit(), "Invalid row: %s" % str(row)
assert row["end"].isdigit(), "Invalid row: %s" % str(row)
# Labels exist only for five-column rows. The label index is added only
# when label_map is truthy; a label_name missing from label_map raises
# KeyError.
if "label_name" in row:
ms.set_clip_label_string([bytes23(row["label_name"])], metadata)
if label_map:
ms.set_clip_label_index([label_map[row["label_name"]]], metadata)
yield metadata