Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Concatenate the per-video embedding files ({:07d}.bin, one 128-float32
# vector per face) into a single combined binary, printing the index of any
# video whose byte count disagrees with its expected number of face ids.
with open('/app/data/embs/sevenyears_embs.bin', 'wb') as f:
    for i in tqdm(list(range(len(videos)))):
        path = '/app/data/embs/{:07d}.bin'.format(i)
        if os.path.isfile(path):
            # Close each part file promptly instead of leaking one fd per
            # video (the original used open(...).read() with no close).
            with open(path, 'rb') as part:
                byts = part.read()
            # Each embedding is 128 float32 values = 4 * 128 bytes.  Compare
            # byte counts exactly rather than via float division, which is
            # both clearer and immune to precision issues on large files.
            if len(byts) != 4 * 128 * len(all_ids[i]):
                print(i)
            f.write(byts)
            f.flush()
print('done')
exit()
if __name__ == "__main__":
cfg = cluster_config(
num_workers=0, worker=worker_config('n1-standard-64'),
pipelines=[face_embedding.FaceEmbeddingPipeline])
with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
# if True:
# db_wrapper = ScannerWrapper.create()
db = db_wrapper.db
embs = embed_faces(
db,
videos=[v.for_scannertools() for v in videos],
frames=frames,
faces=[ScannerSQLTable(Face, v, num_elements=len(f),
filter='query_frame.shot_boundary = false')
for v, f in zip(videos, frames)],
if False:
with Timer('benchmark'):
videos = videos[:50]
def run_pipeline(db, videos, frames, **kwargs):
    """Benchmark entry point: run gender detection over *videos*/*frames*.

    Caching is disabled so every benchmark run does the full computation;
    extra keyword arguments are forwarded to detect_genders unchanged.
    """
    scanner_videos = [v.for_scannertools() for v in videos]
    face_sources = [
        ScannerSQLTable(Face, v)  # num_elements intentionally omitted here
        for v, f in zip(videos, frames)
    ]
    return detect_genders(
        db,
        db_videos=videos,
        videos=scanner_videos,
        frames=frames,
        faces=face_sources,
        cache=False,
        **kwargs)
cfg = cluster_config(
num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[GenderDetectionPipeline])
configs = [(cfg, [
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=16)
])]
bench('gender', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
run_pipeline, configs, no_delete=True, force=True)
exit()
videos = videos
cfg = cluster_config(
num_workers=100, worker=worker_config('n1-standard-64'),
pipelines=[gender_detection.GenderDetectionPipeline])
.values('number').order_by('number')]
if False:
with Timer('benchmark'):
videos = videos[:30]
def run_pipeline(db, videos, frames, **kwargs):
    """Benchmark entry point: run face embedding over *videos*/*frames*.

    Caching is disabled so every benchmark run does the full computation;
    extra keyword arguments are forwarded to embed_faces unchanged.
    """
    scanner_videos = [v.for_scannertools() for v in videos]
    face_sources = [
        ScannerSQLTable(Face, v)  # num_elements intentionally omitted here
        for v, f in zip(videos, frames)
    ]
    return embed_faces(
        db,
        videos=scanner_videos,
        frames=frames,
        faces=face_sources,
        cache=False,
        **kwargs)
cfg = cluster_config(
num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[face_embedding.FaceEmbeddingPipeline])
configs = [(cfg, [
ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
])]
bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
run_pipeline, configs, no_delete=True, force=True)
exit()
videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))
def load_frames():
    """Fetch the frame list for every video, eight lookups in parallel."""
    frame_lists = par_for(frames_for_video, videos, workers=8)
    return frame_lists
frames = pcache.get('emb_frames', load_frames, force=True)
cfg = cluster_config(
num_workers=5, worker=worker_config('n1-standard-32'), pipelines=[GenderDetectionPipeline])
configs = [(cfg, [
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=16)
])]
bench('gender', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
run_pipeline, configs, no_delete=True, force=True)
exit()
videos = videos
cfg = cluster_config(
num_workers=100, worker=worker_config('n1-standard-64'),
pipelines=[gender_detection.GenderDetectionPipeline])
with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
db = db_wrapper.db
# if True:
# db_wrapper = ScannerWrapper.create()
frames = pcache.get('gender_frames', lambda: par_for(frames_for_video, videos, workers=8))
videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0])
videos = list(videos)
frames = list(frames)
detect_genders(
db,
videos=[v.for_scannertools() for v in videos],
# [ScannerJobConfig(
# io_packet_size=10000,
# work_packet_size=400,
# batch=400)]),
# ]
# configs = [
# (cluster_config,
# [ScannerJobConfig(io_packet_size=30000, work_packet_size=400, batch=400),
# ScannerJobConfig(io_packet_size=20000, work_packet_size=400, batch=400),
# ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=400),
# ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=40),
# ScannerJobConfig(io_packet_size=10000, work_packet_size=1000, batch=1000)])
# ]
configs = [(cluster_config, [ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=400)])]
bench('hist', videos, run_pipeline, configs, sample_size=50, no_delete=True, force=True)
import sys
import os
@scannerpy.register_python_op()
class BoundariesToJson(scannerpy.Kernel):
def new_stream(self, args):
    """Per-stream reset hook: record which video this stream processes.

    args: per-stream configuration dict; only 'video_id' is read here.
    """
    self._video_id = args['video_id']
    # Include the PID so interleaved output from parallel Scanner worker
    # processes can be attributed to the right worker.
    print(os.getpid(), 'NEW STREAM {}'.format(self._video_id))
    # Worker stdout is buffered; flush so the message appears immediately.
    sys.stdout.flush()
def execute(self, boundaries: bytes) -> bytes:
if boundaries == b'\0':
return json.dumps([])
else:
class ClothingBboxesPipeline(clothing_detection.ClothingBboxesPipeline):
    """Variant pipeline that stops after preparing clothing bboxes.

    Overrides build_pipeline so the graph only decodes the stored face
    bboxes and runs PrepareClothingBbox, skipping any later stages the
    parent pipeline would add.
    """

    def build_pipeline(self):
        # Decode the serialized bboxes arriving from the SQL source.
        decoded = self._db.ops.BboxesFromJson(bboxes=self._sources['bboxes'].op)
        frame_op = self._sources['frame_sampled'].op
        prepared = self._db.ops.PrepareClothingBbox(frame=frame_op, bboxes=decoded)
        return {'bboxes': prepared}
detect_clothing_bboxes = ClothingBboxesPipeline.make_runner()
detect_clothing = ClothingDetectionPipeline.make_runner()
videos = list(Video.objects.all().order_by('id'))
cfg = cluster_config(
num_workers=100,
worker=worker_config('n1-standard-16', gpu=1),
pipelines=[clothing_detection.ClothingDetectionPipeline])
with make_cluster(cfg, sql_pool=2, no_delete=True) as db_wrapper:
# if True:
# db_wrapper = ScannerWrapper.create()
db = db_wrapper.db
print('Fetching frames')
frames = pcache.get('clothing_frames', lambda: par_for(frames_for_video, videos, workers=8))
videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames) if len(f) > 0])
videos = list(videos)
frames = list(frames)
cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]},
run_pipeline, configs, no_delete=True, force=True)
with Timer('run'):
print('Getting frames')
def load_frames():
return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')]
for v in tqdm(videos)]
frames = pcache.get('face_frames', load_frames)
cfg = cluster_config(
num_workers=100,
worker=worker_config('n1-standard-64'),
num_load_workers=2,
num_save_workers=2)
with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper:
# if True:
# db_wrapper = ScannerWrapper.create(enable_watchdog=False)
db = db_wrapper.db
print('Starting detection')
detect_faces(
db,
videos=[v.for_scannertools() for v in videos],
db_videos=videos,
compute_shot_boundaries = ShotBoundaryPipeline.make_runner()
# with Timer('Histogram'):
# cfg = cluster_config(
# num_workers=300,
# worker=worker_config('n1-standard-16'))
# with make_cluster(cfg, no_delete=True) as db_wrapper:
# videos = videos
#videos = list(Video.objects.filter(id__gte=91250, id__lte=91350))
# videos = [Video.objects.get(id=63970)]
videos = videos
with Timer('Shot boundaries'):
cfg = cluster_config(
num_workers=60,
worker=worker_config('n1-highmem-16'),
workers_per_node=2,
num_load_workers=1,
num_save_workers=2)
with make_cluster(cfg, no_delete=True) as db_wrapper:
# from esper.scannerutil import ScannerWrapper
# if True:
# db_wrapper = ScannerWrapper.create()
db = db_wrapper.db
job_config = ScannerJobConfig(io_packet_size=10000, work_packet_size=400, batch=400)
hists = run_pipeline(db, videos, batch=job_config.batch, run_opts={
'io_packet_size': job_config.io_packet_size,