# Benchmark the face embedding pipeline. The opening of this call is truncated in
# the scrape; `cfg = cluster_config(` is a hedged reconstruction mirroring the
# face benchmark further below.
cfg = cluster_config(
    num_workers=5, worker=worker_config('n1-standard-32'),
    pipelines=[face_embedding.FaceEmbeddingPipeline])
configs = [(cfg, [
    # Sweep one knob at a time around the (io_packet_size=1000,
    # work_packet_size=20, pipelines_per_worker=4) baseline.
    ScannerJobConfig(io_packet_size=500, work_packet_size=20, pipelines_per_worker=4),
    ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=4),
    ScannerJobConfig(io_packet_size=1000, work_packet_size=80, pipelines_per_worker=4),
    ScannerJobConfig(io_packet_size=1000, work_packet_size=20, pipelines_per_worker=8),
])]
bench('embedding', {'videos': videos, 'frames': [frames_for_video(v) for v in videos]},
      run_pipeline, configs, no_delete=True, force=True)
exit()
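# frames_for_video is used in the benchmark above and in load_frames below but is
# not defined in this scrape. A hedged sketch, based on the inline frame queries
# elsewhere in the file (the shot_boundary filter is an assumption):
def frames_for_video(video):
    return [f['number'] for f in Frame.objects.filter(video=video, shot_boundary=False)
            .values('number').order_by('number')]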
videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id'))
def load_frames():
    return par_for(frames_for_video, videos, workers=8)
frames = pcache.get('emb_frames', load_frames, force=True)
# Keep only the videos that actually have frames.
videos, frames = unzip([(v, f) for (v, f) in zip(videos, frames)
                        if len(f) > 0])
videos = list(videos)
frames = list(frames)
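# unzip is a repo helper not shown in this scrape; a minimal equivalent for
# reference (an assumption, not the original definition):
def unzip(pairs):
    return tuple(zip(*pairs))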
# Export packed embeddings and IDs into single files
if False:
    def get_ids(video):
        return [f['id'] for f in Face.objects.filter(frame__video=video).order_by('frame__number', 'id').values('id')]

    all_ids = pcache.get('emb_ids', (lambda: par_for(get_ids, videos, workers=4)))

    import struct
    with open('/app/data/embs/sevenyears_ids.bin', 'wb') as f:
        for i, ids in tqdm(enumerate(all_ids)):
            path = '/app/data/embs/{:07d}.bin'.format(i)
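            # The snippet is truncated here. A hedged guess at the remaining loop
            # body (an assumption, not the original code): append this video's face
            # IDs to the combined file as little-endian uint64s; `path` presumably
            # points at the matching per-video packed-embedding file.
            f.write(struct.pack('<{}Q'.format(len(ids)), *ids))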
def word_counts():
    # Pull word counts for the corpus from the local word-count service.
    r = requests.get('http://localhost:8111/wordcounts')
    return r.json()
VOCAB_THRESHOLD = 100
def load_vocab():
    counts = word_counts()
    print('Full vocabulary size: {}'.format(len(counts)))
    vocabulary = sorted([word for (word, count) in counts.items() if count > VOCAB_THRESHOLD])
    print('Filtered vocabulary size: {}'.format(len(vocabulary)))
    return vocabulary
vocabulary = pcache.get('vocabulary', load_vocab)
vocab_size = len(vocabulary)
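# Hypothetical usage (not in the original snippet): downstream encoding typically
# needs word -> index lookups into the sorted vocabulary.
word_to_index = {word: i for i, word in enumerate(vocabulary)}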
class SegmentTextDataset(Dataset):
    def __init__(self, docs, vocabulary=None, segment_size=SEGMENT_SIZE, segment_stride=SEGMENT_STRIDE, use_cuda=False):
        self._segment_size = segment_size
        self._use_cuda = use_cuda
        self._vocabulary = vocabulary
        self._doc_names = docs
        self._doc_lens = doc_len()
        # Number of fixed-size segments each document yields at the given stride.
        self._num_segs = np.array([
            len(range(0, self._doc_lens[doc] - segment_size + 1, segment_stride))
            for doc in self._doc_names
        ])
        self._back_index = [
            (i, j, k)
            # The comprehension is truncated in the scrape; a hedged completion,
            # mapping a flat segment index to (doc index, start offset, segment index):
            for i, doc in enumerate(self._doc_names)
            for k, j in enumerate(range(0, self._doc_lens[doc] - segment_size + 1, segment_stride))
        ]
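    # The class body is cut off in the scrape. A Dataset needs __len__ and
    # __getitem__; a minimal hedged sketch consistent with the fields above
    # (the original methods are not shown):
    def __len__(self):
        return int(self._num_segs.sum())

    def __getitem__(self, idx):
        # Map the flat segment index back to its (document, start offset) pair;
        # tokenizing the segment with self._vocabulary is elided in the original.
        i, j, _ = self._back_index[idx]
        return self._doc_names[i], j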
def run_pipeline(db, videos, frames, **kwargs):
    return face_detection.detect_faces(
        db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs)
cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32'))
configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])]
bench('face',
      {'videos': videos,
       'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')]
                  for v in videos]},
      run_pipeline, configs, no_delete=True, force=True)
with Timer('run'):
    print('Getting frames')

    def load_frames():
        return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')]
                for v in tqdm(videos)]

    frames = pcache.get('face_frames', load_frames)
    cfg = cluster_config(
        num_workers=100,
        worker=worker_config('n1-standard-64'),
        num_load_workers=2,
        num_save_workers=2)
    with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper:
        # if True:
        #     db_wrapper = ScannerWrapper.create(enable_watchdog=False)
        db = db_wrapper.db

        print('Starting detection')
        detect_faces(
            db,
            # The call is truncated in the scrape; the remaining arguments are a
            # hedged reconstruction mirroring run_pipeline above:
            videos=[v.for_scannertools() for v in videos],
            frames=frames,
            cache=False)
def try_config_cached(job_config):
    return pcache.get(run_name(cluster_config, job_config), force=force, fn=lambda: try_config(job_config))
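# run_name is defined elsewhere in the repo. A hedged sketch of such a cache-key
# helper (the name format is an assumption, not the original implementation):
def run_name(cluster_config, job_config):
    return 'bench-{}-{}'.format(hash(str(cluster_config)), hash(str(job_config)))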
def test_config(args, db_wrapper, cluster_config, job_config):
    time, metrics = run_config(args, db_wrapper, job_config)
    if time is not None:
        # Convert wall-clock seconds into dollars per video: the hourly cluster
        # price (excluding the master) prorated by runtime, amortized over the sample.
        price_per_hour = cluster_config.price(no_master=True)
        price_per_video = (time / 3600.0) * price_per_hour / float(sample_size)
        return price_per_video, metrics
    else:
        return None
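# Hypothetical worked example (invented numbers): a 50-video sample that finishes
# in 1800 s on a cluster priced at $10/hour costs (1800 / 3600) * 10 / 50 = $0.10
# per video.
assert abs((1800 / 3600.0) * 10.0 / 50 - 0.10) < 1e-9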
results = []
for (cluster_config, job_configs) in configs:
    # Only bring up the cluster if there exists a job config that hasn't been computed
    if not force and all([pcache.has(run_name(cluster_config, job_config)) for job_config in job_configs]):
        results.append([pcache.get(run_name(cluster_config, job_config)) for job_config in job_configs])
    else:
        with make_cluster(cluster_config, no_delete=no_delete) as db_wrapper:
            log.info('Cluster config: {}'.format(cluster_config))

            def try_config(job_config):
                log.info('Job config: {}'.format(job_config))
                try:
                    return test_config(
                        args, db_wrapper, cluster_config, job_config)
                except TestFailure as e:
                    print(e)
                    return (str(e), None)
                except Exception as e:
                    traceback.print_exc()
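            # The snippet ends mid-function; by symmetry with the cached branch
            # above, the else-branch presumably finishes by collecting results:
            results.append([try_config_cached(job_config) for job_config in job_configs])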
# The snippet begins mid-statement. Presumably the per-frame faces are grouped by
# frame number (face_ids is indexed by frame_num below); `collect` stands for a
# group-by-key helper, and the assignment is a hedged reconstruction:
face_ids = collect(
    list(
        Face.objects.filter(frame__video=video).order_by('frame__number', 'id').values(
            'id', 'frame__number')), lambda f: f['frame__number'])
for (frame_num, frame_cloth) in zip(vid_frames, list(outp.load())):
    # Note: `faces` is never used below; the per-frame faces come from the grouped
    # face_ids above.
    faces = list(
        Face.objects.filter(frame__video=video,
                            frame__number=frame_num).order_by('id').values('id'))
    for (cloth, face) in zip(frame_cloth, face_ids[frame_num]):
        hcs.append(
            HairColor(
                labeler=labeler,
                face_id=face['id'],
                color_id=hc_names[cloth.to_dict()['Hair color 3']]))
pcache.set(video.item_name() + '-haircolor', hcs)
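# Hypothetical read-back of the cached labels (mirrors the pcache.get calls used
# throughout this file; not in the original snippet):
hcs = pcache.get(video.item_name() + '-haircolor', lambda: [])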