Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Collect candidate tagAlign files for one experiment across a list of DNAnexus
# folders, then select the rep1 file by folder name.
# NOTE(review): this chunk is truncated/spliced — indentation has been stripped
# and the rep2 handling plus the function's return are not visible here.
def get_tas(exp_id, default_project, ta_folders):
possible_files = []
for base_folder in ta_folders:
# A folder may be given as "project:path"; resolve the project by name.
if ':' in base_folder:
project_name, path = base_folder.split(':')
project = resolve_project(project_name)
project = project.get_id()
project_name += ":"
else:
# No explicit project prefix: use the caller-supplied default project.
project = default_project
project_name = ""
path = base_folder
# Normalize to an absolute DNAnexus folder path.
if not path.startswith('/'):
path = '/' + path
print project, project_name, path
# Recursively list closed files under the folder, keeping only tagAlign
# outputs that live under this experiment's .../bams/... subtree.
for dxfile in dxpy.find_data_objects(classname='file', state='closed', folder=path, describe=True, recurse=True, project=project):
desc = dxfile.get('describe')
if exp_id in desc.get('folder') and '/bams' in desc.get('folder') and desc.get('name').endswith(('tagAlign', 'tagAlign.gz')):
possible_files.append(desc)
print "%s %i possible files" %(exp_id, len(possible_files))
# Partition candidates by replicate, inferred from the folder path.
rep1_files = [f for f in possible_files if 'rep1' in f.get('folder')]
rep2_files = [f for f in possible_files if 'rep2' in f.get('folder')]
# Warn when the match is not unique; first match wins below.
if len(rep1_files) != 1:
print "Tried to find one rep1 ta, found %d" %(len(rep1_files))
if len(rep1_files) > 0:
if len(rep1_files) > 1:
print "Using first one found"
# Build a fully qualified "project:folder/name" identifier for rep1.
rep1 = rep1_files[0].get('project') + ':' + rep1_files[0].get('folder') + '/' + rep1_files[0].get('name')
else:
rep1 = None
# Fragment (enclosing def not visible): resolve a "project:path" folder spec,
# then glob for *.tagAlign.gz under its bams/ subfolder and keep files whose
# names contain every accession in `accessions` (bound outside this view).
# NOTE(review): this block is repeated verbatim later in the file — likely a
# splice artifact rather than intentional duplication.
if ':' in base_folder:
project_name, path = base_folder.split(':')
project = resolve_project(project_name)
project_id = project.get_id()
project_name += ":"
else:
# No explicit project prefix: fall back to the default project id.
project_id = default_project
project_name = ""
path = base_folder
# Normalize to an absolute folder path with a trailing slash so that
# string concatenation below yields a valid "<path>bams/" folder.
if not path.startswith('/'):
path = '/' + path
if not path.endswith('/'):
path += '/'
logging.debug(
"Looking for TA's in %s %s %s" % (project_id, project_name, path))
for dxfile in dxpy.find_data_objects(
classname='file',
state='closed',
folder=path + 'bams/',
project=project_id,
describe=True,
recurse=True,
name='*tagAlign.gz',
name_mode='glob'
):
possible_files.append(dxfile.get('describe'))
# A file matches only if its name contains ALL requested accessions.
matched_files = \
[f for f in possible_files if all([acc in f['name'] for acc in accessions])]
if not matched_files:
logging.error(
'Could not find tagAlign with accessions %s' % (accessions))
return None
# Fragment (enclosing def not visible): verbatim duplicate of the block that
# precedes it in this file — resolve a "project:path" folder spec, glob for
# *.tagAlign.gz under bams/, and filter by accession substrings.
# NOTE(review): duplicated code; presumably only one copy belongs here.
if ':' in base_folder:
project_name, path = base_folder.split(':')
project = resolve_project(project_name)
project_id = project.get_id()
project_name += ":"
else:
# No explicit project prefix: fall back to the default project id.
project_id = default_project
project_name = ""
path = base_folder
# Normalize to an absolute folder path with a trailing slash.
if not path.startswith('/'):
path = '/' + path
if not path.endswith('/'):
path += '/'
logging.debug(
"Looking for TA's in %s %s %s" % (project_id, project_name, path))
for dxfile in dxpy.find_data_objects(
classname='file',
state='closed',
folder=path + 'bams/',
project=project_id,
describe=True,
recurse=True,
name='*tagAlign.gz',
name_mode='glob'
):
possible_files.append(dxfile.get('describe'))
# A file matches only if its name contains ALL requested accessions.
matched_files = \
[f for f in possible_files if all([acc in f['name'] for acc in accessions])]
if not matched_files:
logging.error(
'Could not find tagAlign with accessions %s' % (accessions))
return None
# Fragment (enclosing def not visible): clone an asset file to per-region
# projects, retrying up to 3 times, then create an AssetBundle record in each
# destination project pointing at the cloned (hidden) file.
dest_proj.new_folder(folder, parents=True)
region2projid[region] = dest_proj.get_id()
print(region2projid)
# Fire off a clone process for each region
# Wait for the cloning to complete
# Retry the whole clone/wait cycle up to three times; stop on first success.
for i in [1, 2, 3]:
jobs = _clone_to_all_regions(region2projid, regions, asset_file_name, folder, url)
retval = _wait_for_completion(jobs)
if retval:
break
# make records for each file
for region in regions:
dest_proj_id = region2projid[region]
# The cloned asset file is hidden, so search with visibility="hidden".
results = list(dxpy.find_data_objects(classname = "file",
visibility = "hidden",
name = asset_file_name,
project = dest_proj_id,
folder = folder))
file_ids = [p["id"] for p in results]
if len(file_ids) == 0:
raise RuntimeError("Found no files {}:{}/{}".format(dest_proj_id, folder, asset_file_name))
if len(file_ids) > 1:
# BUG(review): `dxfiles` is not defined in this scope — if this branch
# fires it raises NameError instead of the intended RuntimeError.
# This should almost certainly be len(file_ids).
raise RuntimeError("Found {} files {}:{}/{}, instead of just one"
.format(len(dxfiles), dest_proj_id, folder, asset_file_name))
# Create an AssetBundle record that links to the cloned archive file.
# NOTE(review): the new_dxrecord call is truncated by the splice below.
dest_asset = dxpy.new_dxrecord(name=record.name,
types=['AssetBundle'],
details={'archiveFileId': dxpy.dxlink(file_ids[0])},
properties=record.get_properties(),
project=dest_proj_id,
folder=folder,
# Fragment (enclosing def not visible): normalize an applet build spec's
# folder/name/dxapi fields, then look for an existing applet at the same path
# and either queue it for overwrite or archive it.
# NOTE(review): the guard `if override_folder:` appears lost in this splice —
# a later copy of this code in the file has it — so here the assignment runs
# unconditionally and would raise/clobber when override_folder is unset.
applet_spec['folder'] = override_folder
if 'folder' not in applet_spec:
applet_spec['folder'] = '/'
if override_name:
applet_spec['name'] = override_name
if 'dxapi' not in applet_spec:
applet_spec['dxapi'] = dxpy.API_VERSION
applets_to_overwrite = []
archived_applet = None
if check_name_collisions and not dry_run:
# Join folder and name, inserting '/' only when the folder lacks one.
destination_path = applet_spec['folder'] + ('/' if not applet_spec['folder'].endswith('/') else '') + applet_spec['name']
logger.debug("Checking for existing applet at " + destination_path)
for result in dxpy.find_data_objects(classname="applet", name=applet_spec["name"], folder=applet_spec['folder'], project=dest_project, recurse=False):
if overwrite:
# Don't remove the old applet until after the new one
# has been created. This avoids a race condition where
# we remove the old applet, but that causes garbage
# collection of the bundled resources that will be
# shared with the new applet
applets_to_overwrite.append(result['id'])
elif archive:
logger.debug("Archiving applet %s" % (result['id']))
proj = dxpy.DXProject(dest_project)
archive_folder = '/.Applet_archive'
# Create the archive folder lazily: listing it raises if absent.
try:
proj.list_folder(archive_folder)
except dxpy.DXAPIError:
proj.new_folder(archive_folder)
# Fragment (head missing — per its own log messages this is the interior of
# get_all_tas): resolve the target project, build "<base>/bams/<exp_id>/",
# and collect all closed tagAlign / tagAlign.gz files for the experiment.
# NOTE(review): this fragment starts inside an if-branch; the matching
# `if ':' in base_folder:` line is not visible here.
project = resolve_project(project_name)
project = project.get_id()
project_name += ":"
else:
# No explicit project prefix: fall back to the default project.
project = default_project
project_name = ""
base_path = base_folder
# Normalize to an absolute folder path with a trailing slash.
if not base_path.startswith('/'):
base_path = '/' + base_path
if not base_path.endswith('/'):
base_path = base_path + '/'
path = base_path + 'bams/' + exp_id + '/'
logging.debug(
"get_all_tas: find_data objects in project %s project_name %s path %s"
% (project, project_name, path))
for dxfile in dxpy.find_data_objects(classname='file', state='closed', folder=path, describe=True, recurse=True, project=project):
desc = dxfile.get('describe')
logging.debug(
"get_all_tas: checking object for match: folder %s name %s"
% (desc.get('folder'), desc.get('name')))
# Keep only tagAlign outputs under this experiment's .../bams/... subtree.
if exp_id in desc.get('folder') and '/bams' in desc.get('folder') and desc.get('name').endswith(('tagAlign', 'tagAlign.gz')):
possible_files.append(desc)
logging.debug(
"get_all_tas: exit with possible_files %s" % (possible_files))
return possible_files
# Fragment (head and tail missing): interactive prompt loop for choosing an
# input data object. The string literals below are the tail of a menu-options
# list whose enclosing call was cut off by the splice; opt_num selects which
# project to search for compatible data objects.
'List and choose from available data in the DNAnexus Reference Genomes project',
'Select another project to list and choose available data',
'Select an output from a previously-run job (current project only)',
'Return to original prompt (specify an ID or path directly)'])
except KeyboardInterrupt:
# Ctrl-C falls through to the "return to original prompt" option.
opt_num = 4
if opt_num == 0:
# Current workspace project.
query_project = dxpy.WORKSPACE_ID
elif opt_num == 1:
# The public DNAnexus reference-genomes project.
query_project = dxpy.find_one_project(name="Reference Genome Files", public=True, billed_to="org-dnanexus", level="VIEW")['id']
elif opt_num == 2:
# Let the user pick any project they can at least VIEW.
project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
print('\nProjects to choose from:')
query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
# Options 0-2 all end with a data-object search in query_project.
if opt_num in range(3):
result_generator = dxpy.find_data_objects(classname=in_class,
typename=param_desc.get('type'),
describe=dict(fields=get_ls_l_desc_fields()),
project=query_project)
print('\nAvailable data:')
result_choice = paginate_and_pick(result_generator,
(lambda result: get_ls_l_desc(result['describe'])))
if result_choice == 'none found':
print('No compatible data found')
continue
elif result_choice == 'none picked':
continue
else:
# Return the selection as a "project:object-id" string.
return [result_choice['project'] + ':' + result_choice['id']]
elif opt_num == 3:
# Select from previous jobs in current project
# NOTE(review): the find_jobs call is truncated by the splice below.
result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
# Fragment (enclosing method def not visible): generator interior that lists
# data objects matching a glob pattern in this object's DNAnexus project and
# yields them as canonical ("project:/id") or virtual ("proj:folder/name")
# path objects depending on the `canonicalize` flag (bound outside this view).
proj_id = self.canonical_project
proj_name = self.virtual_project
kwargs = {
'project': proj_id,
'name': pattern,
'name_mode': 'glob',
# the query performance is similar w/wo describe field,
# hence no need to customize query based on canonicalize flag
'describe': {'fields': {'name': True, 'folder': True}},
'recurse': recurse,
'classname': classname,
'limit': limit,
# Anchor the search under the resource subfolder plus optional prefix.
'folder': ('/' + (self.resource or '')) + (starts_with or '')
}
# _wrap_dx_calls presumably translates dxpy errors — TODO confirm.
with _wrap_dx_calls():
list_gen = dxpy.find_data_objects(**kwargs)
for obj in list_gen:
if canonicalize:
yield DXCanonicalPath('dx://{}:/{}'.format(obj['project'], obj['id']))
else:
# rstrip('/') so the root folder does not produce a double slash.
yield DXVirtualPath('{drive}{proj_name}:{folder}/{name}'.format(
drive=self.drive,
proj_name=proj_name,
folder=obj['describe']['folder'].rstrip('/'),
name=obj['describe']['name'])
)
# Fragment (enclosing def not visible; truncated at the end): a second copy of
# the applet-spec normalization and name-collision handling seen earlier in
# this file. This variant deletes a colliding applet immediately on overwrite
# (rather than deferring removal) and moves it to an archive folder on archive.
if override_folder:
applet_spec['folder'] = override_folder
if 'folder' not in applet_spec:
applet_spec['folder'] = '/'
if override_name:
applet_spec['name'] = override_name
if 'dxapi' not in applet_spec:
applet_spec['dxapi'] = dxpy.API_VERSION
archived_applet = None
if check_name_collisions and not dry_run:
# Join folder and name, inserting '/' only when the folder lacks one.
destination_path = applet_spec['folder'] + ('/' if not applet_spec['folder'].endswith('/') else '') + applet_spec['name']
logger.debug("Checking for existing applet at " + destination_path)
for result in dxpy.find_data_objects(classname="applet", name=applet_spec["name"], folder=applet_spec['folder'], project=dest_project, recurse=False):
if overwrite:
logger.info("Deleting applet %s" % (result['id']))
# TODO: test me
dxpy.DXProject(dest_project).remove_objects([result['id']])
elif archive:
logger.debug("Archiving applet %s" % (result['id']))
proj = dxpy.DXProject(dest_project)
archive_folder = '/.Applet_archive'
# Create the archive folder lazily: listing it raises if absent.
try:
proj.list_folder(archive_folder)
except dxpy.DXAPIError:
proj.new_folder(archive_folder)
proj.move(objects=[result['id']], destination=archive_folder)
archived_applet = dxpy.DXApplet(result['id'], project=dest_project)
# `created` is epoch milliseconds, hence the /1000 before fromtimestamp.
# NOTE(review): the fragment is truncated after this line.
now = datetime.datetime.fromtimestamp(archived_applet.created/1000).ctime()