regions = set(regions) - set([curr_region])
if len(regions) == 0:
    # there is nothing to do
    return
app_supported_regions = set(COPY_FILE_APP.describe()['regionalOptions'].keys())
if len(regions - app_supported_regions) > 0:
    print('Currently no support for the following region(s): [{regions}]'
          .format(regions=', '.join(regions - app_supported_regions)),
          file=sys.stderr)
    sys.exit(1)

# Get information about the asset
asset_properties = record.get_properties()
asset_properties['cloned_from'] = record.get_id()
asset_file_name = dxpy.describe(fid)['name']
url = dxpy.DXFile(fid).get_download_url(preauthenticated=True,
                                        project=dxpy.DXFile.NO_PROJECT_HINT,
                                        duration=URL_DURATION)[0]

# Set up the target folder in each destination region
region2projid = {}
for region in regions:
    dest_proj = util.get_project(project_dict[region])
    dest_proj.new_folder(folder, parents=True)
    region2projid[region] = dest_proj.get_id()
print(region2projid)

# Fire off a clone process for each region and wait for the cloning to
# complete, retrying up to three times
for i in [1, 2, 3]:
    jobs = _clone_to_all_regions(region2projid, regions, asset_file_name, folder, url)
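
# A minimal sketch (not the original helper) of what the per-region clone step
# above could look like, assuming COPY_FILE_APP is a dxpy.DXApp that accepts a
# preauthenticated URL and a destination filename; the app input field names
# below are hypothetical.
def _clone_to_all_regions_sketch(region2projid, regions, file_name, folder, url):
    jobs = {}
    for region in regions:
        # launch one copy job per destination project/region
        jobs[region] = COPY_FILE_APP.run(
            {'url': url, 'filename': file_name},  # hypothetical input fields
            project=region2projid[region],
            folder=folder)
    # block until every job finishes; wait_on_done() raises if a job fails
    for job in jobs.values():
        job.wait_on_done()
    return jobs
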
def analysis_files(analysis_id, keypair, server, assembly):
    analysis_id = analysis_id.strip()
    analysis = dxpy.describe(analysis_id)
    project = analysis.get('project')
    m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks', analysis['executableName'])
    if m:
        experiment_accession = m.group(1)
    else:
        logger.info("No accession in %s, skipping." % (analysis['executableName']))
        return
    experiment = common.encoded_get(urlparse.urljoin(server, '/experiments/%s' % (experiment_accession)), keypair)
    bams = get_rep_bams(experiment, keypair, server)
    rep1_bam = bams[0]['accession']
    rep2_bam = bams[1]['accession']
    common_metadata = {
        'assembly': assembly,

def flagstat_parse(dxlink):
    desc = dxpy.describe(dxlink)
    with dxpy.DXFile(desc['id'], mode='r') as flagstat_file:
        if not flagstat_file:
            return None
        flagstat_lines = flagstat_file.read().splitlines()

    qc_dict = {
        # values are regular expressions,
        # will be replaced with scores [hiq, lowq]
        'in_total': 'in total',
        'duplicates': 'duplicates',
        'mapped': 'mapped',
        'paired_in_sequencing': 'paired in sequencing',
        'read1': 'read1',
        'read2': 'read2',
        'properly_paired': 'properly paired',
        'with_self_mate_mapped': 'with itself and mate mapped',
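
# A hedged sketch (not from the original source) of how the labels in qc_dict
# could be turned into [QC-passed, QC-failed] counts, assuming the standard
# `samtools flagstat` line format "<pass> + <fail> <label> ...".
import re

def parse_flagstat_counts(flagstat_lines, qc_dict):
    counts = {}
    for key, label in qc_dict.items():
        for line in flagstat_lines:
            if label in line:
                m = re.match(r'(\d+) \+ (\d+)', line)
                if m:
                    counts[key] = [int(m.group(1)), int(m.group(2))]
                break
    return counts
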
def get_attachment(dxlink):
    desc = dxpy.describe(dxlink)
    filename = desc['name']
    mime_type = desc['media']
    if mime_type == 'text/plain' and not filename.endswith(".txt"):
        filename += ".txt"
    with dxpy.DXFile(desc['id'], mode='r') as stream:
        obj = {
            'download': filename,
            'type': mime_type,
            'href': 'data:%s;base64,%s' % (mime_type, b64encode(stream.read()))
        }
    return obj
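
# Example usage (the file ID is hypothetical): wrap a plain-text QC log as a
# base64 data URI for attachment to a portal object. Note that under Python 3,
# b64encode() returns bytes and would need .decode() before interpolation.
qc_log_link = dxpy.dxlink('file-xxxx')  # hypothetical DNAnexus file ID
attachment = get_attachment(qc_log_link)
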
def infer_pipeline_version(analysis):
    try:
        workflow = dxpy.describe(
            analysis['workflow']['id'], fields={'properties': True})
    except dxpy.exceptions.ResourceNotFound:
        analysis_date = analysis.get('created')
        # get the largest version number that was activated on a date before
        # this analysis was created
        pipeline_version = str(max([
            float(version) for version in VERSION_TIMES
            if VERSION_TIMES[version] < analysis_date])) or None
        logger.warning(
            "Workflow for %s is missing. Inferred version %s"
            % (analysis.get('id'), pipeline_version))
    else:
        pipeline_version = workflow['properties'].get('pipeline_version')
    return pipeline_version or 'default'
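
# For illustration only: the fallback branch above assumes VERSION_TIMES maps a
# version string to the time that version went live, comparable to the analysis
# 'created' timestamp (DNAnexus reports 'created' in milliseconds since the
# epoch). A hypothetical shape for that mapping:
EXAMPLE_VERSION_TIMES = {
    '1.0': 1420070400000,  # hypothetical activation time, ms since the epoch
    '1.1': 1451606400000,
}
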
def accession_analysis(analysis_id, keypair, server, assembly, dryrun, force):
    analysis_id = analysis_id.strip()
    analysis = dxpy.describe(analysis_id)
    project = analysis.get('project')
    m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks', analysis['executableName'])
    if m:
        experiment_accession = m.group(1)
        logger.info(experiment_accession)
    else:
        logger.info("No accession in %s, skipping." % (analysis['executableName']))
        return
    experiment = common.encoded_get(urlparse.urljoin(server, '/experiments/%s' % (experiment_accession)), keypair)
    bams = get_rep_bams(experiment, keypair, server)
    rep1_bam = bams[0]['accession']
    rep2_bam = bams[1]['accession']
    common_metadata = {

def describe(dxid_or_link):
    """Return json description for the given dxid"""
    key = json.dumps(dxid_or_link, sort_keys=True)
    if key not in DxTool._describe_result:
        DxTool._describe_result[key] = dxpy.describe(dxid_or_link)
    return DxTool._describe_result[key]
    # return _run_get_json('dx', 'describe', '--verbose', '--details', '--json', dxid)
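
# Example usage, assuming describe() is exposed as a @staticmethod on DxTool
# (the ID is hypothetical): repeated lookups of the same key are served from
# the in-memory cache, so only the first call issues an API request.
desc_a = DxTool.describe('file-xxxx')
desc_b = DxTool.describe('file-xxxx')  # cache hit, no API call
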
def main():
    args = get_args()
    first_analysis = True
    for (i, analysis_id) in enumerate(args.infile):
        analysis_id = analysis_id.strip()
        try:
            analysis = dxpy.describe(analysis_id)
        except:
            print("Invalid analysis ID %s. Skipping." % (analysis_id))
            continue
        experiment_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3})', analysis['name'])
        if not experiment_m:
            print("No accession in %s, skipping." % (analysis['name']))
            continue
            # print "Temporary hack"
            # experiment_accession = "ENCSR048KZD"
        else:
            experiment_accession = experiment_m.group(1)
        if args.pipeline:
            pipeline = args.pipeline
        elif analysis['executableName'] == 'histone_chip_seq':

        else:
            logger.warning(
                'fastqs with different or non-integer read_lengths: %s But fqcheck is False so ignoring'
                % ([(fq.get('accession'), fq.get('read_length'))
                    for fq in fastqs]))
    except:
        raise
    mapped_read_length = int(next(l for l in native_lengths))
else:
    mapped_read_length = int(crop_length)
# here we get the actual DNAnexus file that was used as the reference
# need to remain backwards-compatible with analyses that used output_JSON
input_stage_output = \
    input_stage['output'].get('output_JSON') or input_stage['output']
reference_file = dxpy.describe(input_stage_output['reference_tar'])
# and construct the alias to find the corresponding file at ENCODEd
reference_alias = "dnanexus:" + reference_file.get('id')
logger.debug('looking for reference file with alias %s'
             % (reference_alias))
reference = common.encoded_get(
    urlparse.urljoin(server, 'files/%s' % (reference_alias)), keypair)
assert reference, "Reference file %s not found on Portal" % (reference_alias)
logger.debug('found reference file %s' % (reference.get('accession')))
bam_metadata = common.merge_dicts({
    'file_format': 'bam',
    'output_type': 'unfiltered alignments',
    'assembly': reference.get('assembly'),