Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Collect the eight coverage distributions to plot: one dataset per entry in
# 'data_labels' below (four whole-genome series, then four CpG series).
mdata = [
    self.mdata[key]
    for key in (
        'covdist',
        'covdist_q40',
        'covdist_q40_botgc',
        'covdist_q40_topgc',
        'covdist_cpg',
        'covdist_cpg_q40',
        'covdist_cpg_q40_botgc',
        'covdist_cpg_q40_topgc',
    )
]
# Report section with a switchable line graph of the datasets above.
# The description text is user-facing: spelling ("cumulative") and number
# agreement ("regions are", "windows") are corrected here.
self.add_section(
    name='Cumulative Base Coverage',
    anchor='biscuit-coverage-base',
    description="This plot shows the cumulative base coverage. High and low GC content regions are the top and bottom 10% 100bp windows in GC content.",
    helptext="Q40 means only reads mapped with mapping quality (Q) greater than or equal to 40 are considered.",
    plot=linegraph.plot(
        mdata,
        {
            'id': 'biscuit_coverage_base',
            'title': 'BISCUIT: Cumulative Base Coverage',
            'xLabelFormat': '{value}X',
            'xlab': 'Sequencing Depth',
            'data_labels': [
                {'name': 'All', 'ylab': 'Million Bases'},
                {'name': 'Q40', 'ylab': 'Million Bases (Q40)'},
                {'name': 'Q40 low GC', 'ylab': 'Million Low-GC Bases (Q40)'},
                {'name': 'Q40 high GC', 'ylab': 'Million High-GC Bases (Q40)'},
                {'name': 'CpG (all)', 'ylab': 'Million CpGs'},
                {'name': 'CpG Q40', 'ylab': 'Million CpGs (Q40)'},
                {'name': 'CpG Q40 low GC', 'ylab': 'Million Low-GC CpGs (Q40)'},
                {'name': 'CpG Q40 high GC', 'ylab': 'Million High-GC CpGs (Q40)'},
            ],
        },
    ),
)
for sid, dd in mdata[0].items():
log.info("Too many chromosomes found: %s, limiting to %s" % (len(chroms), max_chroms))
chroms = chroms[:max_chroms]
# Plot configuration for the ROC line graph: one data label per chromosome,
# in the same order as the datasets passed to linegraph.plot() below.
pconfig = {
    'id': 'goleft_indexcov-roc-plot',
    'title': 'goleft indexcov: ROC - genome coverage per scaled depth by chromosome',
    'xlab': 'Scaled coverage',
    'ylab': 'Proportion of regions covered',
    'ymin': 0,
    'ymax': 1.0,
    'xmin': 0,
    'xmax': 1.5,
    'data_labels': [{"name": self._short_chrom(c)} for c in chroms],
}
# The description is user-facing; the duplicated word ("at at given") is fixed.
self.add_section(
    name='Scaled coverage ROC plot',
    anchor='goleft_indexcov-roc',
    description='Coverage (ROC) plot that shows genome coverage at a given (scaled) depth.',
    helptext=helptext,
    plot=linegraph.plot([data[c] for c in chroms], pconfig),
)
return True
else:
return False
(see the <a href="http://qualimap.bioinfo.cipf.es/doc_html/index.html">Qualimap 2 documentation</a>), where the `TLEN` (or
'observed Template LENgth') field contains 'the number of bases from the
leftmost mapped base to the rightmost mapped base'
(<a href="https://samtools.github.io/hts-specs/">SAM
format specification</a>). Note that because it is defined in terms of
alignment to a reference sequence, the value of the `TLEN` field may
differ from the insert size due to factors such as alignment clipping,
alignment errors, or structural variation or splicing in a gap between
reads from the same fragment.'''
# Report section: line graph of the insert size histogram data held in
# self.qualimap_bamqc_insert_size_hist. Both axes are pinned to start at 0.
self.add_section(
    name='Insert size histogram',
    anchor='qualimap-insert-size-histogram',
    description='Distribution of estimated insert sizes of mapped reads.',
    helptext=insert_size_helptext,
    plot=linegraph.plot(
        self.qualimap_bamqc_insert_size_hist,
        dict(
            id='qualimap_insert_size',
            title='Qualimap BamQC: Insert size histogram',
            ylab='Fraction of reads',
            xlab='Insert Size (bp)',
            ymin=0,
            xmin=0,
            tt_label='<b>{point.x} bp</b>: {point.y}',
        ),
    ),
)
# Section 4 - GC-content distribution
if len(self.qualimap_bamqc_gc_content_dist) > 0:
gc_content_helptext = '''
GC bias is the difference between the guanine-cytosine content
(GC-content) of a set of sequencing reads and the GC-content of the DNA
or RNA in the original sample. It is a well-known issue with sequencing
# Calculate the coverage rates for this range of coverages
rates_within_threshs[s_name] = _calculate_bases_within_thresholds(hist, total, depth_range)
# Add requested coverage levels to the General Statistics table.
# A requested level that is absent from the calculated rates is reported as 0.
for c in self.covs:
    self.general_stats_data[s_name]['{}_x_pc'.format(c)] = (
        rates_within_threshs[s_name][int(c)]
        if int(c) in rates_within_threshs[s_name]
        else 0
    )
# Section 1 - BamQC Coverage Histogram
# Line graph of self.qualimap_bamqc_coverage_hist; the x axis runs from 0 to
# max_x (computed by the surrounding code) and shows whole numbers only.
self.add_section(
    name='Coverage histogram',
    anchor='qualimap-coverage-histogram',
    description='Distribution of the number of locations in the reference genome with a given depth of coverage.',
    helptext=coverage_histogram_helptext,
    plot=linegraph.plot(
        self.qualimap_bamqc_coverage_hist,
        dict(
            id='qualimap_coverage_histogram',
            title='Qualimap BamQC: Coverage histogram',
            ylab='Genome bin counts',
            xlab='Coverage (X)',
            ymin=0,
            xmin=0,
            xmax=max_x,
            xDecimals=False,
            tt_label='<b>{point.x}X</b>: {point.y}',
        ),
    ),
)
# Section 2 - BamQC cumulative coverage genome fraction
self.add_section (
name = 'Cumulative genome coverage',
anchor = 'qualimap-cumulative-genome-fraction-coverage',
description = 'Percentage of the reference genome with at least the given depth of coverage.',
'xmax': 100,
'tt_label': '<b>{point.x}%</b>: {point.y:.3f}'
}
# When reference GC curves are available, mention them in the description
# (singular or plural wording) and attach them to the plot config.
if len(extra_series) > 0:
    if len(extra_series) == 1:
        desc += ' The dotted line represents a pre-calculated GC distribution for the reference genome.'
    else:
        desc += ' Each dotted line represents a pre-calculated GC distribution for a specific reference genome.'
    lg_config['extra_series'] = extra_series
# Report section: line graph of the per-sample GC content distribution.
self.add_section(
    name='GC content distribution',
    anchor='qualimap-gc-distribution',
    description=desc,
    helptext=gc_content_helptext,
    plot=linegraph.plot(self.qualimap_bamqc_gc_content_dist, lg_config),
)
# Shared axis / tooltip settings for the trimmed-length line graph.
pconfig = {
    'id': 'cutadapt_plot',
    'title': 'Cutadapt: Lengths of Trimmed Sequences',
    'ylab': 'Counts',
    'xlab': 'Length Trimmed (bp)',
    'xDecimals': False,
    'ymin': 0,
    'tt_label': '<b>{point.x} bp trimmed</b>: {point.y:.0f}',
}
# Two datasets are plotted (raw counts, then observed/expected), so two
# matching data labels are supplied in the same order.
pconfig['data_labels'] = [
    {'name': 'Counts', 'ylab': 'Count'},
    {'name': 'Obs/Exp', 'ylab': 'Observed / Expected'},
]
self.add_section(
    description=description,
    plot=linegraph.plot(
        [self.cutadapt_length_counts, self.cutadapt_length_obsexp],
        pconfig,
    ),
)
def chart_align_isize(self):
    """Add the 'Insert Size Distribution' line-graph section to the report.

    Iterates ``self.mdata['align_isize']`` as ``sample id -> dict`` and plots
    the ``'I'`` entry of each sample's dict as that sample's series.
    """
    pd_isize = {sid: dd['I'] for sid, dd in self.mdata['align_isize'].items()}

    self.add_section(
        name='Insert Size Distribution',
        anchor='biscuit-isize',
        description="This plot shows the distribution of insert size.",
        plot=linegraph.plot(
            pd_isize,
            {
                'id': 'biscuit_isize',
                'title': 'BISCUIT: Insert Size Distribution',
                'ymin': 0,
                'yLabelFormat': '{value}%',
                'smooth_points': 500,  # limit number of points / smooth data
                # The x value here is an insert size, not a mapping quality,
                # so the tooltip must not prefix it with "Q".
                # NOTE(review): unit assumed to be bp - confirm against BISCUIT output.
                'tt_label': '<strong>{point.x} bp:</strong> {point.y:.2f}% of reads',
                'ylab': '% Mapped Reads',
                'xlab': 'Insert Size',
            },
        ),
    )
}
)
plot_params = {
'id': 'bbmap-' + file_type + '_plot',
'title': 'BBTools: ' + plot_args['plot_title'],
'xlab': 'Percent identity',
'ylab': 'Read count',
'data_labels': [
{'name': 'Reads', 'ylab': 'Read count'},
{'name': 'Bases', 'ylab': 'Number of bases'},
]
}
plot_params.update(plot_args['plot_params'])
plot = linegraph.plot(
plot_data,
plot_params
)
return plot
## cytosine retention distribution
mdata_meth = self.mdata['retention_dist']
mdata = self.mdata['retention_dist_byread']
# Datasets for the plot: the retention distribution first, then one
# per-sample mapping for each of the 'CA', 'CC', 'CG' and 'CT' entries
# of the by-read data.
pd = [
    mdata_meth,
    {sid: dd['CA'] for sid, dd in mdata.items()},
    {sid: dd['CC'] for sid, dd in mdata.items()},
    {sid: dd['CG'] for sid, dd in mdata.items()},
    {sid: dd['CT'] for sid, dd in mdata.items()},
]
self.add_section(
name = 'Number of Retention Distribution',
anchor = 'biscuit-retention-read',
description = "This plot shows the distribution of the number of retained cytosine in each read, up to 10.",
plot = linegraph.plot(pd, {
'id': 'biscuit_retention_read_cpa',
'xlab': 'Number of Retention within Read',
'title': 'BISCUIT: Retention Distribution',
'data_labels': [
{'name': 'CpG retention', 'ylab': 'Fraction of cytosine in CpG context', 'xlab': 'Retention Level (%)'},
{'name': 'Within-read CpA', 'ylab': 'Number of Reads'},
{'name': 'Within-read CpC', 'ylab': 'Number of Reads'},
{'name': 'Within-read CpG', 'ylab': 'Number of Reads'},
{'name': 'Within-read CpT', 'ylab': 'Number of Reads'},
]})