Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is a fragment of a larger routine. The enclosing `def`
# is not visible and the block nesting has been flattened; `fp`, `chromsizes`,
# `columns`, `region1`, `region2`, `usecols` and `dtypes` are presumably its
# parameters -- confirm against the full source.
# Open the file through pypairix in 'r' mode and read its header lines.
f = pypairix.open(fp, 'r')
header = f.get_header()
if len(header):
# Group the header lines by the text that precedes their first ':'.
header_groups = toolz.groupby(lambda x: x.split(':')[0], header)
# Use the '#chromsize' header lines only when the caller supplied no chromsizes.
if '#chromsize' in header_groups and chromsizes is None:
# Split each line on whitespace and drop the leading token.
items = [line.split()[1:] for line in header_groups['#chromsize']]
if len(items) and chromsizes is None:
# First remaining field becomes the index label, the second is parsed as int.
names, lengths = zip(*((item[0], int(item[1])) for item in items))
chromsizes = pd.Series(index=names, data=lengths)
# Likewise, take column names from the first '#columns' line when none were given.
if '#columns' in header_groups and columns is None:
columns = header_groups['#columns'][0].split()[1:]
# Resolve each region into a (chrom, start, end) triple.
chrom1, start1, end1 = parse_region(region1, chromsizes)
if region2 is not None:
chrom2, start2, end2 = parse_region(region2, chromsizes)
else:
# No second region given: reuse the first region for both sides of the query.
chrom2, start2, end2 = chrom1, start1, end1
it = f.query2D(chrom1, start1, end1, chrom2, start2, end2)
if usecols is not None:
# Translate the requested column names into positions within `columns`.
# NOTE(review): `columns.index` fails if `columns` is still None at this
# point (no '#columns' header and none passed in) -- confirm callers.
argusecols = [columns.index(col) for col in usecols]
# Each element of `records` is a generator over the selected fields of one
# record. NOTE(review): verify that from_records accepts generator rows.
records = [
(record[i] for i in argusecols) for record in it
]
columns = usecols
else:
records = it
df = pd.DataFrame.from_records(records, columns=columns)
# NOTE(review): the body of the loop below is cut off in this view.
if columns is not None:
for col in columns:
# NOTE(review): this is a fragment of a larger routine. The enclosing `def`
# is not visible and the block nesting has been flattened; `dtypes`, `fp`,
# `chromsizes`, `columns`, `region1`, `region2` and `usecols` are presumably
# its parameters -- confirm against the full source.
# Replace a missing `dtypes` with an empty dict.
if dtypes is None:
dtypes = {}
# Open the file through pypairix in 'r' mode and read its header lines.
f = pypairix.open(fp, 'r')
header = f.get_header()
if len(header):
# Group the header lines by the text that precedes their first ':'.
header_groups = toolz.groupby(lambda x: x.split(':')[0], header)
# Header-provided chromosome sizes are used only if the caller passed none.
if '#chromsize' in header_groups and chromsizes is None:
# Split each line on whitespace and drop the leading token.
items = [line.split()[1:] for line in header_groups['#chromsize']]
if len(items) and chromsizes is None:
# Build a Series indexed by the first field, valued by the int-parsed second field.
names, lengths = zip(*((item[0], int(item[1])) for item in items))
chromsizes = pd.Series(index=names, data=lengths)
# Header-provided column names are used only if the caller passed none.
if '#columns' in header_groups and columns is None:
columns = header_groups['#columns'][0].split()[1:]
# Resolve each region into a (chrom, start, end) triple.
chrom1, start1, end1 = parse_region(region1, chromsizes)
if region2 is not None:
chrom2, start2, end2 = parse_region(region2, chromsizes)
else:
# No second region given: reuse the first region for both sides of the query.
chrom2, start2, end2 = chrom1, start1, end1
it = f.query2D(chrom1, start1, end1, chrom2, start2, end2)
if usecols is not None:
# Map the requested column names to their positions within `columns`.
# NOTE(review): raises if `columns` is still None here -- confirm callers.
argusecols = [columns.index(col) for col in usecols]
# Each element of `records` is a generator over the selected fields of one
# record. NOTE(review): verify that from_records accepts generator rows.
records = [
(record[i] for i in argusecols) for record in it
]
columns = usecols
else:
records = it
# NOTE(review): the rest of the routine after this line is not visible here.
df = pd.DataFrame.from_records(records, columns=columns)
def bedbisect(bedf, region):
    """Find the row span of ``bedf`` that corresponds to a genomic region.

    Within each chromosome the rows must be sorted by `start` and `end`.
    Rows of the same `chrom` must be contiguous, but the chromosomes
    themselves may appear in any order.

    Parameters
    ----------
    bedf : table with `chrom`, `start` and `end` columns
    region : region specification understood by ``parse_region``

    Returns
    -------
    (lo, hi) : positional bounds of the matching rows, used as a
        half-open slice ``lo:hi``.
    """
    chrom, start, end = parse_region(region)

    # Row bounds of the block of rows belonging to `chrom`.
    block_lo, block_hi = _find_block_span(bedf.chrom.values, chrom)

    # Advance past the leading rows of the block whose `end` is <= `start`.
    ends_in_block = bedf['end'].values[block_lo:block_hi]
    first = block_lo + ends_in_block.searchsorted(start, side='right')

    # Without an explicit region end, the span runs to the end of the
    # chromosome block.
    if end is None:
        return first, block_hi

    # Stop before the first remaining row whose `start` is >= `end`.
    starts_from_first = bedf['start'].values[first:block_hi]
    stop = first + starts_from_first.searchsorted(end, side='left')
    return first, stop