Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def arrayitem(context, builder, idx, ptrs, lens, at, dtype):
    """Emit IR that loads item ``at`` from the ``idx``-th entry of two tables.

    ``ptrs`` and ``lens`` are converted to integers and advanced by
    ``idx * (intp.bitwidth // 8)`` bytes; an ``intp`` value is then loaded
    from each resulting address (a base address and a length).  An unsigned
    ``at >= length`` comparison is handed to ``raise_exception`` together
    with ``RuntimeError("array index out of range")``.  Finally the item at
    ``base + at * dtype.itemsize`` is loaded as ``numba.from_dtype(dtype)``
    and returned.

    Because the bounds comparison is unsigned, a negative ``at`` compares as
    a very large value.
    """
    llvm_type = llvmlite.llvmpy.core.Type
    load_item = numba.targets.arrayobj.load_item
    intp_bits = numba.types.intp.bitwidth

    # Byte offset of entry `idx`, shared by both tables.
    byte_offset = builder.mul(idx, literal_int64(intp_bits // 8))

    # Address of the idx-th entry in the pointer table.
    ptrs_addr = builder.ptrtoint(ptrs, llvm_type.int(intp_bits))
    ptr_slot = builder.inttoptr(
        builder.add(ptrs_addr, byte_offset),
        llvm_type.pointer(context.get_value_type(numba.types.intp)),
    )

    # Address of the idx-th entry in the length table.
    lens_addr = builder.ptrtoint(lens, llvm_type.int(intp_bits))
    len_slot = builder.inttoptr(
        builder.add(lens_addr, byte_offset),
        llvm_type.pointer(context.get_value_type(numba.types.intp)),
    )

    base = load_item(context, builder, numba.types.intp[:], ptr_slot)
    length = load_item(context, builder, numba.types.intp[:], len_slot)

    # Bounds check happens before the element address is computed.
    out_of_range = builder.icmp_unsigned(">=", at, length)
    raise_exception(context, builder, out_of_range,
                    RuntimeError("array index out of range"))

    element_offset = builder.mul(at, literal_int64(dtype.itemsize))
    element_addr = builder.add(base, element_offset)
    element_ptr = builder.inttoptr(
        element_addr,
        llvm_type.pointer(context.get_value_type(numba.from_dtype(dtype))),
    )
    return load_item(context, builder, numba.from_dtype(dtype)[:], element_ptr)
def gen_arg_addrspace_md(fn):
    """
    Build the ``kernel_arg_addr_space`` metadata node for *fn*.

    Each argument type of the function contributes one integer constant:
    ``SPIR_GLOBAL_ADDRSPACE`` when ``cgutils.is_pointer`` reports a pointer,
    ``SPIR_PRIVATE_ADDRSPACE`` otherwise.  The returned metadata node lives
    in the function's module and consists of the metadata string
    ``"kernel_arg_addr_space"`` followed by those constants, in argument
    order.
    """
    module = fn.module
    arg_types = fn.type.pointee.args
    addrspace_consts = [
        lc.Constant.int(
            lc.Type.int(),
            SPIR_GLOBAL_ADDRSPACE if cgutils.is_pointer(arg_ty)
            else SPIR_PRIVATE_ADDRSPACE,
        )
        for arg_ty in arg_types
    ]
    tag = lc.MetaDataString.get(module, "kernel_arg_addr_space")
    return lc.MetaData.get(module, [tag] + addrspace_consts)
var = expr_args[i]
aty = expr_arg_types[i]
dst = builder.gep(args, [context.get_constant(types.intp, i + 1)])
if i >= ninouts: # reduction variables
ary = context.make_array(aty)(context, builder, arg)
strides = cgutils.unpack_tuple(builder, ary.strides, aty.ndim)
ary_shapes = cgutils.unpack_tuple(builder, ary.shape, aty.ndim)
# Start from 1 because we skip the first dimension of length num_threads just like sched.
for j in range(1, len(strides)):
array_strides.append(strides[j])
red_shapes[i] = ary_shapes[1:]
builder.store(builder.bitcast(ary.data, byte_ptr_t), dst)
elif isinstance(aty, types.ArrayCompatible):
if var in races:
typ = context.get_data_type(
aty.dtype) if aty.dtype != types.boolean else lc.Type.int(1)
rv_arg = cgutils.alloca_once(builder, typ)
builder.store(arg, rv_arg)
builder.store(builder.bitcast(rv_arg, byte_ptr_t), dst)
rv_to_arg_dict[var] = (arg, rv_arg)
array_strides.append(context.get_constant(types.intp, context.get_abi_sizeof(typ)))
else:
ary = context.make_array(aty)(context, builder, arg)
strides = cgutils.unpack_tuple(builder, ary.strides, aty.ndim)
for j in range(len(strides)):
array_strides.append(strides[j])
builder.store(builder.bitcast(ary.data, byte_ptr_t), dst)
else:
if i < num_inps:
# Scalar input, need to store the value in an array of size 1
# Build an LLVM function named "sum" in `module` with signature `fnty`.
# Control flow: entry -> loop (self-looping) -> exit.
# presumably func.args[0] is a pointer to int32 items and func.args[1] the
# item count -- confirm against the definition of `fnty`.
func = lc.Function.new(module, fnty, name="sum")
bb_entry = func.append_basic_block('entry')
bb_loop = func.append_basic_block('loop')
bb_exit = func.append_basic_block('exit')
builder = lc.Builder.new(bb_entry)
# The entry block only jumps into the loop.
builder.position_at_end(bb_entry)
builder.branch(bb_loop)
builder.position_at_end(bb_loop)
# Loop-carried values: both phi nodes start at 0 when arriving from entry.
index = builder.phi(int32)
index.add_incoming(lc.Constant.int(index.type, 0), bb_entry)
accum = builder.phi(int32)
accum.add_incoming(lc.Constant.int(accum.type, 0), bb_entry)
# Load the element at `index` and add it to the running total.
ptr = builder.gep(func.args[0], [index])
value = builder.load(ptr)
added = builder.add(accum, value)
# Feed the updated total and index back into the phi nodes for the next
# iteration (incoming edge from the loop block itself).
accum.add_incoming(added, bb_loop)
indexp1 = builder.add(index, lc.Constant.int(index.type, 1))
index.add_incoming(indexp1, bb_loop)
# Continue while index + 1 < func.args[1] (unsigned comparison).
# NOTE(review): the test comes after the load, so the body always runs at
# least once -- element 0 is read even if func.args[1] is 0.
cond = builder.icmp(lc.ICMP_ULT, indexp1, func.args[1])
builder.cbranch(cond, bb_loop, bb_exit)
# On exit, return the total computed in the last loop iteration.
builder.position_at_end(bb_exit)
builder.ret(added)
pyapi = c.pyapi
# gil_state = pyapi.gil_ensure() # acquire GIL
mod_name = context.insert_const_string(c.builder.module, "pandas")
class_obj = pyapi.import_module_noblock(mod_name)
df_obj = pyapi.call_method(class_obj, "DataFrame", ())
for i, cname, arr, arr_typ, dtype in zip(range(n_cols), col_names, col_arrs, arr_typs, dtypes):
# df['cname'] = boxed_arr
# TODO: datetime.date, DatetimeIndex?
name_str = context.insert_const_string(c.builder.module, cname)
cname_obj = pyapi.string_from_string(name_str)
# if column not unboxed, just used the boxed version from parent
unboxed_val = builder.extract_value(dataframe.unboxed, i)
not_unboxed = builder.icmp(lc.ICMP_EQ, unboxed_val, context.get_constant(types.int8, 0))
use_parent = builder.and_(has_parent, not_unboxed)
with builder.if_else(use_parent) as (then, orelse):
with then:
arr_obj = pyapi.object_getattr_string(dataframe.parent, cname)
pyapi.object_setitem(df_obj, cname_obj, arr_obj)
with orelse:
if dtype == string_type:
arr_obj = box_str_arr(arr_typ, arr, c)
elif isinstance(dtype, PDCategoricalDtype):
arr_obj = box_categorical_array(arr_typ, arr, c)
# context.nrt.incref(builder, arr_typ, arr)
elif arr_typ == string_array_split_view_type:
arr_obj = box_str_arr_split_view(arr_typ, arr, c)
elif dtype == types.List(string_type):
def check_int_status(self, num, ok_value=0):
    """
    Guard against an integer status below *ok_value*.

    Emits a signed ``num < ok_value`` comparison against a constant of
    ``num``'s own type; on the branch where it holds (marked unlikely),
    ``self.return_exception_raised()`` is called.
    """
    builder = self.builder
    threshold = lc.Constant.int(num.type, ok_value)
    is_failure = builder.icmp(lc.ICMP_SLT, num, threshold)
    with cgutils.if_unlikely(builder, is_failure):
        self.return_exception_raised()
for i in range(num_dim):
start, stop, step = loop_ranges[i]
start = load_range(start)
stop = load_range(stop)
assert(step == 1) # We do not support loop steps other than 1
step = load_range(step)
loop_ranges[i] = (start, stop, step)
if config.DEBUG_ARRAY_OPT:
print("call_parallel_gufunc loop_ranges[{}] = ".format(i), start,
stop, step)
cgutils.printf(builder, "loop range[{}]: %d %d (%d)\n".format(i),
start, stop, step)
# Commonly used LLVM types and constants
byte_t = lc.Type.int(8)
byte_ptr_t = lc.Type.pointer(byte_t)
byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t)
intp_t = context.get_value_type(types.intp)
uintp_t = context.get_value_type(types.uintp)
intp_ptr_t = lc.Type.pointer(intp_t)
uintp_ptr_t = lc.Type.pointer(uintp_t)
zero = context.get_constant(types.uintp, 0)
one = context.get_constant(types.uintp, 1)
one_type = one.type
sizeof_intp = context.get_abi_sizeof(intp_t)
# Prepare sched, first pop it out of expr_args, outer_sig, and gu_signature
expr_args.pop(0)
sched_sig = sin.pop(0)
if config.DEBUG_ARRAY_OPT:
"""
[iterty] = sig.args
[iter] = args
arrayty = iterty.array_type
if arrayty.ndim != 1:
raise NotImplementedError("iterating over %dD array" % arrayty.ndim)
iterobj = context.make_helper(builder, iterty, value=iter)
ary = make_array(arrayty)(context, builder, value=iterobj.array)
nitems, = cgutils.unpack_tuple(builder, ary.shape, count=1)
index = builder.load(iterobj.index)
is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
result.set_valid(is_valid)
with builder.if_then(is_valid):
value = _getitem_array_single_int(
context, builder, iterty.yield_type, arrayty, ary, index
)
result.yield_(value)
nindex = cgutils.increment_index(builder, index)
builder.store(nindex, iterobj.index)
from numba import cgutils
from numba.utils import IS_PY3
from . import llvm_types as lt
from numba.compiler import compile_extra, Flags
from numba.compiler_lock import global_compiler_lock
from numba.targets.registry import cpu_target
from numba.runtime import nrtdynmod
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Names exported by `from <module> import *`.
__all__ = ['Compiler']
# Null constant of type `lt._void_star`.
NULL = lc.Constant.null(lt._void_star)
# Frequently used 32-bit integer constants.
ZERO = lc.Constant.int(lt._int32, 0)
ONE = lc.Constant.int(lt._int32, 1)
# 32-bit constant 1|2 (== 3).
# presumably CPython's METH_VARARGS | METH_KEYWORDS method flags -- confirm
# against the C API headers.
METH_VARARGS_AND_KEYWORDS = lc.Constant.int(lt._int32, 1|2)
def get_header():
import numpy
import textwrap
return textwrap.dedent("""\
#include
#ifndef HAVE_LONGDOUBLE
#define HAVE_LONGDOUBLE %d
#endif
The parallel backend then stages multiple calls to this kernel concurrently
across a number of threads. In practice, for each item of work, the backend
duplicates `dimensions` and adjusts the first entry to reflect the size of
that item of work. It also builds an array of pointers into the args, offset
to where this item should read from and write to, based on its position
among the items of work. This allows the same kernel to be used for every
item of work, with only the reads/writes/domain sizes adjusted, and is safe
by virtue of the domain partitioning.
NOTE: The execution backend is passed the requested thread count, but it can
choose to ignore it (TBB)!
"""
assert isinstance(info, tuple) # guard against old usage
# Declare types and function
byte_t = lc.Type.int(8)
byte_ptr_t = lc.Type.pointer(byte_t)
byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t)
intp_t = ctx.get_value_type(types.intp)
intp_ptr_t = lc.Type.pointer(intp_t)
fnty = lc.Type.function(lc.Type.void(), [lc.Type.pointer(byte_ptr_t),
lc.Type.pointer(intp_t),
lc.Type.pointer(intp_t),
byte_ptr_t])
wrapperlib = ctx.codegen().create_library('parallelgufuncwrapper')
mod = wrapperlib.create_ir_module('parallel.gufunc.wrapper')
kernel_name = ".kernel.{}_{}".format(id(info.env), info.name)
lfunc = mod.add_function(fnty, name=kernel_name)
bb_entry = lfunc.append_basic_block('')