diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index f435820b23e8da909f0cff14ff5a1272874e865f..d24b61c12e43cd16431c9727d8fb057319475633 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -135,13 +135,39 @@ def get_le_neutral(dtype): if dtype.numpy_dtype.kind == "f": # OpenCL 1.1, section 6.11.2 return var("INFINITY") + elif dtype.numpy_dtype.kind == "i": + # OpenCL 1.1, section 6.11.3 + if dtype.numpy_dtype.itemsize == 4: + #32 bit integer + return var("INT_MAX") + elif dtype.numpy_dtype.itemsize == 8: + #64 bit integer + return var('LONG_MAX') + else: + raise NotImplementedError("less") + + +def get_ge_neutral(dtype): + """Return a number y that satisfies (x >= y) for all x.""" + + if dtype.numpy_dtype.kind == "f": + # OpenCL 1.1, section 6.11.2 + return -var("INFINITY") + elif dtype.numpy_dtype.kind == "i": + # OpenCL 1.1, section 6.11.3 + if dtype.numpy_dtype.itemsize == 4: + #32 bit integer + return var("INT_MIN") + elif dtype.numpy_dtype.itemsize == 8: + #64 bit integer + return var('LONG_MIN') else: raise NotImplementedError("less") class MaxReductionOperation(ScalarReductionOperation): def neutral_element(self, dtype, inames): - return -get_le_neutral(dtype) + return get_ge_neutral(dtype) def __call__(self, dtype, operand1, operand2, inames): return var("max")(operand1, operand2) @@ -213,6 +239,8 @@ def get_argext_preamble(kernel, func_id): c_code_mapper = CCodeMapper() + neutral = get_ge_neutral if op.neutral_sign < 0 else get_le_neutral + return (prefix, """ inline %(scalar_t)s %(prefix)s_init(%(index_t)s *index_out) { @@ -240,8 +268,7 @@ def get_argext_preamble(kernel, func_id): scalar_t=kernel.target.dtype_to_typename(func_id.scalar_dtype), prefix=prefix, index_t=kernel.target.dtype_to_typename(kernel.index_dtype), - neutral=c_code_mapper( - op.neutral_sign*get_le_neutral(func_id.scalar_dtype)), + neutral=c_code_mapper(neutral(func_id.scalar_dtype)), comp=op.update_comparison, )) diff --git 
a/loopy/target/opencl.py b/loopy/target/opencl.py index 31cf7c6b648ebf370a17d8beb2538b9748ddb30a..01e56405e30285705be7cb8eb6d75479c8658ef5 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -249,6 +249,10 @@ def opencl_symbol_mangler(kernel, name): return NumpyType(np.dtype(np.float64)), name elif name == "INFINITY": return NumpyType(np.dtype(np.float32)), name + elif name.startswith("INT_"): + return NumpyType(np.dtype(np.int32)), name + elif name.startswith("LONG_"): + return NumpyType(np.dtype(np.int64)), name else: return None diff --git a/test/test_loopy.py b/test/test_loopy.py index 9eaece6a83ae86de4af0eeaf4356f6502024ee40..e41d55b85e504bcd39db37bd888ddbedbf6122f4 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1933,6 +1933,46 @@ def test_unscheduled_insn_detection(): lp.generate_code(knl) +def test_integer_reduction(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + from loopy.kernel.data import temp_var_scope as scopes + from loopy.types import to_loopy_type + + n = 200 + for vtype in [np.int32, np.int64]: + var_int = np.random.randint(1000, size=n).astype(vtype) + var_lp = lp.TemporaryVariable('var', initializer=var_int, + read_only=True, + scope=scopes.PRIVATE, + dtype=to_loopy_type(vtype), + shape=lp.auto) + + reductions = [('max', lambda x: x == np.max(var_int)), + ('min', lambda x: x == np.min(var_int)), + ('sum', lambda x: x == np.sum(var_int)), + ('product', lambda x: x == np.prod(var_int)), + ('argmax', lambda x: (x[0] == np.max(var_int) and + var_int[out[1]] == np.max(var_int))), + ('argmin', lambda x: (x[0] == np.min(var_int) and + var_int[out[1]] == np.min(var_int)))] + + for reduction, function in reductions: + kstr = ("out" if 'arg' not in reduction + else "out[0], out[1]") + kstr += ' = {0}(k, var[k])'.format(reduction) + knl = lp.make_kernel('{[k]: 0<=k 1: exec(sys.argv[1])