diff --git a/.gitignore b/.gitignore index a0b59ed67a90d89ecb6921ff9175201249ef52ee..7be271c37ca0d6d1d67185ce4fbf202bbc488240 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ loopy/_git_rev.py .cache .env virtualenv-[0-9]*[0-9] + +*.so diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py new file mode 100644 index 0000000000000000000000000000000000000000..5e05ec6884c3c6b5b6c58d0080c6c0a52b91e2e4 --- /dev/null +++ b/contrib/c-integer-semantics.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# coding: utf-8 + +from os import system +import ctypes + +C_SRC = """ +#include +#include + +int64_t cdiv(int64_t a, int64_t b) +{ + return a/b; +} + +int64_t cmod(int64_t a, int64_t b) +{ + return a%b; +} + +#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \ + MACRO_NAME(int8, char) \ + MACRO_NAME(int16, short) \ + MACRO_NAME(int32, int) \ + MACRO_NAME(int64, long long) + +#define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \ + TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \ + { \ + if ((a<0) != (b<0)) \ + a = a - (b + (b<0) - (b>=0)); \ + return a/b; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV) +#undef LOOPY_DEFINE_FLOOR_DIV + +#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \ + TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + if (a<0) \ + a = a - (b-1); \ + return a/b; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) +#undef LOOPY_DEFINE_FLOOR_DIV_POS_B + + +#define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \ + TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0) \ + result += b; \ + return result; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B) +#undef LOOPY_DEFINE_MOD_POS_B + +#define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \ + TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0 && b > 0) \ + result += b; \ + if (result > 0 && b < 0) \ + result = result + b; \ + return result; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD) +#undef LOOPY_DEFINE_MOD + + +""" + + +def main(): + with open("int-experiments.c", "w") as outf: + outf.write(C_SRC) + + system('gcc -Wall -shared int-experiments.c -o int-experiments.so') + + int_exp = ctypes.CDLL("int-experiments.so") + for func in [ + int_exp.cdiv, + int_exp.cmod, + int_exp.loopy_floor_div_int64, + int_exp.loopy_floor_div_pos_b_int64, + int_exp.loopy_mod_pos_b_int64, + int_exp.loopy_mod_int64, + ]: + func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] + func.restype = ctypes.c_longlong + + cdiv = int_exp.cdiv # noqa + cmod = int_exp.cmod # noqa + int_floor_div = int_exp.loopy_floor_div_int64 + int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 + int_mod_pos_b = int_exp.loopy_mod_pos_b_int64 + int_mod = int_exp.loopy_mod_int64 + + m = 50 + + for a in range(-m, m): + for b in range(1, m): + cresult = int_floor_div_pos_b(a, b) + presult = a // b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + for a in range(-m, m): + for b in range(-m, m): + if b == 0: + continue + + cresult = int_floor_div(a, b) + presult = a // b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + for a in range(-m, m): + for b in range(1, m): + cresult = int_mod_pos_b(a, b) + presult = a % b + assert cresult == presult + + for a in range(-m, m): + for b in range(-m, m): + if b == 0: + continue + + cresult = int_mod(a, b) + presult = a % b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + #print(int_mod(552, -918), 552 % -918) + print(cmod(23, -11), 23 % -11) + + +if __name__ == "__main__": + main() diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 3c85060dacf03b52f6e0b1faf05ad4697b6a5d07..753b09b5da42835b88a000bc0400fa18a254d80f 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -547,7 +547,7 @@ Consider this example: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= -1 + ((15 + n) / 16); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + (15 + n) / 16; ++i_outer) for (int i_inner = 0; i_inner <= (-16 + n + -16 * i_outer >= 0 ? 15 : -1 + n + -16 * i_outer); ++i_inner) a[16 * i_outer + i_inner] = 0.0f; ... @@ -579,7 +579,7 @@ relation to loop nesting. For example, it's perfectly possible to request #define lid(N) ((int) get_local_id(N)) ... for (int i_inner = 0; i_inner <= (-17 + n >= 0 ? 15 : -1 + n); ++i_inner) - for (int i_outer = 0; i_outer <= -1 + -1 * i_inner + ((15 + n + 15 * i_inner) / 16); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + -1 * i_inner + (15 + n + 15 * i_inner) / 16; ++i_outer) a[16 * i_outer + i_inner] = 0.0f; ... @@ -603,8 +603,8 @@ commonly called 'loop tiling': >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= ((-16 + n) / 16); ++i_outer) - for (int j_outer = 0; j_outer <= ((-16 + n) / 16); ++j_outer) + for (int i_outer = 0; i_outer <= (-16 + n) / 16; ++i_outer) + for (int j_outer = 0; j_outer <= (-16 + n) / 16; ++j_outer) for (int i_inner = 0; i_inner <= 15; ++i_inner) for (int j_inner = 0; j_inner <= 15; ++j_inner) out[n * (16 * i_outer + i_inner) + 16 * j_outer + j_inner] = a[n * (16 * j_outer + j_inner) + 16 * i_outer + i_inner]; @@ -645,9 +645,8 @@ loop's tag to ``"unr"``: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) - #define int_floor_div_pos_b(a,b) ( ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) ) ... - for (int i_outer = 0; i_outer <= int_floor_div_pos_b(-4 + n, 4); ++i_outer) + for (int i_outer = 0; i_outer <= loopy_floor_div_pos_b_int32(-4 + n, 4); ++i_outer) { a[4 * i_outer] = 0.0f; a[1 + 4 * i_outer] = 0.0f; @@ -767,7 +766,7 @@ assumption: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= -1 + ((3 + n) / 4); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + (3 + n) / 4; ++i_outer) { a[4 * i_outer] = 0.0f; if (-2 + -4 * i_outer + n >= 0) @@ -797,7 +796,7 @@ enabling some cost savings: #define lid(N) ((int) get_local_id(N)) ... /* bulk slab for 'i_outer' */ - for (int i_outer = 0; i_outer <= -2 + ((3 + n) / 4); ++i_outer) + for (int i_outer = 0; i_outer <= -2 + (3 + n) / 4; ++i_outer) { a[4 * i_outer] = 0.0f; a[1 + 4 * i_outer] = 0.0f; @@ -806,7 +805,7 @@ enabling some cost savings: } /* final slab for 'i_outer' */ { - int const i_outer = -1 + n + -1 * (3 * n / 4); + int const i_outer = -1 + n + -1 * ((3 * n) / 4); if (-1 + n >= 0) { @@ -1220,7 +1219,7 @@ should call :func:`loopy.get_one_scheduled_kernel`: 2: RETURN FROM KERNEL rotate_v2 3: ... gbarrier 4: CALL KERNEL rotate_v2_0(extra_args=[], extra_inames=[]) - 5: arr[((1 + i_inner + i_outer*16) % n)] = tmp {id=rotate} + 5: arr[(1 + i_inner + i_outer*16) % n] = tmp {id=rotate} 6: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- @@ -1260,7 +1259,7 @@ put those instructions into the schedule. 4: ... gbarrier 5: CALL KERNEL rotate_v2_0(extra_args=['tmp_save_slot'], extra_inames=[]) 6: tmp = tmp_save_slot[tmp_reload_hw_dim_0_rotate_v2_0, tmp_reload_hw_dim_1_rotate_v2_0] {id=tmp.reload} - 7: arr[((1 + i_inner + i_outer*16) % n)] = tmp {id=rotate} + 7: arr[(1 + i_inner + i_outer*16) % n] = tmp {id=rotate} 8: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- @@ -1297,7 +1296,7 @@ The kernel translates into two OpenCL kernels. int tmp; tmp = tmp_save_slot[16 * gid(0) + lid(0)]; - arr[((1 + lid(0) + gid(0) * 16) % n)] = tmp; + arr[(1 + lid(0) + gid(0) * 16) % n] = tmp; } Now we can execute the kernel. diff --git a/loopy/expression.py b/loopy/expression.py index 3269bc09f064f57857eaa5218c8370383e0f735e..8414efaa5dd614d39e93f55aea3836141e5a6d6e 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -53,7 +53,7 @@ def dtype_to_type_context(target, dtype): return None -# {{{ vetorizability checker +# {{{ vectorizability checker class VectorizabilityChecker(RecursiveMapper): """The return value from this mapper is a :class:`bool` indicating whether diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 664d09d2ec74f9f898cfa9a3910c9267659b28b7..1b2a83aad970e4f7af9027bb43a1e71ab20560ad 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -329,7 +329,8 @@ def is_nonnegative(expr, over_set): space = over_set.get_space() from loopy.symbolic import aff_from_expr try: - aff = aff_from_expr(space, -expr-1) + with isl.SuppressedWarnings(space.get_ctx()): + aff = aff_from_expr(space, -expr-1) except Exception: return None expr_neg_set = isl.BasicSet.universe(space).add_constraint( diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 17dd9dc1034f4572e2bcf1d3abc806354c73336e..25b190809fdc38341c811ede15a8baae693a3116 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -78,23 +78,83 @@ class DTypeRegistryWrapper(object): # {{{ preamble generator def _preamble_generator(preamble_info): - c_funcs = set(func.c_name for func in preamble_info.seen_functions) - if "int_floor_div" in c_funcs: - yield ("05_int_floor_div", """ - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) + integer_type_names = ["int8", "int16", "int32", "int64"] + + def_integer_types_macro = ("03_def_integer_types", r""" + #define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \ + MACRO_NAME(int8, char) \ + MACRO_NAME(int16, short) \ + MACRO_NAME(int32, int) \ + MACRO_NAME(int64, long) """) - if "int_floor_div_pos_b" in c_funcs: - yield ("05_int_floor_div_pos_b", """ - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) + undef_integer_types_macro = ("05_undef_integer_types", """ + #undef LOOPY_CALL_WITH_INTEGER_TYPES """) + function_defs = { + "loopy_floor_div": r""" + #define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \ + inline TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \ + { \ + if ((a<0) != (b<0)) \ + a = a - (b + (b<0) - (b>=0)); \ + return a/b; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV) + #undef LOOPY_DEFINE_FLOOR_DIV + """, + + "loopy_floor_div_pos_b": r""" + #define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \ + inline TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + if (a<0) \ + a = a - (b-1); \ + return a/b; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) + #undef LOOPY_DEFINE_FLOOR_DIV_POS_B + """, + + "loopy_mod": r""" + #define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \ + inline TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0 && b > 0) \ + result += b; \ + if (result > 0 && b < 0) \ + result = result + b; \ + return result; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD) + #undef LOOPY_DEFINE_MOD + """, + + "loopy_mod_pos_b": r""" + #define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \ + inline TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0) \ + result += b; \ + return result; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B) + #undef LOOPY_DEFINE_MOD_POS_B + """, + } + + c_funcs = set(func.c_name for func in preamble_info.seen_functions) + + for func_name, func_body in six.iteritems(function_defs): + if any((func_name + "_" + tpname) in c_funcs + for tpname in integer_type_names): + yield def_integer_types_macro + yield ("04_%s" % func_name, func_body) + yield undef_integer_types_macro + # }}} diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index a6742c225cce42bee5f48d44662f0e515d14b556..1bf197c086659da9d5da9d4c7f6b463f04a1544f 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -29,7 +29,7 @@ import numpy as np from pymbolic.mapper import RecursiveMapper, IdentityMapper from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT, - PREC_POWER, PREC_SHIFT, + PREC_SHIFT, PREC_UNARY, PREC_LOGICAL_OR, PREC_LOGICAL_AND, PREC_BITWISE_AND, PREC_BITWISE_OR) @@ -297,7 +297,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): def make_subscript(self, array, base_expr, subscript): return base_expr[subscript] - def map_floor_div(self, expr, type_context): + def map_integer_div_operator(self, base_func_name, op_func, expr, type_context): from loopy.symbolic import get_dependencies iname_deps = get_dependencies(expr) & self.kernel.all_inames() domain = self.kernel.get_inames_domain(iname_deps) @@ -310,31 +310,46 @@ class ExpressionToCExpressionMapper(IdentityMapper): num_nonneg = is_nonnegative(expr.numerator, domain) den_nonneg = is_nonnegative(expr.denominator, domain) + result_dtype = self.infer_type(expr) + suffix = result_dtype.numpy_dtype.type.__name__ + def seen_func(name): - idt = self.kernel.index_dtype from loopy.codegen import SeenFunction self.codegen_state.seen_functions.add( - SeenFunction(name, name, (idt, idt))) + SeenFunction( + name, "%s_%s" % (name, suffix), + (result_dtype, result_dtype))) if den_nonneg: if num_nonneg: - # parenthesize to avoid negative signs being dragged in from the - # outside by associativity - return ( - self.rec(expr.numerator, type_context) - // + return op_func( + self.rec(expr.numerator, type_context), self.rec(expr.denominator, type_context)) else: - seen_func("int_floor_div_pos_b") - return var("int_floor_div_pos_b")( + seen_func("%s_pos_b" % base_func_name) + return var("%s_pos_b_%s" % (base_func_name, suffix))( self.rec(expr.numerator, 'i'), self.rec(expr.denominator, 'i')) else: - seen_func("int_floor_div") - return var("int_floor_div")( + seen_func(base_func_name) + return var("%s_%s" % (base_func_name, suffix))( self.rec(expr.numerator, 'i'), self.rec(expr.denominator, 'i')) + def map_floor_div(self, expr, type_context): + import operator + return self.map_integer_div_operator( + "loopy_floor_div", operator.floordiv, expr, type_context) + + def map_remainder(self, expr, type_context): + tgt_dtype = self.infer_type(expr) + if tgt_dtype.is_complex(): + raise RuntimeError("complex remainder not defined") + + import operator + return self.map_integer_div_operator( + "loopy_mod", operator.mod, expr, type_context) + def map_if(self, expr, type_context): result_type = self.infer_type(expr) return type(expr)( @@ -397,7 +412,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): elif isinstance(expr, np.integer): suffix = "" iinfo = np.iinfo(expr) - if iinfo.min < 0: + if iinfo.min == 0: suffix += "u" if iinfo.max > (2**31-1): suffix += "l" @@ -678,14 +693,6 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.rec(expr.numerator, type_context, tgt_dtype), self.rec(expr.denominator, type_context, tgt_dtype)) - def map_remainder(self, expr, type_context): - tgt_dtype = self.infer_type(expr) - if tgt_dtype.is_complex(): - raise RuntimeError("complex remainder not defined") - - return super(ExpressionToCExpressionMapper, self).map_remainder( - expr, type_context) - def map_power(self, expr, type_context): def base_impl(expr, type_context): from pymbolic.primitives import is_constant, is_zero @@ -748,10 +755,21 @@ class CExpressionToCodeMapper(RecursiveMapper): else: return s - def join_rec(self, joiner, iterable, prec): + def join_rec(self, joiner, iterable, prec, force_parens_around=()): f = joiner.join("%s" for i in iterable) return f % tuple( - self.rec(i, prec) for i in iterable) + self.rec_with_force_parens_around( + i, prec, force_parens_around=force_parens_around) + for i in iterable) + + def rec_with_force_parens_around( + self, expr, enclosing_prec, force_parens_around=()): + result = self.rec(expr, enclosing_prec) + + if isinstance(expr, force_parens_around): + result = "(%s)" % result + + return result def join(self, joiner, iterable): f = joiner.join("%s" for i in iterable) @@ -798,14 +816,6 @@ class CExpressionToCodeMapper(RecursiveMapper): self.rec(expr.index, PREC_NONE)), enclosing_prec, PREC_CALL) - def map_floor_div(self, expr, enclosing_prec): - # parenthesize to avoid negative signs being dragged in from the - # outside by associativity - return "(%s / %s)" % ( - self.rec(expr.numerator, PREC_PRODUCT), - # analogous to ^{-1} - self.rec(expr.denominator, PREC_POWER)) - def map_min(self, expr, enclosing_prec): what = type(expr).__name__.lower() @@ -899,33 +909,42 @@ class CExpressionToCodeMapper(RecursiveMapper): self.join_rec(" + ", expr.children, PREC_SUM), enclosing_prec, PREC_SUM) + multiplicative_primitives = (p.Product, p.Quotient, p.FloorDiv, p.Remainder) + def map_product(self, expr, enclosing_prec): - # Spaces prevent '**z' (times dereference z), which - # is hard to read. + force_parens_around = (p.Quotient, p.FloorDiv, p.Remainder) + + # Spaces prevent '**z' (times dereference z), which is hard to read. return self.parenthesize_if_needed( - self.join_rec(" * ", expr.children, PREC_PRODUCT), + self.join_rec(" * ", expr.children, PREC_PRODUCT, + force_parens_around=force_parens_around), enclosing_prec, PREC_PRODUCT) - def map_quotient(self, expr, enclosing_prec): - num = self.rec(expr.numerator, PREC_PRODUCT) + def _map_division_operator(self, operator, expr, enclosing_prec): + num_s = self.rec_with_force_parens_around(expr.numerator, PREC_PRODUCT, + force_parens_around=self.multiplicative_primitives) - # analogous to ^{-1} - denom = self.rec(expr.denominator, PREC_POWER) + denom_s = self.rec_with_force_parens_around(expr.denominator, PREC_PRODUCT, + force_parens_around=self.multiplicative_primitives) return self.parenthesize_if_needed( - "%s / %s" % ( + "%s %s %s" % ( # Space is necessary--otherwise '/*' # (i.e. divide-dererference) becomes # start-of-comment in C. - num, - denom), + num_s, + operator, + denom_s), enclosing_prec, PREC_PRODUCT) + def map_quotient(self, expr, enclosing_prec): + return self._map_division_operator("/", expr, enclosing_prec) + + def map_floor_div(self, expr, enclosing_prec): + return self._map_division_operator("/", expr, enclosing_prec) + def map_remainder(self, expr, enclosing_prec): - return "(%s %% %s)" % ( - self.rec(expr.numerator, PREC_PRODUCT), - # PREC_POWER analogous to ^{-1} - self.rec(expr.denominator, PREC_POWER)) + return self._map_division_operator("%", expr, enclosing_prec) def map_power(self, expr, enclosing_prec): return "pow(%s, %s)" % ( diff --git a/test/test_expression.py b/test/test_expression.py new file mode 100644 index 0000000000000000000000000000000000000000..7a26e7e2e2c753f08f1b5eb5fc00f300a709e25e --- /dev/null +++ b/test/test_expression.py @@ -0,0 +1,486 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2019 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +from six.moves import range + +import sys +import numpy as np +import loopy as lp +import pyopencl as cl +import pyopencl.clmath # noqa +import pyopencl.clrandom # noqa +import pytest + +from pymbolic.mapper.evaluator import EvaluationMapper + + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # 'cl.create_some_context' + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +# {{{ code generator fuzzing + +class BoundsCheckError(ValueError): + pass + + +class BoundsCheckingEvaluationMapper(EvaluationMapper): + def __init__(self, context, lbound, ubound): + super(BoundsCheckingEvaluationMapper, self).__init__(context) + self.lbound = lbound + self.ubound = ubound + + def rec(self, expr): + result = super(BoundsCheckingEvaluationMapper, self).rec(expr) + + if result > self.ubound: + raise BoundsCheckError() + if result < self.lbound: + raise BoundsCheckError() + + return result + + +def make_random_fp_value(use_complex): + from random import randrange, uniform + v = randrange(3) + if v == 0: + while True: + z = randrange(-1000, 1000) + if z: + return z + + elif v == 1 or not use_complex: + return uniform(-10, 10) + else: + cval = uniform(-10, 10) + 1j*uniform(-10, 10) + if randrange(0, 2) == 0: + return np.complex128(cval) + else: + return np.complex128(cval) + + +def make_random_fp_expression(prefix, var_values, size, use_complex): + from random import randrange + import pymbolic.primitives as p + v = randrange(1500) + size[0] += 1 + if v < 500 and size[0] < 40: + term_count = randrange(2, 5) + if randrange(2) < 1: + cls = p.Sum + else: + cls = p.Product + return cls(tuple( + make_random_fp_expression(prefix, var_values, size, use_complex) + for i in range(term_count))) + elif v < 750: + return make_random_fp_value(use_complex=use_complex) + elif v < 1000: + var_name = "var_%s_%d" % (prefix, len(var_values)) + assert var_name not in var_values + var_values[var_name] = make_random_fp_value(use_complex=use_complex) + return p.Variable(var_name) + elif v < 1250: + # FIXME: What does this comment mean? + # Cannot use '-' because that destroys numpy constants. + return p.Sum(( + make_random_fp_expression(prefix, var_values, size, use_complex), + - make_random_fp_expression(prefix, var_values, size, use_complex))) + elif v < 1500: + # FIXME: What does this comment mean? + # Cannot use '/' because that destroys numpy constants. + return p.Quotient( + make_random_fp_expression(prefix, var_values, size, use_complex), + make_random_fp_expression(prefix, var_values, size, use_complex)) + else: + assert False + + +def make_random_int_value(nonneg): + from random import randrange + if nonneg: + return randrange(1, 100) + + else: + while True: + z = randrange(-100, 100) + if z: + return z + + +def make_random_int_expression(prefix, var_values, size, nonneg): + from random import randrange + import pymbolic.primitives as p + if size[0] < 10: + v = randrange(800) + else: + v = randrange(1000) + + size[0] += 1 + + if v < 10: + var_name = "var_%s_%d" % (prefix, len(var_values)) + assert var_name not in var_values + var_values[var_name] = make_random_int_value(nonneg) + return p.Variable(var_name) + elif v < 200 and size[0] < 40: + term_count = randrange(2, 5) + if randrange(2) < 1: + cls = p.Sum + else: + cls = p.Product + return cls(tuple( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg) + for i in range(term_count))) + elif v < 400 and size[0] < 40: + return p.FloorDiv( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + make_nonzero_random_int_expression( + prefix, var_values, size, nonneg=nonneg)) + elif v < 600 and size[0] < 40: + return p.Remainder( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + make_nonzero_random_int_expression( + prefix, var_values, size, nonneg=nonneg)) + elif v < 800 and not nonneg and size[0] < 40: + return p.Sum(( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + - make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + )) + else: + return make_random_int_value(nonneg) + + +def make_nonzero_random_int_expression(prefix, var_values, size, nonneg): + while True: + var_values_new = var_values.copy() + size_new = size[:] + result = make_random_int_expression( + prefix, var_values_new, size_new, nonneg) + + result_eval = EvaluationMapper(var_values_new)(result) + + if result_eval != 0: + var_values.update(var_values_new) + size[:] = size_new + + return result + + assert False + + +def generate_random_fuzz_examples(expr_type): + i = 0 + while True: + size = [0] + var_values = {} + if expr_type in ["real", "complex"]: + expr = make_random_fp_expression("e%d" % i, var_values, size, + use_complex=expr_type == "complex") + elif expr_type == "int": + expr = make_random_int_expression( + "e%d" % i, var_values, size, nonneg=False) + elif expr_type == "int_nonneg": + expr = make_random_int_expression( + "e%d" % i, var_values, size, nonneg=True) + else: + raise ValueError("unknown expr_type: %s" % expr_type) + + yield i, expr, var_values + + i += 1 + + +def assert_parse_roundtrip(expr): + from pymbolic.mapper.stringifier import StringifyMapper + strified = StringifyMapper()(expr) + from pymbolic import parse + parsed_expr = parse(strified) + print(expr) + print(parsed_expr) + assert expr == parsed_expr + + +@pytest.mark.parametrize("random_seed", [0, 1, 2, 3, 4, 5]) +@pytest.mark.parametrize("expr_type", ["int", "int_nonneg", "real", "complex"]) +def test_fuzz_expression_code_gen(ctx_factory, expr_type, random_seed): + from pymbolic import evaluate + + def get_numpy_type(x): + if expr_type in ["real", "complex"]: + if isinstance(x, (complex, np.complexfloating)): + return np.complex128 + else: + return np.float64 + + elif expr_type in ["int", "int_nonneg"]: + return np.int64 + + else: + raise ValueError("unknown expr_type: %s" % expr_type) + + from random import seed + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + seed(random_seed) + + data = [] + instructions = [] + + ref_values = {} + + if expr_type in ["real", "complex"]: + result_type = np.complex128 + elif expr_type in ["int", "int_nonneg"]: + result_type = np.int64 + else: + assert False + + var_names = [] + + fuzz_iter = iter(generate_random_fuzz_examples(expr_type)) + count = 0 + + while True: + if count == 10: + break + + i, expr, var_values = next(fuzz_iter) + + var_name = "expr%d" % i + + print(expr) + #assert_parse_roundtrip(expr) + + if expr_type in ["int", "int_nonneg"]: + result_type_iinfo = np.iinfo(np.int32) + bceval_mapper = BoundsCheckingEvaluationMapper( + var_values, + lbound=result_type_iinfo.min, + ubound=result_type_iinfo.max) + print(expr) + try: + ref_values[var_name] = bceval_mapper(expr) + except BoundsCheckError: + print(expr) + print("BOUNDS CHECK FAILED") + continue + else: + try: + ref_values[var_name] = evaluate(expr, var_values) + except ZeroDivisionError: + continue + + count += 1 + + data.append(lp.GlobalArg(var_name, + result_type, + shape=())) + data.extend([ + lp.TemporaryVariable(name, get_numpy_type(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.extend([ + lp.Assignment(name, get_numpy_type(val)(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.append(lp.Assignment(var_name, expr)) + + if expr_type == "int_nonneg": + var_names.extend(var_values) + + knl = lp.make_kernel("{ : }", instructions, data, seq_dependencies=True) + + import islpy as isl + knl = lp.assume(knl, isl.BasicSet( + "[%s] -> { : %s}" + % ( + ", ".join(var_names), + " and ".join("%s >= 0" % name for name in var_names)))) + + knl = lp.set_options(knl, return_dict=True) + print(knl) + evt, lp_values = knl(queue, out_host=True) + + for name, ref_value in six.iteritems(ref_values): + lp_value = lp_values[name] + if expr_type in ["real", "complex"]: + err = abs(ref_value-lp_value)/abs(ref_value) + elif expr_type in ["int", "int_nonneg"]: + err = abs(ref_value-lp_value) + else: + assert False + + if abs(err) > 1e-10: + print(80*"-") + print(knl) + print(80*"-") + print(lp.generate_code_v2(knl).device_code()) + print(80*"-") + print("WRONG: %s rel error=%g" % (name, err)) + print("reference=%r" % ref_value) + print("loopy=%r" % lp_value) + print(80*"-") + 1/0 + + print(lp.generate_code_v2(knl).device_code()) + +# }}} + + +def test_sci_notation_literal(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + set_kernel = lp.make_kernel( + ''' { [i]: 0<=i<12 } ''', + ''' out[i] = 1e-12''') + + set_kernel = lp.set_options(set_kernel, write_cl=True) + + evt, (out,) = set_kernel(queue) + + assert (np.abs(out.get() - 1e-12) < 1e-20).all() + + +def test_indexof(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + ''' { [i,j]: 0<=i,j<5 } ''', + ''' out[i,j] = indexof(out[i,j])''') + + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + out = out.get() + + assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + +def test_indexof_vec(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + if ctx.devices[0].platform.name.startswith("Portable"): + # Accurate as of 2015-10-08 + pytest.skip("POCL miscompiles vector code") + + knl = lp.make_kernel( + ''' { [i,j,k]: 0<=i,j,k<4 } ''', + ''' out[i,j,k] = indexof_vec(out[i,j,k])''') + + knl = lp.tag_inames(knl, {"i": "vec"}) + knl = lp.tag_data_axes(knl, "out", "vec,c,c") + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + #out = out.get() + #assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + +def test_is_expression_equal(): + from loopy.symbolic import is_expression_equal + from pymbolic import var + + x = var("x") + y = var("y") + + assert is_expression_equal(x+2, 2+x) + + assert is_expression_equal((x+2)**2, x**2 + 4*x + 4) + assert is_expression_equal((x+y)**2, x**2 + 2*x*y + y**2) + + +def test_integer_associativity(): + knl = lp.make_kernel( + "{[i] : 0<=i 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 119d57adf2c850eba3bb6ad5df3c0a8d0644b70c..0a89eee0ac352dc954f63798bd5be6266a092441 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -22,7 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six +import six # noqa: F401 from six.moves import range import sys @@ -260,119 +260,6 @@ def test_multi_cse(ctx_factory): print(compiled.get_code()) -# {{{ code generator fuzzing - -def make_random_value(): - from random import randrange, uniform - v = randrange(3) - if v == 0: - while True: - z = randrange(-1000, 1000) - if z: - return z - - elif v == 1: - return uniform(-10, 10) - else: - cval = uniform(-10, 10) + 1j*uniform(-10, 10) - if randrange(0, 2) == 0: - return np.complex128(cval) - else: - return np.complex128(cval) - - -def make_random_expression(var_values, size): - from random import randrange - import pymbolic.primitives as p - v = randrange(1500) - size[0] += 1 - if v < 500 and size[0] < 40: - term_count = randrange(2, 5) - if randrange(2) < 1: - cls = p.Sum - else: - cls = p.Product - return cls(tuple( - make_random_expression(var_values, size) - for i in range(term_count))) - elif v < 750: - return make_random_value() - elif v < 1000: - var_name = "var_%d" % len(var_values) - assert var_name not in var_values - var_values[var_name] = make_random_value() - return p.Variable(var_name) - elif v < 1250: - # Cannot use '-' because that destroys numpy constants. - return p.Sum(( - make_random_expression(var_values, size), - - make_random_expression(var_values, size))) - elif v < 1500: - # Cannot use '/' because that destroys numpy constants. - return p.Quotient( - make_random_expression(var_values, size), - make_random_expression(var_values, size)) - - -def generate_random_fuzz_examples(count): - for i in range(count): - size = [0] - var_values = {} - expr = make_random_expression(var_values, size) - yield expr, var_values - - -def test_fuzz_code_generator(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - if ctx.devices[0].platform.vendor.startswith("Advanced Micro"): - pytest.skip("crashes on AMD 15.12") - - #from expr_fuzz import get_fuzz_examples - #for expr, var_values in get_fuzz_examples(): - for expr, var_values in generate_random_fuzz_examples(50): - from pymbolic import evaluate - try: - true_value = evaluate(expr, var_values) - except ZeroDivisionError: - continue - - def get_dtype(x): - if isinstance(x, (complex, np.complexfloating)): - return np.complex128 - else: - return np.float64 - - knl = lp.make_kernel("{ : }", - [lp.Assignment("value", expr)], - [lp.GlobalArg("value", np.complex128, shape=())] - + [ - lp.ValueArg(name, get_dtype(val)) - for name, val in six.iteritems(var_values) - ]) - ck = lp.CompiledKernel(ctx, knl) - evt, (lp_value,) = ck(queue, out_host=True, **var_values) - err = abs(true_value-lp_value)/abs(true_value) - if abs(err) > 1e-10: - print(80*"-") - print("WRONG: rel error=%g" % err) - print("true=%r" % true_value) - print("loopy=%r" % lp_value) - print(80*"-") - print(ck.get_code()) - print(80*"-") - print(var_values) - print(80*"-") - print(repr(expr)) - print(80*"-") - print(expr) - print(80*"-") - 1/0 - -# }}} - - def test_bare_data_dependency(ctx_factory): dtype = np.dtype(np.float32) ctx = ctx_factory() @@ -970,21 +857,6 @@ def test_auto_test_zero_warmup_rounds(ctx_factory): warmup_rounds=0) -def test_sci_notation_literal(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - set_kernel = lp.make_kernel( - ''' { [i]: 0<=i<12 } ''', - ''' out[i] = 1e-12''') - - set_kernel = lp.set_options(set_kernel, write_cl=True) - - evt, (out,) = set_kernel(queue) - - assert (np.abs(out.get() - 1e-12) < 1e-20).all() - - def test_variable_size_temporary(): knl = lp.make_kernel( ''' { [i,j]: 0<=i,j