From 19743eecd898decf96e535c5e3c6799ca22f8452 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 19 Aug 2019 16:18:40 -0500 Subject: [PATCH 01/14] Refactor test_fuzz_expression_code_gen to avoid having to compile 50 kernels --- test/test_loopy.py | 90 ++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 119d57adf..94c5a4543 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -281,7 +281,7 @@ def make_random_value(): return np.complex128(cval) -def make_random_expression(var_values, size): +def make_random_expression(prefix, var_values, size): from random import randrange import pymbolic.primitives as p v = randrange(1500) @@ -293,74 +293,94 @@ def make_random_expression(var_values, size): else: cls = p.Product return cls(tuple( - make_random_expression(var_values, size) + make_random_expression(prefix, var_values, size) for i in range(term_count))) elif v < 750: return make_random_value() elif v < 1000: - var_name = "var_%d" % len(var_values) + var_name = "var_%s_%d" % (prefix, len(var_values)) assert var_name not in var_values var_values[var_name] = make_random_value() return p.Variable(var_name) elif v < 1250: # Cannot use '-' because that destroys numpy constants. return p.Sum(( - make_random_expression(var_values, size), - - make_random_expression(var_values, size))) + make_random_expression(prefix, var_values, size), + - make_random_expression(prefix, var_values, size))) elif v < 1500: # Cannot use '/' because that destroys numpy constants. return p.Quotient( - make_random_expression(var_values, size), - make_random_expression(var_values, size)) + make_random_expression(prefix, var_values, size), + make_random_expression(prefix, var_values, size)) def generate_random_fuzz_examples(count): for i in range(count): size = [0] var_values = {} - expr = make_random_expression(var_values, size) + expr = make_random_expression("e%d" % i, var_values, size) yield expr, var_values -def test_fuzz_code_generator(ctx_factory): +@pytest.mark.parametrize("random_seed", [0, 1, 2]) +def test_fuzz_expression_code_gen(ctx_factory, random_seed=0): + from pymbolic import evaluate + + def get_numpy_type(x): + if isinstance(x, (complex, np.complexfloating)): + return np.complex128 + else: + return np.float64 + + from random import seed + ctx = ctx_factory() queue = cl.CommandQueue(ctx) - if ctx.devices[0].platform.vendor.startswith("Advanced Micro"): - pytest.skip("crashes on AMD 15.12") + seed(random_seed) + + data = [] + instructions = [] + + ref_values = {} + + for i, (expr, var_values) in enumerate( + generate_random_fuzz_examples(50)): + + var_name = "expr%d" % i - #from expr_fuzz import get_fuzz_examples - #for expr, var_values in get_fuzz_examples(): - for expr, var_values in generate_random_fuzz_examples(50): - from pymbolic import evaluate try: - true_value = evaluate(expr, var_values) + ref_values[var_name] = evaluate(expr, var_values) except ZeroDivisionError: continue - def get_dtype(x): - if isinstance(x, (complex, np.complexfloating)): - return np.complex128 - else: - return np.float64 - - knl = lp.make_kernel("{ : }", - [lp.Assignment("value", expr)], - [lp.GlobalArg("value", np.complex128, shape=())] - + [ - lp.ValueArg(name, get_dtype(val)) - for name, val in six.iteritems(var_values) - ]) - ck = lp.CompiledKernel(ctx, knl) - evt, (lp_value,) = ck(queue, out_host=True, **var_values) - err = abs(true_value-lp_value)/abs(true_value) + data.append(lp.GlobalArg(var_name, np.complex128, shape=())) + data.extend([ + lp.TemporaryVariable(name, get_numpy_type(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.extend([ + lp.Assignment(name, get_numpy_type(val)(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.append(lp.Assignment(var_name, expr)) + + knl = lp.make_kernel("{ : }", instructions, data) + print(knl) + + knl = lp.set_options(knl, return_dict=True) + evt, lp_values = knl(queue, out_host=True) + + for name, ref_value in six.iteritems(ref_values): + lp_value = lp_values[name] + err = abs(ref_value-lp_value)/abs(ref_value) if abs(err) > 1e-10: print(80*"-") - print("WRONG: rel error=%g" % err) - print("true=%r" % true_value) + print("WRONG: %s rel error=%g" % (name, err)) + print("true=%r" % ref_value) print("loopy=%r" % lp_value) print(80*"-") - print(ck.get_code()) + print(knl) print(80*"-") print(var_values) print(80*"-") -- GitLab From e4ffd81852d297b77847cc4fae05e1ba741769e7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 19 Aug 2019 20:17:23 -0500 Subject: [PATCH 02/14] Add C Integer semantics experiment --- contrib/c-integer-semantics.py | 145 +++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 contrib/c-integer-semantics.py diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py new file mode 100644 index 000000000..ffdeecc85 --- /dev/null +++ b/contrib/c-integer-semantics.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# coding: utf-8 + +from os import system +import ctypes + +C_SRC = """ +#include +#include + +int64_t cdiv(int64_t a, int64_t b) +{ + return a/b; +} + +int64_t cmod(int64_t a, int64_t b) +{ + return a%b; +} + +#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \ + MACRO_NAME(short) \ + MACRO_NAME(int) \ + MACRO_NAME(long) \ + MACRO_NAME(int64_t) + +#define LOOPY_DEFINE_FLOOR_DIV(TYPE) \ + TYPE loopy_floor_div_##TYPE(TYPE a, TYPE b) \ + { \ + if ((a<0) != (b<0)) \ + a = a - (b + (b<0) - (b>=0)); \ + return a/b; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV) +#undef LOOPY_DEFINE_FLOOR_DIV + +#define LOOPY_DEFINE_FLOOR_DIV_POS_B(TYPE) \ + TYPE loopy_floor_div_pos_b_##TYPE(TYPE a, TYPE b) \ + { \ + if (a<0) \ + a = a - (b-1); \ + return a/b; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) +#undef LOOPY_DEFINE_FLOOR_DIV_POS_B + + +#define LOOPY_DEFINE_MOD_POS_B(TYPE) \ + TYPE loopy_mod_pos_b_##TYPE(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0) \ + result += b; \ + return result; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B) +#undef LOOPY_DEFINE_MOD_POS_B + +#define LOOPY_DEFINE_MOD(TYPE) \ + TYPE loopy_mod_##TYPE(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0 && b > 0) \ + result += b; \ + if (result > 0 && b < 0) \ + result = result + b; \ + return result; \ + } + +LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD) +#undef LOOPY_DEFINE_MOD + + +""" + + +def main(): + with open("int-experiments.c", "w") as outf: + outf.write(C_SRC) + + system('gcc -Wall -shared int-experiments.c -o int-experiments.so') + + int_exp = ctypes.CDLL("int-experiments.so") + for func in [ + int_exp.cdiv, + int_exp.cmod, + int_exp.loopy_floor_div_int64_t, + int_exp.loopy_floor_div_pos_b_int64_t, + int_exp.loopy_mod_pos_b_int64_t, + int_exp.loopy_mod_int64_t, + ]: + func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] + func.restype = ctypes.c_longlong + + cdiv = int_exp.cdiv # noqa + cmod = int_exp.cmod # noqa + int_floor_div = int_exp.loopy_floor_div_int64_t + int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64_t + int_mod_pos_b = int_exp.loopy_mod_pos_b_int64_t + int_mod = int_exp.loopy_mod_int64_t + + m = 50 + + for a in range(-m, m): + for b in range(1, m): + cresult = int_floor_div_pos_b(a, b) + presult = a // b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + for a in range(-m, m): + for b in range(-m, m): + if b == 0: + continue + + cresult = int_floor_div(a, b) + presult = a // b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + for a in range(-m, m): + for b in range(1, m): + cresult = int_mod_pos_b(a, b) + presult = a % b + assert cresult == presult + + for a in range(-m, m): + for b in range(-m, m): + if b == 0: + continue + + cresult = int_mod(a, b) + presult = a % b + assert cresult == presult + if cresult != presult: + print(a, b, cresult, presult) + + +if __name__ == "__main__": + main() -- GitLab From 9b492201b9e34ec207fe1cc7d3d8c1bcfb64852d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 16:38:56 -0500 Subject: [PATCH 03/14] Silence ISL warnings in is_nonnegative --- loopy/isl_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 664d09d2e..1b2a83aad 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -329,7 +329,8 @@ def is_nonnegative(expr, over_set): space = over_set.get_space() from loopy.symbolic import aff_from_expr try: - aff = aff_from_expr(space, -expr-1) + with isl.SuppressedWarnings(space.get_ctx()): + aff = aff_from_expr(space, -expr-1) except Exception: return None expr_neg_set = isl.BasicSet.universe(space).add_constraint( -- GitLab From e351b685ebb77cb4586352e6959825436bde9361 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 16:39:36 -0500 Subject: [PATCH 04/14] Tweak C integer semantics experiment to match loopy-generated code --- contrib/c-integer-semantics.py | 43 ++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index ffdeecc85..5e05ec688 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -19,13 +19,13 @@ int64_t cmod(int64_t a, int64_t b) } #define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \ - MACRO_NAME(short) \ - MACRO_NAME(int) \ - MACRO_NAME(long) \ - MACRO_NAME(int64_t) + MACRO_NAME(int8, char) \ + MACRO_NAME(int16, short) \ + MACRO_NAME(int32, int) \ + MACRO_NAME(int64, long long) -#define LOOPY_DEFINE_FLOOR_DIV(TYPE) \ - TYPE loopy_floor_div_##TYPE(TYPE a, TYPE b) \ +#define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \ + TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \ { \ if ((a<0) != (b<0)) \ a = a - (b + (b<0) - (b>=0)); \ @@ -35,8 +35,8 @@ int64_t cmod(int64_t a, int64_t b) LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV) #undef LOOPY_DEFINE_FLOOR_DIV -#define LOOPY_DEFINE_FLOOR_DIV_POS_B(TYPE) \ - TYPE loopy_floor_div_pos_b_##TYPE(TYPE a, TYPE b) \ +#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \ + TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \ { \ if (a<0) \ a = a - (b-1); \ @@ -47,8 +47,8 @@ LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) #undef LOOPY_DEFINE_FLOOR_DIV_POS_B -#define LOOPY_DEFINE_MOD_POS_B(TYPE) \ - TYPE loopy_mod_pos_b_##TYPE(TYPE a, TYPE b) \ +#define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \ + TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \ { \ TYPE result = a%b; \ if (result < 0) \ @@ -59,8 +59,8 @@ LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B) #undef LOOPY_DEFINE_MOD_POS_B -#define LOOPY_DEFINE_MOD(TYPE) \ - TYPE loopy_mod_##TYPE(TYPE a, TYPE b) \ +#define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \ + TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \ { \ TYPE result = a%b; \ if (result < 0 && b > 0) \ @@ -87,20 +87,20 @@ def main(): for func in [ int_exp.cdiv, int_exp.cmod, - int_exp.loopy_floor_div_int64_t, - int_exp.loopy_floor_div_pos_b_int64_t, - int_exp.loopy_mod_pos_b_int64_t, - int_exp.loopy_mod_int64_t, + int_exp.loopy_floor_div_int64, + int_exp.loopy_floor_div_pos_b_int64, + int_exp.loopy_mod_pos_b_int64, + int_exp.loopy_mod_int64, ]: func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] func.restype = ctypes.c_longlong cdiv = int_exp.cdiv # noqa cmod = int_exp.cmod # noqa - int_floor_div = int_exp.loopy_floor_div_int64_t - int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64_t - int_mod_pos_b = int_exp.loopy_mod_pos_b_int64_t - int_mod = int_exp.loopy_mod_int64_t + int_floor_div = int_exp.loopy_floor_div_int64 + int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 + int_mod_pos_b = int_exp.loopy_mod_pos_b_int64 + int_mod = int_exp.loopy_mod_int64 m = 50 @@ -140,6 +140,9 @@ def main(): if cresult != presult: print(a, b, cresult, presult) + #print(int_mod(552, -918), 552 % -918) + print(cmod(23, -11), 23 % -11) + if __name__ == "__main__": main() -- GitLab From e576b2c1e7c6d2ac1b65841ae76a1c8300fb1b28 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 16:39:49 -0500 Subject: [PATCH 05/14] Comment typo fix --- loopy/expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/expression.py b/loopy/expression.py index 3269bc09f..8414efaa5 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -53,7 +53,7 @@ def dtype_to_type_context(target, dtype): return None -# {{{ vetorizability checker +# {{{ vectorizability checker class VectorizabilityChecker(RecursiveMapper): """The return value from this mapper is a :class:`bool` indicating whether -- GitLab From 3ace8d6d424b84c8b63d23263faafbcb342c72b4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 16:40:13 -0500 Subject: [PATCH 06/14] Add .so files to .gitignore (for C semantics experiment) --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a0b59ed67..7be271c37 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ loopy/_git_rev.py .cache .env virtualenv-[0-9]*[0-9] + +*.so -- GitLab From a5ad8ccf0b875f8f659d2d1c6a7773aaf45bdb9b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 16:47:35 -0500 Subject: [PATCH 07/14] Integer literals: fix when 'u' is emitted --- loopy/target/c/codegen/expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index a6742c225..d31221f6e 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -397,7 +397,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): elif isinstance(expr, np.integer): suffix = "" iinfo = np.iinfo(expr) - if iinfo.min < 0: + if iinfo.min == 0: suffix += "u" if iinfo.max > (2**31-1): suffix += "l" -- GitLab From a9e419cb99bf91a78de8be508ff6081c5f57b688 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 17:07:14 -0500 Subject: [PATCH 08/14] Codegen for signed floordiv and mod: no more macros, fix mod --- loopy/target/c/__init__.py | 86 +++++++++++++++++++++++----- loopy/target/c/codegen/expression.py | 47 ++++++++------- 2 files changed, 100 insertions(+), 33 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 17dd9dc10..25b190809 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -78,23 +78,83 @@ class DTypeRegistryWrapper(object): # {{{ preamble generator def _preamble_generator(preamble_info): - c_funcs = set(func.c_name for func in preamble_info.seen_functions) - if "int_floor_div" in c_funcs: - yield ("05_int_floor_div", """ - #define int_floor_div(a,b) \ - (( (a) - \ - ( ( (a)<0 ) != ( (b)<0 )) \ - *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ - / (b) ) + integer_type_names = ["int8", "int16", "int32", "int64"] + + def_integer_types_macro = ("03_def_integer_types", r""" + #define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \ + MACRO_NAME(int8, char) \ + MACRO_NAME(int16, short) \ + MACRO_NAME(int32, int) \ + MACRO_NAME(int64, long) """) - if "int_floor_div_pos_b" in c_funcs: - yield ("05_int_floor_div_pos_b", """ - #define int_floor_div_pos_b(a,b) ( \ - ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ - ) + undef_integer_types_macro = ("05_undef_integer_types", """ + #undef LOOPY_CALL_WITH_INTEGER_TYPES """) + function_defs = { + "loopy_floor_div": r""" + #define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \ + inline TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \ + { \ + if ((a<0) != (b<0)) \ + a = a - (b + (b<0) - (b>=0)); \ + return a/b; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV) + #undef LOOPY_DEFINE_FLOOR_DIV + """, + + "loopy_floor_div_pos_b": r""" + #define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \ + inline TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + if (a<0) \ + a = a - (b-1); \ + return a/b; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B) + #undef LOOPY_DEFINE_FLOOR_DIV_POS_B + """, + + "loopy_mod": r""" + #define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \ + inline TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0 && b > 0) \ + result += b; \ + if (result > 0 && b < 0) \ + result = result + b; \ + return result; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD) + #undef LOOPY_DEFINE_MOD + """, + + "loopy_mod_pos_b": r""" + #define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \ + inline TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \ + { \ + TYPE result = a%b; \ + if (result < 0) \ + result += b; \ + return result; \ + } + LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B) + #undef LOOPY_DEFINE_MOD_POS_B + """, + } + + c_funcs = set(func.c_name for func in preamble_info.seen_functions) + + for func_name, func_body in six.iteritems(function_defs): + if any((func_name + "_" + tpname) in c_funcs + for tpname in integer_type_names): + yield def_integer_types_macro + yield ("04_%s" % func_name, func_body) + yield undef_integer_types_macro + # }}} diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index d31221f6e..960df67ea 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -297,7 +297,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): def make_subscript(self, array, base_expr, subscript): return base_expr[subscript] - def map_floor_div(self, expr, type_context): + def map_integer_div_operator(self, base_func_name, op_func, expr, type_context): from loopy.symbolic import get_dependencies iname_deps = get_dependencies(expr) & self.kernel.all_inames() domain = self.kernel.get_inames_domain(iname_deps) @@ -310,31 +310,46 @@ class ExpressionToCExpressionMapper(IdentityMapper): num_nonneg = is_nonnegative(expr.numerator, domain) den_nonneg = is_nonnegative(expr.denominator, domain) + result_dtype = self.infer_type(expr) + suffix = result_dtype.numpy_dtype.type.__name__ + def seen_func(name): - idt = self.kernel.index_dtype from loopy.codegen import SeenFunction self.codegen_state.seen_functions.add( - SeenFunction(name, name, (idt, idt))) + SeenFunction( + name, "%s_%s" % (name, suffix), + (result_dtype, result_dtype))) if den_nonneg: if num_nonneg: - # parenthesize to avoid negative signs being dragged in from the - # outside by associativity - return ( - self.rec(expr.numerator, type_context) - // + return op_func( + self.rec(expr.numerator, type_context), self.rec(expr.denominator, type_context)) else: - seen_func("int_floor_div_pos_b") - return var("int_floor_div_pos_b")( + seen_func("%s_pos_b" % base_func_name) + return var("%s_pos_b_%s" % (base_func_name, suffix))( self.rec(expr.numerator, 'i'), self.rec(expr.denominator, 'i')) else: - seen_func("int_floor_div") - return var("int_floor_div")( + seen_func(base_func_name) + return var("%s_%s" % (base_func_name, suffix))( self.rec(expr.numerator, 'i'), self.rec(expr.denominator, 'i')) + def map_floor_div(self, expr, type_context): + import operator + return self.map_integer_div_operator( + "loopy_floor_div", operator.floordiv, expr, type_context) + + def map_remainder(self, expr, type_context): + tgt_dtype = self.infer_type(expr) + if tgt_dtype.is_complex(): + raise RuntimeError("complex remainder not defined") + + import operator + return self.map_integer_div_operator( + "loopy_mod", operator.mod, expr, type_context) + def map_if(self, expr, type_context): result_type = self.infer_type(expr) return type(expr)( @@ -678,14 +693,6 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.rec(expr.numerator, type_context, tgt_dtype), self.rec(expr.denominator, type_context, tgt_dtype)) - def map_remainder(self, expr, type_context): - tgt_dtype = self.infer_type(expr) - if tgt_dtype.is_complex(): - raise RuntimeError("complex remainder not defined") - - return super(ExpressionToCExpressionMapper, self).map_remainder( - expr, type_context) - def map_power(self, expr, type_context): def base_impl(expr, type_context): from pymbolic.primitives import is_constant, is_zero -- GitLab From 28769776b9cc9a33d104f0f7333111888ad8d3b8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 23:14:35 -0500 Subject: [PATCH 09/14] Respect non-associativity of integer arithmetic operators (Closes #188) --- loopy/target/c/codegen/expression.py | 62 +++++++++++++++++----------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 960df67ea..1bf197c08 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -29,7 +29,7 @@ import numpy as np from pymbolic.mapper import RecursiveMapper, IdentityMapper from pymbolic.mapper.stringifier import (PREC_NONE, PREC_CALL, PREC_PRODUCT, - PREC_POWER, PREC_SHIFT, + PREC_SHIFT, PREC_UNARY, PREC_LOGICAL_OR, PREC_LOGICAL_AND, PREC_BITWISE_AND, PREC_BITWISE_OR) @@ -755,10 +755,21 @@ class CExpressionToCodeMapper(RecursiveMapper): else: return s - def join_rec(self, joiner, iterable, prec): + def join_rec(self, joiner, iterable, prec, force_parens_around=()): f = joiner.join("%s" for i in iterable) return f % tuple( - self.rec(i, prec) for i in iterable) + self.rec_with_force_parens_around( + i, prec, force_parens_around=force_parens_around) + for i in iterable) + + def rec_with_force_parens_around( + self, expr, enclosing_prec, force_parens_around=()): + result = self.rec(expr, enclosing_prec) + + if isinstance(expr, force_parens_around): + result = "(%s)" % result + + return result def join(self, joiner, iterable): f = joiner.join("%s" for i in iterable) @@ -805,14 +816,6 @@ class CExpressionToCodeMapper(RecursiveMapper): self.rec(expr.index, PREC_NONE)), enclosing_prec, PREC_CALL) - def map_floor_div(self, expr, enclosing_prec): - # parenthesize to avoid negative signs being dragged in from the - # outside by associativity - return "(%s / %s)" % ( - self.rec(expr.numerator, PREC_PRODUCT), - # analogous to ^{-1} - self.rec(expr.denominator, PREC_POWER)) - def map_min(self, expr, enclosing_prec): what = type(expr).__name__.lower() @@ -906,33 +909,42 @@ class CExpressionToCodeMapper(RecursiveMapper): self.join_rec(" + ", expr.children, PREC_SUM), enclosing_prec, PREC_SUM) + multiplicative_primitives = (p.Product, p.Quotient, p.FloorDiv, p.Remainder) + def map_product(self, expr, enclosing_prec): - # Spaces prevent '**z' (times dereference z), which - # is hard to read. + force_parens_around = (p.Quotient, p.FloorDiv, p.Remainder) + + # Spaces prevent '**z' (times dereference z), which is hard to read. return self.parenthesize_if_needed( - self.join_rec(" * ", expr.children, PREC_PRODUCT), + self.join_rec(" * ", expr.children, PREC_PRODUCT, + force_parens_around=force_parens_around), enclosing_prec, PREC_PRODUCT) - def map_quotient(self, expr, enclosing_prec): - num = self.rec(expr.numerator, PREC_PRODUCT) + def _map_division_operator(self, operator, expr, enclosing_prec): + num_s = self.rec_with_force_parens_around(expr.numerator, PREC_PRODUCT, + force_parens_around=self.multiplicative_primitives) - # analogous to ^{-1} - denom = self.rec(expr.denominator, PREC_POWER) + denom_s = self.rec_with_force_parens_around(expr.denominator, PREC_PRODUCT, + force_parens_around=self.multiplicative_primitives) return self.parenthesize_if_needed( - "%s / %s" % ( + "%s %s %s" % ( # Space is necessary--otherwise '/*' # (i.e. divide-dererference) becomes # start-of-comment in C. - num, - denom), + num_s, + operator, + denom_s), enclosing_prec, PREC_PRODUCT) + def map_quotient(self, expr, enclosing_prec): + return self._map_division_operator("/", expr, enclosing_prec) + + def map_floor_div(self, expr, enclosing_prec): + return self._map_division_operator("/", expr, enclosing_prec) + def map_remainder(self, expr, enclosing_prec): - return "(%s %% %s)" % ( - self.rec(expr.numerator, PREC_PRODUCT), - # PREC_POWER analogous to ^{-1} - self.rec(expr.denominator, PREC_POWER)) + return self._map_division_operator("%", expr, enclosing_prec) def map_power(self, expr, enclosing_prec): return "pow(%s, %s)" % ( -- GitLab From 1d0bb84286544c3cd60763a3549aa06f3fd94f22 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 23:15:43 -0500 Subject: [PATCH 10/14] Add improved expression fuzz test, including integer/nonneg/real/complex modes --- test/test_expression.py | 467 ++++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 198 ----------------- 2 files changed, 467 insertions(+), 198 deletions(-) create mode 100644 test/test_expression.py diff --git a/test/test_expression.py b/test/test_expression.py new file mode 100644 index 000000000..fd45c9d9b --- /dev/null +++ b/test/test_expression.py @@ -0,0 +1,467 @@ +from __future__ import division, absolute_import, print_function + +__copyright__ = "Copyright (C) 2019 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import six +from six.moves import range + +import sys +import numpy as np +import loopy as lp +import pyopencl as cl +import pyopencl.clmath # noqa +import pyopencl.clrandom # noqa +import pytest + +from pymbolic.mapper.evaluator import EvaluationMapper + + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # 'cl.create_some_context' + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +# {{{ code generator fuzzing + +class BoundsCheckError(ValueError): + pass + + +class BoundsCheckingEvaluationMapper(EvaluationMapper): + def __init__(self, context, lbound, ubound): + super(BoundsCheckingEvaluationMapper, self).__init__(context) + self.lbound = lbound + self.ubound = ubound + + def rec(self, expr): + result = super(BoundsCheckingEvaluationMapper, self).rec(expr) + + if result > self.ubound: + raise BoundsCheckError() + if result < self.lbound: + raise BoundsCheckError() + + return result + + +def make_random_fp_value(use_complex): + from random import randrange, uniform + v = randrange(3) + if v == 0: + while True: + z = randrange(-1000, 1000) + if z: + return z + + elif v == 1 or not use_complex: + return uniform(-10, 10) + else: + cval = uniform(-10, 10) + 1j*uniform(-10, 10) + if randrange(0, 2) == 0: + return np.complex128(cval) + else: + return np.complex128(cval) + + +def make_random_fp_expression(prefix, var_values, size, use_complex): + from random import randrange + import pymbolic.primitives as p + v = randrange(1500) + size[0] += 1 + if v < 500 and size[0] < 40: + term_count = randrange(2, 5) + if randrange(2) < 1: + cls = p.Sum + else: + cls = p.Product + return cls(tuple( + make_random_fp_expression(prefix, var_values, size, use_complex) + for i in range(term_count))) + elif v < 750: + return make_random_fp_value(use_complex=use_complex) + elif v < 1000: + var_name = "var_%s_%d" % (prefix, len(var_values)) + assert var_name not in var_values + var_values[var_name] = make_random_fp_value(use_complex=use_complex) + return p.Variable(var_name) + elif v < 1250: + # FIXME: What does this comment mean? + # Cannot use '-' because that destroys numpy constants. + return p.Sum(( + make_random_fp_expression(prefix, var_values, size, use_complex), + - make_random_fp_expression(prefix, var_values, size, use_complex))) + elif v < 1500: + # FIXME: What does this comment mean? + # Cannot use '/' because that destroys numpy constants. + return p.Quotient( + make_random_fp_expression(prefix, var_values, size, use_complex), + make_random_fp_expression(prefix, var_values, size, use_complex)) + else: + assert False + + +def make_random_int_value(nonneg): + from random import randrange + if nonneg: + return randrange(1, 100) + + else: + while True: + z = randrange(-100, 100) + if z: + return z + + +def make_random_int_expression(prefix, var_values, size, nonneg): + from random import randrange + import pymbolic.primitives as p + if size[0] < 10: + v = randrange(800) + else: + v = randrange(1000) + + size[0] += 1 + + if v < 10: + var_name = "var_%s_%d" % (prefix, len(var_values)) + assert var_name not in var_values + var_values[var_name] = make_random_int_value(nonneg) + return p.Variable(var_name) + elif v < 200 and size[0] < 40: + term_count = randrange(2, 5) + if randrange(2) < 1: + cls = p.Sum + else: + cls = p.Product + return cls(tuple( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg) + for i in range(term_count))) + elif v < 400 and size[0] < 40: + return p.FloorDiv( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + make_nonzero_random_int_expression( + prefix, var_values, size, nonneg=nonneg)) + elif v < 600 and size[0] < 40: + return p.Remainder( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + make_nonzero_random_int_expression( + prefix, var_values, size, nonneg=nonneg)) + elif v < 800 and not nonneg and size[0] < 40: + return p.Sum(( + make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + - make_random_int_expression( + prefix, var_values, size, nonneg=nonneg), + )) + else: + return make_random_int_value(nonneg) + + +def make_nonzero_random_int_expression(prefix, var_values, size, nonneg): + while True: + var_values_new = var_values.copy() + size_new = size[:] + result = make_random_int_expression( + prefix, var_values_new, size_new, nonneg) + + result_eval = EvaluationMapper(var_values_new)(result) + + if result_eval != 0: + var_values.update(var_values_new) + size[:] = size_new + + return result + + assert False + + +def generate_random_fuzz_examples(count, expr_type): + i = 0 + while True: + size = [0] + var_values = {} + if expr_type in ["real", "complex"]: + expr = make_random_fp_expression("e%d" % i, var_values, size, + use_complex=expr_type == "complex") + elif expr_type == "int": + expr = make_random_int_expression( + "e%d" % i, var_values, size, nonneg=False) + elif expr_type == "int_nonneg": + expr = make_random_int_expression( + "e%d" % i, var_values, size, nonneg=True) + else: + raise ValueError("unknown expr_type: %s" % expr_type) + + yield i, expr, var_values + + i += 1 + + +def assert_parse_roundtrip(expr): + from pymbolic.mapper.stringifier import StringifyMapper + strified = StringifyMapper()(expr) + from pymbolic import parse + parsed_expr = parse(strified) + print(expr) + print(parsed_expr) + assert expr == parsed_expr + + +@pytest.mark.parametrize("random_seed", [0, 1, 2]) +@pytest.mark.parametrize("expr_type", ["int", "int_nonneg", "real", "complex"]) +def test_fuzz_expression_code_gen(ctx_factory, expr_type, random_seed): + from pymbolic import evaluate + + def get_numpy_type(x): + if expr_type in ["real", "complex"]: + if isinstance(x, (complex, np.complexfloating)): + return np.complex128 + else: + return np.float64 + + elif expr_type in ["int", "int_nonneg"]: + return np.int64 + + else: + raise ValueError("unknown expr_type: %s" % expr_type) + + from random import seed + + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + seed(random_seed) + + data = [] + instructions = [] + + ref_values = {} + + if expr_type in ["real", "complex"]: + result_type = np.complex128 + elif expr_type in ["int", "int_nonneg"]: + result_type = np.int64 + else: + assert False + + var_names = [] + + fuzz_iter = iter(generate_random_fuzz_examples(50, expr_type)) + count = 0 + + while True: + if count == 20: + break + + i, expr, var_values = next(fuzz_iter) + + var_name = "expr%d" % i + + print(expr) + #assert_parse_roundtrip(expr) + + if expr_type in ["int", "int_nonneg"]: + result_type_iinfo = np.iinfo(np.int32) + bceval_mapper = BoundsCheckingEvaluationMapper( + var_values, + lbound=result_type_iinfo.min, + ubound=result_type_iinfo.max) + print(expr) + try: + ref_values[var_name] = bceval_mapper(expr) + except BoundsCheckError: + print(expr) + print("BOUNDS CHECK FAILED") + continue + else: + try: + ref_values[var_name] = evaluate(expr, var_values) + except ZeroDivisionError: + continue + + count += 1 + + data.append(lp.GlobalArg(var_name, + result_type, + shape=())) + data.extend([ + lp.TemporaryVariable(name, get_numpy_type(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.extend([ + lp.Assignment(name, get_numpy_type(val)(val)) + for name, val in six.iteritems(var_values) + ]) + instructions.append(lp.Assignment(var_name, expr)) + + if expr_type == "int_nonneg": + var_names.extend(var_values) + + knl = lp.make_kernel("{ : }", instructions, data, seq_dependencies=True) + + import islpy as isl + knl = lp.assume(knl, isl.BasicSet( + "[%s] -> { : %s}" + % ( + ", ".join(var_names), + " and ".join("%s >= 0" % name for name in var_names)))) + + knl = lp.set_options(knl, return_dict=True) + print(knl) + evt, lp_values = knl(queue, out_host=True) + + for name, ref_value in six.iteritems(ref_values): + lp_value = lp_values[name] + if expr_type in ["real", "complex"]: + err = abs(ref_value-lp_value)/abs(ref_value) + elif expr_type in ["int", "int_nonneg"]: + err = abs(ref_value-lp_value) + else: + assert False + + if abs(err) > 1e-10: + print(80*"-") + print(knl) + print(80*"-") + print(lp.generate_code_v2(knl).device_code()) + print(80*"-") + print("WRONG: %s rel error=%g" % (name, err)) + print("reference=%r" % ref_value) + print("loopy=%r" % lp_value) + print(80*"-") + 1/0 + + print(lp.generate_code_v2(knl).device_code()) + +# }}} + + +def test_sci_notation_literal(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + set_kernel = lp.make_kernel( + ''' { [i]: 0<=i<12 } ''', + ''' out[i] = 1e-12''') + + set_kernel = lp.set_options(set_kernel, write_cl=True) + + evt, (out,) = set_kernel(queue) + + assert (np.abs(out.get() - 1e-12) < 1e-20).all() + + +def test_indexof(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + ''' { [i,j]: 0<=i,j<5 } ''', + ''' out[i,j] = indexof(out[i,j])''') + + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + out = out.get() + + assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + +def test_indexof_vec(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + if ctx.devices[0].platform.name.startswith("Portable"): + # Accurate as of 2015-10-08 + pytest.skip("POCL miscompiles vector code") + + knl = lp.make_kernel( + ''' { [i,j,k]: 0<=i,j,k<4 } ''', + ''' out[i,j,k] = indexof_vec(out[i,j,k])''') + + knl = lp.tag_inames(knl, {"i": "vec"}) + knl = lp.tag_data_axes(knl, "out", "vec,c,c") + knl = lp.set_options(knl, write_cl=True) + + (evt, (out,)) = knl(queue) + #out = out.get() + #assert np.array_equal(out.ravel(order="C"), np.arange(25)) + + +def test_is_expression_equal(): + from loopy.symbolic import is_expression_equal + from pymbolic import var + + x = var("x") + y = var("y") + + assert is_expression_equal(x+2, 2+x) + + assert is_expression_equal((x+2)**2, x**2 + 4*x + 4) + assert is_expression_equal((x+y)**2, x**2 + 2*x*y + y**2) + + +def test_integer_associativity(): + knl = lp.make_kernel( + "{[i] : 0<=i 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker diff --git a/test/test_loopy.py b/test/test_loopy.py index 94c5a4543..581da4a02 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -260,139 +260,6 @@ def test_multi_cse(ctx_factory): print(compiled.get_code()) -# {{{ code generator fuzzing - -def make_random_value(): - from random import randrange, uniform - v = randrange(3) - if v == 0: - while True: - z = randrange(-1000, 1000) - if z: - return z - - elif v == 1: - return uniform(-10, 10) - else: - cval = uniform(-10, 10) + 1j*uniform(-10, 10) - if randrange(0, 2) == 0: - return np.complex128(cval) - else: - return np.complex128(cval) - - -def make_random_expression(prefix, var_values, size): - from random import randrange - import pymbolic.primitives as p - v = randrange(1500) - size[0] += 1 - if v < 500 and size[0] < 40: - term_count = randrange(2, 5) - if randrange(2) < 1: - cls = p.Sum - else: - cls = p.Product - return cls(tuple( - make_random_expression(prefix, var_values, size) - for i in range(term_count))) - elif v < 750: - return make_random_value() - elif v < 1000: - var_name = "var_%s_%d" % (prefix, len(var_values)) - assert var_name not in var_values - var_values[var_name] = make_random_value() - return p.Variable(var_name) - elif v < 1250: - # Cannot use '-' because that destroys numpy constants. - return p.Sum(( - make_random_expression(prefix, var_values, size), - - make_random_expression(prefix, var_values, size))) - elif v < 1500: - # Cannot use '/' because that destroys numpy constants. - return p.Quotient( - make_random_expression(prefix, var_values, size), - make_random_expression(prefix, var_values, size)) - - -def generate_random_fuzz_examples(count): - for i in range(count): - size = [0] - var_values = {} - expr = make_random_expression("e%d" % i, var_values, size) - yield expr, var_values - - -@pytest.mark.parametrize("random_seed", [0, 1, 2]) -def test_fuzz_expression_code_gen(ctx_factory, random_seed=0): - from pymbolic import evaluate - - def get_numpy_type(x): - if isinstance(x, (complex, np.complexfloating)): - return np.complex128 - else: - return np.float64 - - from random import seed - - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - seed(random_seed) - - data = [] - instructions = [] - - ref_values = {} - - for i, (expr, var_values) in enumerate( - generate_random_fuzz_examples(50)): - - var_name = "expr%d" % i - - try: - ref_values[var_name] = evaluate(expr, var_values) - except ZeroDivisionError: - continue - - data.append(lp.GlobalArg(var_name, np.complex128, shape=())) - data.extend([ - lp.TemporaryVariable(name, get_numpy_type(val)) - for name, val in six.iteritems(var_values) - ]) - instructions.extend([ - lp.Assignment(name, get_numpy_type(val)(val)) - for name, val in six.iteritems(var_values) - ]) - instructions.append(lp.Assignment(var_name, expr)) - - knl = lp.make_kernel("{ : }", instructions, data) - print(knl) - - knl = lp.set_options(knl, return_dict=True) - evt, lp_values = knl(queue, out_host=True) - - for name, ref_value in six.iteritems(ref_values): - lp_value = lp_values[name] - err = abs(ref_value-lp_value)/abs(ref_value) - if abs(err) > 1e-10: - print(80*"-") - print("WRONG: %s rel error=%g" % (name, err)) - print("true=%r" % ref_value) - print("loopy=%r" % lp_value) - print(80*"-") - print(knl) - print(80*"-") - print(var_values) - print(80*"-") - print(repr(expr)) - print(80*"-") - print(expr) - print(80*"-") - 1/0 - -# }}} - - def test_bare_data_dependency(ctx_factory): dtype = np.dtype(np.float32) ctx = ctx_factory() @@ -990,21 +857,6 @@ def test_auto_test_zero_warmup_rounds(ctx_factory): warmup_rounds=0) -def test_sci_notation_literal(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - - set_kernel = lp.make_kernel( - ''' { [i]: 0<=i<12 } ''', - ''' out[i] = 1e-12''') - - set_kernel = lp.set_options(set_kernel, write_cl=True) - - evt, (out,) = set_kernel(queue) - - assert (np.abs(out.get() - 1e-12) < 1e-20).all() - - def test_variable_size_temporary(): knl = lp.make_kernel( ''' { [i,j]: 0<=i,j Date: Mon, 26 Aug 2019 23:20:42 -0500 Subject: [PATCH 11/14] Add test_divide_precedence from Nick's python test.py --- test/test_expression.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_expression.py b/test/test_expression.py index fd45c9d9b..e04745960 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -457,6 +457,25 @@ def test_integer_associativity(): in lp.generate_code_v2(knl).device_code()) +def test_divide_precedence(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + "{:}", + """ + x[0] = c*(a/b) + y[0] = c*(a%b) + """, + [lp.ValueArg('a, b, c', np.int32), lp.GlobalArg('x, y', np.int32)]) + print(lp.generate_code_v2(knl).device_code()) + + evt, (x_out, y_out) = knl(queue, c=2, b=2, a=5) + evt.wait() + assert x_out.get() == 4 + assert y_out.get() == 2 + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) -- GitLab From 15d08175946fe350c632036dfed30e7efb26e005 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 23:24:58 -0500 Subject: [PATCH 12/14] Fix doctests for integer associativity changes --- doc/tutorial.rst | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 3c85060da..753b09b5d 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -547,7 +547,7 @@ Consider this example: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= -1 + ((15 + n) / 16); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + (15 + n) / 16; ++i_outer) for (int i_inner = 0; i_inner <= (-16 + n + -16 * i_outer >= 0 ? 15 : -1 + n + -16 * i_outer); ++i_inner) a[16 * i_outer + i_inner] = 0.0f; ... @@ -579,7 +579,7 @@ relation to loop nesting. For example, it's perfectly possible to request #define lid(N) ((int) get_local_id(N)) ... for (int i_inner = 0; i_inner <= (-17 + n >= 0 ? 15 : -1 + n); ++i_inner) - for (int i_outer = 0; i_outer <= -1 + -1 * i_inner + ((15 + n + 15 * i_inner) / 16); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + -1 * i_inner + (15 + n + 15 * i_inner) / 16; ++i_outer) a[16 * i_outer + i_inner] = 0.0f; ... @@ -603,8 +603,8 @@ commonly called 'loop tiling': >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= ((-16 + n) / 16); ++i_outer) - for (int j_outer = 0; j_outer <= ((-16 + n) / 16); ++j_outer) + for (int i_outer = 0; i_outer <= (-16 + n) / 16; ++i_outer) + for (int j_outer = 0; j_outer <= (-16 + n) / 16; ++j_outer) for (int i_inner = 0; i_inner <= 15; ++i_inner) for (int j_inner = 0; j_inner <= 15; ++j_inner) out[n * (16 * i_outer + i_inner) + 16 * j_outer + j_inner] = a[n * (16 * j_outer + j_inner) + 16 * i_outer + i_inner]; @@ -645,9 +645,8 @@ loop's tag to ``"unr"``: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) - #define int_floor_div_pos_b(a,b) ( ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) ) ... - for (int i_outer = 0; i_outer <= int_floor_div_pos_b(-4 + n, 4); ++i_outer) + for (int i_outer = 0; i_outer <= loopy_floor_div_pos_b_int32(-4 + n, 4); ++i_outer) { a[4 * i_outer] = 0.0f; a[1 + 4 * i_outer] = 0.0f; @@ -767,7 +766,7 @@ assumption: >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... - for (int i_outer = 0; i_outer <= -1 + ((3 + n) / 4); ++i_outer) + for (int i_outer = 0; i_outer <= -1 + (3 + n) / 4; ++i_outer) { a[4 * i_outer] = 0.0f; if (-2 + -4 * i_outer + n >= 0) @@ -797,7 +796,7 @@ enabling some cost savings: #define lid(N) ((int) get_local_id(N)) ... /* bulk slab for 'i_outer' */ - for (int i_outer = 0; i_outer <= -2 + ((3 + n) / 4); ++i_outer) + for (int i_outer = 0; i_outer <= -2 + (3 + n) / 4; ++i_outer) { a[4 * i_outer] = 0.0f; a[1 + 4 * i_outer] = 0.0f; @@ -806,7 +805,7 @@ enabling some cost savings: } /* final slab for 'i_outer' */ { - int const i_outer = -1 + n + -1 * (3 * n / 4); + int const i_outer = -1 + n + -1 * ((3 * n) / 4); if (-1 + n >= 0) { @@ -1220,7 +1219,7 @@ should call :func:`loopy.get_one_scheduled_kernel`: 2: RETURN FROM KERNEL rotate_v2 3: ... gbarrier 4: CALL KERNEL rotate_v2_0(extra_args=[], extra_inames=[]) - 5: arr[((1 + i_inner + i_outer*16) % n)] = tmp {id=rotate} + 5: arr[(1 + i_inner + i_outer*16) % n] = tmp {id=rotate} 6: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- @@ -1260,7 +1259,7 @@ put those instructions into the schedule. 4: ... gbarrier 5: CALL KERNEL rotate_v2_0(extra_args=['tmp_save_slot'], extra_inames=[]) 6: tmp = tmp_save_slot[tmp_reload_hw_dim_0_rotate_v2_0, tmp_reload_hw_dim_1_rotate_v2_0] {id=tmp.reload} - 7: arr[((1 + i_inner + i_outer*16) % n)] = tmp {id=rotate} + 7: arr[(1 + i_inner + i_outer*16) % n] = tmp {id=rotate} 8: RETURN FROM KERNEL rotate_v2_0 --------------------------------------------------------------------------- @@ -1297,7 +1296,7 @@ The kernel translates into two OpenCL kernels. int tmp; tmp = tmp_save_slot[16 * gid(0) + lid(0)]; - arr[((1 + lid(0) + gid(0) * 16) % n)] = tmp; + arr[(1 + lid(0) + gid(0) * 16) % n] = tmp; } Now we can execute the kernel. -- GitLab From 85039a066bb4ccd70650367e08fae7c05e360a1a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 26 Aug 2019 23:37:51 -0500 Subject: [PATCH 13/14] Placate flake8 --- test/test_loopy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 581da4a02..0a89eee0a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -22,7 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import six +import six # noqa: F401 from six.moves import range import sys -- GitLab From 1562d866232e4f0fae2c32dc9ea7fcd82e6cc01e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 27 Aug 2019 00:08:17 -0500 Subject: [PATCH 14/14] Fix test_fuzz_expression_code_gen scalability --- test/test_expression.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_expression.py b/test/test_expression.py index e04745960..7a26e7e2e 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -215,7 +215,7 @@ def make_nonzero_random_int_expression(prefix, var_values, size, nonneg): assert False -def generate_random_fuzz_examples(count, expr_type): +def generate_random_fuzz_examples(expr_type): i = 0 while True: size = [0] @@ -247,7 +247,7 @@ def assert_parse_roundtrip(expr): assert expr == parsed_expr -@pytest.mark.parametrize("random_seed", [0, 1, 2]) +@pytest.mark.parametrize("random_seed", [0, 1, 2, 3, 4, 5]) @pytest.mark.parametrize("expr_type", ["int", "int_nonneg", "real", "complex"]) def test_fuzz_expression_code_gen(ctx_factory, expr_type, random_seed): from pymbolic import evaluate @@ -286,11 +286,11 @@ def test_fuzz_expression_code_gen(ctx_factory, expr_type, random_seed): var_names = [] - fuzz_iter = iter(generate_random_fuzz_examples(50, expr_type)) + fuzz_iter = iter(generate_random_fuzz_examples(expr_type)) count = 0 while True: - if count == 20: + if count == 10: break i, expr, var_values = next(fuzz_iter) -- GitLab