diff --git a/examples/matrix-ops.py b/examples/matrix-ops.py index fb585d58271a313292a1022300cd6ba178c7da2a..fcf0a078b1ff2dc3e77967257a289d64e99634fb 100644 --- a/examples/matrix-ops.py +++ b/examples/matrix-ops.py @@ -178,7 +178,7 @@ def fancy_matrix_mul(ctx_factory=cl.create_some_context): order = "F" - n = 16*10 + n = 16*40 from pymbolic import var a, b, c, i, j, k, n_sym = [var(s) for s in "abcijkn"] @@ -196,7 +196,7 @@ def fancy_matrix_mul(ctx_factory=cl.create_some_context): knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1") knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0") - knl = lp.split_dimension(knl, "k", 16) + knl = lp.split_dimension(knl, "k", 19) knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) knl = lp.add_prefetch(knl, 'b', ["k_inner", "j_inner"]) assert knl.get_invalid_reason() is None diff --git a/loopy/__init__.py b/loopy/__init__.py index 0fc63dcfc0854b01a674c18de9ea40decaeb7f83..9d427656739e7896ad02042e0b6fe23ca6eca5b2 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -40,6 +40,9 @@ register_mpz_with_pymbolic() +class LoopyAdvisory(UserWarning): + pass + # {{{ index tags class IndexTag(object): @@ -1187,9 +1190,14 @@ class LoopyCCodeMapper(CCodeMapper): return CCodeMapper.map_subscript(self, expr, enclosing_prec) def map_floor_div(self, expr, prec): - return ("floor_int_div(%s, %s)" - % (self.rec(expr.numerator, PREC_NONE), - self.rec(expr.denominator, PREC_NONE))) + if isinstance(expr.denominator, int) and expr.denominator > 0: + return ("int_floor_div_pos_b(%s, %s)" + % (self.rec(expr.numerator, PREC_NONE), + expr.denominator)) + else: + return ("int_floor_div(%s, %s)" + % (self.rec(expr.numerator, PREC_NONE), + self.rec(expr.denominator, PREC_NONE))) # }}} @@ -1943,19 +1951,18 @@ def generate_code(kernel): mod.extend([LiteralLines(kernel.preamble), Line()]) mod.extend([ - LiteralLines(""" - inline int floor_int_div(int a, int b) - { - if ((a<0) != (b<0)) - { - if (b<0) - return (-a+b+1)/-b; - else - return (a-b+1)/b; - } - else - return a/b; - } + LiteralLines(r""" + #define int_floor_div(a,b) \ + (( (a) - \ + ( ( (a)<0 ) != ( (b)<0 )) \ + *( (b) + ( (b)<0 ) - ( (b)>=0 ) )) \ + / (b) ) + + + #define int_floor_div_pos_b(a,b) ( \ + ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) \ + ) + """), Line()])