From 60bc8b738bb382180d6eabe78c783a6c904aa913 Mon Sep 17 00:00:00 2001
From: Isuru Fernando <isuruf@gmail.com>
Date: Thu, 28 Apr 2022 16:30:24 -0500
Subject: [PATCH] Avoid creating new variables for x^2 to help pocl generate FMAs

---
 sumpy/codegen.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sumpy/codegen.py b/sumpy/codegen.py
index 83f4fce7..ef7e059a 100644
--- a/sumpy/codegen.py
+++ b/sumpy/codegen.py
@@ -390,10 +390,12 @@ class PowerRewriter(CSECachingIdentityMapper, CallExternalRecMapper):
 
         if isinstance(exp, int):
             new_base = prim.wrap_in_cse(expr.base)
-            if exp > 1 and exp % 2 == 0:
+            if exp > 2 and exp % 2 == 0:
                 square = prim.wrap_in_cse(new_base*new_base)
                 return self.rec(prim.wrap_in_cse(square**(exp//2)),
                         rec_self, *args)
+            elif exp == 2:
+                return new_base * new_base
             elif exp > 1 and exp % 2 == 1:
                 square = prim.wrap_in_cse(new_base*new_base)
                 return self.rec(prim.wrap_in_cse(square**((exp-1)//2))*new_base,
-- 
GitLab