From 46efce1871cc124a961b2519b7a88885e09a8790 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 23 Aug 2015 19:03:19 -0500
Subject: [PATCH] Expand CSEs as early as possible in kernel creation process

---
 loopy/kernel/creation.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index feedd7a21..4683ca905 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -707,7 +707,7 @@ class CSEToAssignmentMapper(IdentityMapper):
             return var
 
 
-def expand_cses(knl):
+def expand_cses(instructions, cse_prefix="cse_expr"):
     def add_assignment(base_name, expr, dtype):
         if base_name is None:
             base_name = "var"
@@ -721,16 +721,15 @@ def expand_cses(knl):
             dtype = np.dtype(dtype)
 
         from loopy.kernel.data import TemporaryVariable
-        new_temp_vars[new_var_name] = TemporaryVariable(
+        new_temp_vars.append(TemporaryVariable(
                 name=new_var_name,
                 dtype=dtype,
                 is_local=lp.auto,
-                shape=())
+                shape=()))
 
         from pymbolic.primitives import Variable
         new_insn = ExpressionInstruction(
-                id=knl.make_unique_instruction_id(
-                    extra_used_ids=newly_created_insn_ids),
+                id=None,
                 assignee=Variable(new_var_name), expression=expr,
                 predicates=insn.predicates)
         newly_created_insn_ids.add(new_insn.id)
@@ -742,20 +741,19 @@ def expand_cses(knl):
 
     new_insns = []
 
-    var_name_gen = knl.get_var_name_generator()
+    from pytools import UniqueNameGenerator
+    var_name_gen = UniqueNameGenerator(forced_prefix=cse_prefix)
 
     newly_created_insn_ids = set()
-    new_temp_vars = knl.temporary_variables.copy()
+    new_temp_vars = []
 
-    for insn in knl.instructions:
+    for insn in instructions:
         if isinstance(insn, ExpressionInstruction):
             new_insns.append(insn.copy(expression=cseam(insn.expression)))
         else:
             new_insns.append(insn)
 
-    return knl.copy(
-            instructions=new_insns,
-            temporary_variables=new_temp_vars)
+    return (new_insns, new_temp_vars)
 
 # }}}
 
@@ -1184,6 +1182,11 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
 
     # }}}
 
+    instructions, cse_temp_vars = expand_cses(instructions)
+    for tv in cse_temp_vars:
+        temporary_variables[tv.name] = tv
+    del cse_temp_vars
+
     domains = parse_domains(domains, defines)
 
     arg_guesser = ArgumentGuesser(domains, instructions,
@@ -1209,10 +1212,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
 
     check_for_nonexistent_iname_deps(knl)
 
-    knl = tag_reduction_inames_as_sequential(knl)
     knl = create_temporaries(knl, default_order)
     knl = determine_shapes_of_temporaries(knl)
-    knl = expand_cses(knl)
+    knl = tag_reduction_inames_as_sequential(knl)
     knl = expand_defines_in_shapes(knl, defines)
     knl = guess_arg_shape_if_requested(knl, default_order)
     knl = apply_default_order_to_args(knl, default_order)
-- 
GitLab