From aed3a1f33103b164638664e1d35cc2aed237cf64 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 28 Aug 2011 21:39:25 +0200
Subject: [PATCH] Fix local fetch op in CFEM quadrature.

---
 examples/dg.py            |  2 +-
 examples/quadrature.py    |  3 ++-
 loopy/__init__.py         |  2 +-
 loopy/codegen/prefetch.py | 10 ++++++++--
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/examples/dg.py b/examples/dg.py
index 85563ca1f..95d6874c4 100644
--- a/examples/dg.py
+++ b/examples/dg.py
@@ -1,4 +1,4 @@
-# FIXME NOT UPDATED YET FOR NEW-STYLE LOOPY!
+raise NotImplementedError("NOT UPDATED YET FOR NEW-STYLE LOOPY!") # FIXME
 
 
 
diff --git a/examples/quadrature.py b/examples/quadrature.py
index c39d6c99a..5df05e031 100644
--- a/examples/quadrature.py
+++ b/examples/quadrature.py
@@ -60,7 +60,8 @@ def build_mass_mat_maker(ctx_factory=cl.create_some_context):
             outer_slab_increments=(0,0))
 
     # fix reg prefetch
-    knl = lp.add_prefetch(knl, "det_j", ["c_inner"])
+    knl = lp.add_prefetch(knl, "det_j", ["c_inner"],
+            loc_fetch_axes={0: (0, 1)})
 
     #ilp = 4
     #knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 46bc404e5..6b0a1dcd4 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -123,7 +123,7 @@ def add_prefetch(kernel, input_access_descr, fetch_dims, loc_fetch_axes={}):
             input_vector=ivec,
             index_expr=iexpr,
             fetch_dims=fetch_dims,
-            loc_fetch_axes={})
+            loc_fetch_axes=loc_fetch_axes)
 
     return kernel.copy(prefetch=new_prefetch)
 
diff --git a/loopy/codegen/prefetch.py b/loopy/codegen/prefetch.py
index 155927e4b..defaaa81a 100644
--- a/loopy/codegen/prefetch.py
+++ b/loopy/codegen/prefetch.py
@@ -303,11 +303,17 @@ def generate_prefetch_code(kernel, sched_index, exec_domain):
 
     from loopy.kernel import TAG_WORK_ITEM_IDX
     knl_work_item_inames = kernel.ordered_inames_by_tag_type(TAG_WORK_ITEM_IDX)
+    used_kernel_work_item_inames = []
 
     for realization_dim_idx, loc_fetch_axis_list in \
-            getattr(pf, "loc_fetch_axes", {}).iteritems():
-        realization_inames[realization_dim_idx] = [knl_work_item_inames.pop(axis)
+            pf.loc_fetch_axes.iteritems():
+        loc_fetch_inames = [knl_work_item_inames[axis]
             for axis in loc_fetch_axis_list]
+        realization_inames[realization_dim_idx] = loc_fetch_inames
+        used_kernel_work_item_inames.extend(loc_fetch_inames)
+
+    for inm in used_kernel_work_item_inames:
+        knl_work_item_inames.remove(inm)
 
     # }}}
 
-- 
GitLab