diff --git a/examples/dg.py b/examples/dg.py index 85563ca1ff27d4bd875485f67ac99f1c67a72d8a..95d6874c4995045ae52cfc2a6418d6e9a014bdf3 100644 --- a/examples/dg.py +++ b/examples/dg.py @@ -1,4 +1,4 @@ -# FIXME NOT UPDATED YET FOR NEW-STYLE LOOPY! +raise NotImplementedError("NOT UPDATED YET FOR NEW-STYLE LOOPY!") # FIXME diff --git a/examples/quadrature.py b/examples/quadrature.py index c39d6c99ab7461ffe8dfe63f08ded986f8b45baa..5df05e0312743c7d90ee326cd592a43823274bde 100644 --- a/examples/quadrature.py +++ b/examples/quadrature.py @@ -60,7 +60,8 @@ def build_mass_mat_maker(ctx_factory=cl.create_some_context): outer_slab_increments=(0,0)) # fix reg prefetch - knl = lp.add_prefetch(knl, "det_j", ["c_inner"]) + knl = lp.add_prefetch(knl, "det_j", ["c_inner"], + loc_fetch_axes={0: (0, 1)}) #ilp = 4 #knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1") diff --git a/loopy/__init__.py b/loopy/__init__.py index 46bc404e5ee3230429e64db6e68aefb2d67973ac..6b0a1dcd41ed50f1e4ccdc7f3a39685ce4a50c50 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -123,7 +123,7 @@ def add_prefetch(kernel, input_access_descr, fetch_dims, loc_fetch_axes={}): input_vector=ivec, index_expr=iexpr, fetch_dims=fetch_dims, - loc_fetch_axes={}) + loc_fetch_axes=loc_fetch_axes) return kernel.copy(prefetch=new_prefetch) diff --git a/loopy/codegen/prefetch.py b/loopy/codegen/prefetch.py index 155927e4b3be4ae1ae6913fd574773e0209c7374..defaaa81a2eeae833a4329ef3117fc45c2552316 100644 --- a/loopy/codegen/prefetch.py +++ b/loopy/codegen/prefetch.py @@ -303,11 +303,17 @@ def generate_prefetch_code(kernel, sched_index, exec_domain): from loopy.kernel import TAG_WORK_ITEM_IDX knl_work_item_inames = kernel.ordered_inames_by_tag_type(TAG_WORK_ITEM_IDX) + used_kernel_work_item_inames = [] for realization_dim_idx, loc_fetch_axis_list in \ - getattr(pf, "loc_fetch_axes", {}).iteritems(): - realization_inames[realization_dim_idx] = [knl_work_item_inames.pop(axis) + pf.loc_fetch_axes.iteritems(): + loc_fetch_inames = [knl_work_item_inames[axis] for axis in loc_fetch_axis_list] + realization_inames[realization_dim_idx] = loc_fetch_inames + used_kernel_work_item_inames.extend(loc_fetch_inames) + + for inm in used_kernel_work_item_inames: + knl_work_item_inames.remove(inm) # }}}