diff --git a/MEMO b/MEMO
index b5ab32e625ca5c6e282c93b931e9cab2af6b2289..ac53d89d97e890590f84554cf90eae627b2d4e81 100644
--- a/MEMO
+++ b/MEMO
@@ -47,8 +47,14 @@ To-do

 - Make xfail test for strided access.

+- Test join_inames
+
 - *_dimension -> *_iname

+- Debug axpy nondet fail
+
+- Make tests run on GPUs
+
 Fixes:

 - Group instructions by dependency/inames for scheduling, to
diff --git a/doc/reference.rst b/doc/reference.rst
index 2cc0578bde0f48dbfda0ff249d23fa45d5e9802a..1e77eeb734bfedc3e6d19cfe3d36012a1bd7cf59 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -103,8 +103,8 @@ Automatic local axes are chosen as follows:
 #. Assign the low-stride iname to the available axis, splitting the
    iname if it is too long for the available axis size.

-If you need different behavior, use :func:`tag_dimensions` and
-:func:`split_dimension` to change the assignment of `"l.auto"` axes
+If you need different behavior, use :func:`tag_inames` and
+:func:`split_iname` to change the assignment of `"l.auto"` axes
 manually.

 .. _creating-kernels:
@@ -152,14 +152,14 @@ function, which takes the same arguments, but does some extra post-processing.

 .. autofunction:: make_kernel

-Wrangling dimensions
---------------------
+Wrangling inames
+----------------

-.. autofunction:: split_dimension
+.. autofunction:: split_iname

-.. autofunction:: join_dimensions
+.. autofunction:: join_inames

-.. autofunction:: tag_dimensions
+.. autofunction:: tag_inames

 Dealing with Substitution Rules
 -------------------------------
diff --git a/examples/matrix-mul.py b/examples/matrix-mul.py
index d0f11c261e434efdde8264ab8e640428b9a57ed5..6c4d9e97e362d074c5d379928162ba69ff4cdcc9 100644
--- a/examples/matrix-mul.py
+++ b/examples/matrix-mul.py
@@ -48,11 +48,11 @@ def image_matrix_mul_ilp(ctx_factory=cl.create_some_context):
             name="matmul")

     ilp = 4
-    knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
+    knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
     j_inner_split = 16
-    knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1")
-    knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 2)
+    knl = lp.split_iname(knl, "j", ilp*j_inner_split, outer_tag="g.1")
+    knl = lp.split_iname(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 2)

     knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
     knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"])
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 9c61523078831c4a09ca2319d9bb7bd4e385097f..ca443db752f54498ca17b82a530d4e511cc095e6 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -11,6 +11,8 @@ register_mpz_with_pymbolic()
 import islpy as isl
 from islpy import dim_type

+from pytools import MovedFunctionDeprecationWrapper
+


@@ -50,8 +52,9 @@ __all__ = ["ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "Ima
         "generate_loop_schedules", "generate_code",
         "CompiledKernel", "auto_test_vs_ref", "check_kernels",
-        "make_kernel", "split_dimension", "join_dimensions",
-        "tag_dimensions",
+        "make_kernel",
+        "split_iname", "join_inames", "tag_inames",
+        "split_dimension", "join_dimensions", "tag_dimensions",
         "extract_subst", "expand_subst",
         "precompute", "add_prefetch",
         "split_arg_axis", "find_padding_multiple", "add_padding"
@@ -62,9 +65,9 @@ class infer_type:

 # }}}

-# {{{ dimension split
+# {{{ split inames

-def split_dimension(kernel, split_iname, inner_length,
+def split_iname(kernel, split_iname, inner_length,
         outer_iname=None, inner_iname=None,
         outer_tag=None, inner_tag=None,
         slabs=(0, 0), do_tagged_check=True):
@@ -168,16 +171,18 @@ def split_dimension(kernel, split_iname, inner_length,
                 ))

     if existing_tag is not None:
-        result = tag_dimensions(result,
+        result = tag_inames(result,
                 {outer_iname: existing_tag,
                     inner_iname: existing_tag})

-    return tag_dimensions(result, {outer_iname: outer_tag, inner_iname: inner_tag})
+    return tag_inames(result, {outer_iname: outer_tag, inner_iname: inner_tag})
+
+split_dimension = MovedFunctionDeprecationWrapper(split_iname)

 # }}}

-# {{{ dimension join
+# {{{ join inames

-def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
+def join_inames(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
     """
     :arg inames: fastest varying last
     """
@@ -264,13 +269,15 @@ def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
                 applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_map]
                 ))

-    return tag_dimensions(result, {new_iname: tag})
+    return tag_inames(result, {new_iname: tag})
+
+join_dimensions = MovedFunctionDeprecationWrapper(join_inames)

 # }}}

-# {{{ dimension tag
+# {{{ tag inames

-def tag_dimensions(kernel, iname_to_tag, force=False):
+def tag_inames(kernel, iname_to_tag, force=False):
     from loopy.kernel import parse_tag

     iname_to_tag = dict((iname, parse_tag(tag))
@@ -315,6 +322,8 @@ def tag_dimensions(kernel, iname_to_tag, force=False):

     return kernel.copy(iname_to_tag=new_iname_to_tag)

+tag_dimensions = MovedFunctionDeprecationWrapper(tag_inames)
+
 # }}}

 # {{{ convenience: add_prefetch
diff --git a/loopy/creation.py b/loopy/creation.py
index 99951635defce433b38148ecf66c8a30ce046507..6be68cfbe6fb48b204743cc4e362b22389534faa 100644
--- a/loopy/creation.py
+++ b/loopy/creation.py
@@ -27,8 +27,8 @@ def tag_reduction_inames_as_sequential(knl):
         if tag is None:
             new_iname_to_tag[iname] = ForceSequentialTag()

-    from loopy import tag_dimensions
-    return tag_dimensions(knl, new_iname_to_tag)
+    from loopy import tag_inames
+    return tag_inames(knl, new_iname_to_tag)

 # {{{ sanity checking

@@ -397,8 +397,8 @@ def make_kernel(*args, **kwargs):
     from loopy.kernel import LoopKernel
     knl = LoopKernel(*args, **kwargs)

-    from loopy import tag_dimensions
-    knl = tag_dimensions(
+    from loopy import tag_inames
+    knl = tag_inames(
             knl.copy(iname_to_tag_requests=None),
             knl.iname_to_tag_requests).copy(
                     iname_to_tag_requests=[])
diff --git a/loopy/cse.py b/loopy/cse.py
index 65dcf22d92cb42462086d437b0abc9073be81368..809748b0bc1739b8b19e5259de0555b379ce1f43 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -843,8 +843,8 @@ def precompute(kernel, subst_use, dtype, sweep_inames=[],
             substitutions=new_substs,
             temporary_variables=new_temporary_variables)

-    from loopy import tag_dimensions
-    return tag_dimensions(result, new_iname_to_tag)
+    from loopy import tag_inames
+    return tag_inames(result, new_iname_to_tag)


diff --git a/loopy/padding.py b/loopy/padding.py
index c3b67a2528cf3876f13d46f98a3597957d5661f4..307a380b67bece5e8dbd87451b84c557945689fe 100644
--- a/loopy/padding.py
+++ b/loopy/padding.py
@@ -152,10 +152,10 @@ def split_arg_axis(kernel, args_and_axes, count):
             .map_expressions(aash)
             .copy(args=new_args))

-    from loopy import split_dimension
+    from loopy import split_iname

-    for split_iname, (outer_iname, inner_iname) in split_vars.iteritems():
-        result = split_dimension(result, split_iname, count,
+    for iname, (outer_iname, inner_iname) in split_vars.iteritems():
+        result = split_iname(result, iname, count,
                 outer_iname=outer_iname, inner_iname=inner_iname)

     return result
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 2e4b98ba66ab73787b7fa1bcb1b421823ccec1dc..46263b6d73db09cae9b940414523d8d3843eb32c 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -557,8 +557,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
 # {{{ assign automatic axes

 def assign_automatic_axes(kernel, axis=0, local_size=None):
-    from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag,
-            UnrollTag)
+    from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag)

     # Realize that at this point in time, axis lengths are already
     # fixed. So we compute them once and pass them to our recursive
@@ -615,13 +614,13 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
     else:
         new_tag = LocalIndexTag(axis)
         if desired_length > local_size[axis]:
-            from loopy import split_dimension
+            from loopy import split_iname

             # Don't be tempted to switch the outer tag to unroll--this may
             # generate tons of code on some examples.

             return assign_automatic_axes(
-                    split_dimension(kernel, iname, inner_length=local_size[axis],
+                    split_iname(kernel, iname, inner_length=local_size[axis],
                         outer_tag=None, inner_tag=new_tag,
                         do_tagged_check=False),
                     axis=recursion_axis, local_size=local_size)
diff --git a/proto-tests/test_dg.py b/proto-tests/test_dg.py
index bfd2526bf0a808001b5b5668f4e888dd44ef2988..c9404ea31f644c0eaab3581baf483a18b5648d3e 100644
--- a/proto-tests/test_dg.py
+++ b/proto-tests/test_dg.py
@@ -61,13 +61,13 @@ def test_dg_matrix_mul(ctx_factory):
             name="dg_matmul")

     #ilp = 4
-    knl = lp.split_dimension(knl, "i", 30, 32, outer_tag="g.0", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 16, outer_tag="g.1", inner_tag="l.1")
-    #knl = lp.split_dimension(knl, "k_inner", 16, outer_tag="ilp", inner_tag="l.1")
+    knl = lp.split_iname(knl, "i", 30, 32, outer_tag="g.0", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 16, outer_tag="g.1", inner_tag="l.1")
+    #knl = lp.split_iname(knl, "k_inner", 16, outer_tag="ilp", inner_tag="l.1")

     assert Np % 2 == 0
-    #knl = lp.split_dimension(knl, "j", Np//2)
-    #knl = lp.split_dimension(knl, "k", 32)
+    #knl = lp.split_iname(knl, "j", Np//2)
+    #knl = lp.split_iname(knl, "k", 32)

     #for mn in matrix_names:
         #knl = lp.add_prefetch(knl, mn, ["j", "i_inner"])
diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py
index c3823d504a41db981d35f5e3900e6edf441f781a..a2cba7c5766f5e66ab0e87608ee07b3dff5269ad 100644
--- a/proto-tests/test_fem_assembly.py
+++ b/proto-tests/test_fem_assembly.py
@@ -44,21 +44,21 @@ def test_laplacian_stiffness(ctx_factory):
                 ],
             name="lapquad", assumptions="Nc>=1")

-    knl = lp.tag_dimensions(knl, dict(ax_b="unr"))
+    knl = lp.tag_inames(knl, dict(ax_b="unr"))
     seq_knl = knl

     def variant_fig31(knl):
         # This (mostly) reproduces Figure 3.1.

-        knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
+        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
         return knl, ["K", "i", "j", "q", "ax_b_insn"]

     def variant_pg4(knl):
         # This (mostly) reproduces the unlabeled code snippet on pg. 4.

-        knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
+        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
         Ncloc = 16
-        knl = lp.split_dimension(knl, "K", Ncloc,
+        knl = lp.split_iname(knl, "K", Ncloc,
                 outer_iname="Ko", inner_iname="Kloc")

         return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"]
@@ -66,21 +66,21 @@ def test_laplacian_stiffness(ctx_factory):
         # This (mostly) reproduces Figure 3.2.

         Ncloc = 16
-        knl = lp.split_dimension(knl, "K", Ncloc,
+        knl = lp.split_iname(knl, "K", Ncloc,
                 outer_iname="Ko", inner_iname="Kloc")
         knl = lp.precompute(knl, "dPsi", np.float32, ["i", "q", "dx_axis"],
                 default_tag=None)
-        knl = lp.tag_dimensions(knl, {"dx_axis": "unr", "dxi": "unr"})
+        knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"})
         return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"]

     def variant_fig33(knl):
         # This is meant to (mostly) reproduce Figure 3.3.

         Ncloc = 16
-        knl = lp.split_dimension(knl, "K", Ncloc,
+        knl = lp.split_iname(knl, "K", Ncloc,
                 outer_iname="Ko", inner_iname="Kloc")
         knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"],
                 default_tag=None)
-        knl = lp.tag_dimensions(knl, {"j": "ilp.seq"})
+        knl = lp.tag_inames(knl, {"j": "ilp.seq"})

         return knl, ["Ko", "Kloc"]
@@ -91,12 +91,12 @@ def test_laplacian_stiffness(ctx_factory):
         # to reverse-engineer what is going on there. Some discussion might
         # help, too. :)

-        knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
+        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
         Ncloc = 16
-        knl = lp.split_dimension(knl, "K", Ncloc,
+        knl = lp.split_iname(knl, "K", Ncloc,
                 outer_iname="Ko", inner_iname="Kloc",
                 outer_tag="g.0")
-        knl = lp.tag_dimensions(knl, {"i": "l.1", "j": "l.0"})
+        knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
         return knl, ["K", "i", "j", "q", "ax_b_insn"]

     def variant_simple_gpu_prefetch(knl):
@@ -106,12 +106,12 @@ def test_laplacian_stiffness(ctx_factory):
         # for the upper bound of Kloc (it uses Nc). I'll investigate and
         # fix that. (FIXME)

-        knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
+        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
         Ncloc = 16
-        knl = lp.split_dimension(knl, "K", Ncloc,
+        knl = lp.split_iname(knl, "K", Ncloc,
                 outer_iname="Ko", inner_iname="Kloc",
                 outer_tag="g.0")
-        knl = lp.tag_dimensions(knl, {"i": "l.1", "j": "l.0"})
+        knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
         knl = lp.add_prefetch(knl, "w", ["q"])
         knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2])
         knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3])
diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py
index 888ecdd2b88702b47326910fef9786c594acb670..69fec550128b14b14d52b53eeb8d17542384d651 100644
--- a/proto-tests/test_sem.py
+++ b/proto-tests/test_sem.py
@@ -85,19 +85,19 @@ def test_laplacian(ctx_factory):
     else:
         seq_knl = knl

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
     knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
     knl = lp.add_prefetch(knl, "D", ["m", "j"])
     #knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
-    #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    #knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

     #print seq_knl
     #print lp.preprocess_kernel(knl)
     #1/0

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl,
             loop_priority=["m_fetch_G", "i_fetch_u"])
@@ -160,18 +160,18 @@ def test_laplacian_lmem(ctx_factory):
     else:
         seq_knl = knl

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
     knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
     knl = lp.add_prefetch(knl, "D", ["m", "j"])
     knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
-    #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    #knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

     #print seq_knl
     #print lp.preprocess_kernel(knl)
     #1/0

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
     kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
@@ -227,8 +227,8 @@ def test_laplacian_lmem_ilp(ctx_factory):
     # Must act on u first, otherwise stencil becomes crooked and
     # footprint becomes non-convex.

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

     knl = lp.add_prefetch(knl, "u", [1, 2, 3, "e_inner_inner"])

@@ -242,7 +242,7 @@ def test_laplacian_lmem_ilp(ctx_factory):
     #print seq_knl
     #1/0

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
     kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
@@ -330,9 +330,9 @@ def test_advect(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
@@ -448,9 +448,9 @@ def test_advect_dealias(ctx_factory):
     print knl
     1/0

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     print knl
     #1/0
@@ -512,9 +512,9 @@ def test_interp_diff(ctx_factory):
     print knl
     1/0

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     print knl
     #1/0
diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py
index 04d6aeed44f14c10796453de539ff7f89a311906..d693b5e83a63bb92e40d0753b64954e42fe8cae0 100644
--- a/proto-tests/test_sem_tim.py
+++ b/proto-tests/test_sem_tim.py
@@ -85,19 +85,19 @@ def test_laplacian(ctx_factory):
     else:
         seq_knl = knl

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
     knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
     knl = lp.add_prefetch(knl, "D", ["m", "j"])
     #knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
-    #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    #knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

     #print seq_knl
     #print lp.preprocess_kernel(knl)
     #1/0

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl,
             loop_priority=["m_fetch_G", "i_fetch_u"])
@@ -154,7 +154,7 @@ def test_laplacian_lmem(ctx_factory):
         knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"])
         knl = lp.precompute(knl, "us", np.float32, ["a", "b", "c"])
         knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"])
-        knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+        knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
         knl = lp.add_prefetch(knl, "D", ["m", "j", "k", "i"])
     else:
         # experiment
@@ -163,7 +163,7 @@ def test_laplacian_lmem(ctx_factory):
         knl = lp.precompute(knl, "ur", np.float32, ["b", "c"])
         knl = lp.precompute(knl, "us", np.float32, ["b", "c"])
         knl = lp.precompute(knl, "ut", np.float32, ["b", "c"])
-        knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
+        knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))

         knl = lp.add_prefetch(knl, "D", ["m", "j", "k", "i"])

@@ -173,7 +173,7 @@ def test_laplacian_lmem(ctx_factory):
     #print knl
     #1/0

-    #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    #knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

 #    knl = lp.join_dimensions(knl, ["i", "j"], "i_and_j")

     #print seq_knl
@@ -182,7 +182,7 @@ def test_laplacian_lmem(ctx_factory):

 # TW: turned this off since it generated:
 # ValueError: cannot tag 'i_and_j'--not known
-#    knl = lp.tag_dimensions(knl, dict(i_and_j="l.0", k="l.1"))
+#    knl = lp.tag_inames(knl, dict(i_and_j="l.0", k="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
     kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
@@ -238,8 +238,8 @@ def test_laplacian_lmem_ilp(ctx_factory):
     # Must act on u first, otherwise stencil becomes crooked and
     # footprint becomes non-convex.

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")

     knl = lp.add_prefetch(knl, "u", [1, 2, 3, "e_inner_inner"])

@@ -253,7 +253,7 @@ def test_laplacian_lmem_ilp(ctx_factory):
     #print seq_knl
     #1/0

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
     kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
@@ -341,9 +341,9 @@ def test_advect(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     kernel_gen = lp.generate_loop_schedules(knl)
@@ -459,9 +459,9 @@ def test_advect_dealias(ctx_factory):
     print knl
     1/0

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     print knl
     #1/0
@@ -523,9 +523,9 @@ def test_interp_diff(ctx_factory):
     print knl
     1/0

-    knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))

     print knl
     #1/0
diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py
index 84523146dd586c634c382a2c1019a04b358c3974..866224e116cdd37a003b92f377b9eccf22f53659 100644
--- a/proto-tests/test_tim.py
+++ b/proto-tests/test_tim.py
@@ -53,11 +53,11 @@ def test_tim2d(ctx_factory):
     knl = lp.add_prefetch(knl, "u", ["i", "j", "o"])
     knl = lp.precompute(knl, "ur", np.float32, ["a", "b"])
     knl = lp.precompute(knl, "us", np.float32, ["a", "b"])
-    knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
-    knl = lp.tag_dimensions(knl, dict(o="unr"))
-    knl = lp.tag_dimensions(knl, dict(m="unr"))
+    knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
+    knl = lp.tag_inames(knl, dict(o="unr"))
+    knl = lp.tag_inames(knl, dict(m="unr"))

 #    knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G
@@ -116,12 +116,12 @@ def test_red2d(ctx_factory):
     knl = lp.precompute(knl, "ue", np.float32, ["a", "b", "m"])
     knl = lp.precompute(knl, "ur", np.float32, ["a", "b"])
     knl = lp.precompute(knl, "us", np.float32, ["a", "b"])
-    knl = lp.split_dimension(knl, "e", 2, outer_tag="g.0")
-    knl = lp.split_dimension(knl, "j", n, inner_tag="l.0")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "i", n, inner_tag="l.1")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 2, outer_tag="g.0")
+    knl = lp.split_iname(knl, "j", n, inner_tag="l.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "i", n, inner_tag="l.1")#, slabs=(0, 1))

-    knl = lp.tag_dimensions(knl, dict(o="unr"))
-    knl = lp.tag_dimensions(knl, dict(m="unr"))
+    knl = lp.tag_inames(knl, dict(o="unr"))
+    knl = lp.tag_inames(knl, dict(m="unr"))

     knl = lp.add_prefetch(knl, "G", [2,3]) # axis/argument indices on G
@@ -182,16 +182,16 @@ def test_tim3d(ctx_factory):
     knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"])
     knl = lp.precompute(knl, "us", np.float32, ["a", "b", "c"])
     knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"])
-    knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "k", n, inner_tag="l.2")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "j", n, inner_tag="l.1")#, slabs=(0, 1))
-    knl = lp.split_dimension(knl, "i", n, inner_tag="l.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "k", n, inner_tag="l.2")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "j", n, inner_tag="l.1")#, slabs=(0, 1))
+    knl = lp.split_iname(knl, "i", n, inner_tag="l.0")#, slabs=(0, 1))

-#    knl = lp.tag_dimensions(knl, dict(k_nner="unr"))
+#    knl = lp.tag_inames(knl, dict(k_nner="unr"))

-    knl = lp.tag_dimensions(knl, dict(o="unr"))
-    knl = lp.tag_dimensions(knl, dict(m="unr"))
-#    knl = lp.tag_dimensions(knl, dict(i="unr"))
+    knl = lp.tag_inames(knl, dict(o="unr"))
+    knl = lp.tag_inames(knl, dict(m="unr"))
+#    knl = lp.tag_inames(knl, dict(i="unr"))

     knl = lp.add_prefetch(knl, "G", [2,3,4]) # axis/argument indices on G
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 00b63ec6f66fa566aa8655192fbcd301870e135b..c37134347c864f3f2d02c991d28e4dc1139d8c99 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -124,15 +124,15 @@ def test_axpy(ctx_factory):
     def variant_cpu(knl):
         unroll = 16
         block_size = unroll*4096
-        knl = lp.split_dimension(knl, "i", block_size, outer_tag="g.0", slabs=(0, 1))
-        knl = lp.split_dimension(knl, "i_inner", unroll, inner_tag="unr")
+        knl = lp.split_iname(knl, "i", block_size, outer_tag="g.0", slabs=(0, 1))
+        knl = lp.split_iname(knl, "i_inner", unroll, inner_tag="unr")
         return knl

     def variant_gpu(knl):
         unroll = 4
         block_size = 256
-        knl = lp.split_dimension(knl, "i", unroll*block_size, outer_tag="g.0", slabs=(0, 1))
-        knl = lp.split_dimension(knl, "i_inner", block_size, outer_tag="unr", inner_tag="l.0")
+        knl = lp.split_iname(knl, "i", unroll*block_size, outer_tag="g.0", slabs=(0, 1))
+        knl = lp.split_iname(knl, "i_inner", block_size, outer_tag="unr", inner_tag="l.0")
         return knl

     for variant in [variant_cpu, variant_gpu]:
@@ -169,9 +169,9 @@ def test_transpose(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "i", 16,
+    knl = lp.split_iname(knl, "i", 16,
             outer_tag="g.0", inner_tag="l.1")
-    knl = lp.split_dimension(knl, "j", 16,
+    knl = lp.split_iname(knl, "j", 16,
             outer_tag="g.1", inner_tag="l.0")
     knl = lp.add_prefetch(knl, 'a', ["i_inner", "j_inner"])

@@ -209,11 +209,11 @@ def test_plain_matrix_mul(ctx_factory):

     ref_knl = knl

-    knl = lp.split_dimension(knl, "i", 16,
+    knl = lp.split_iname(knl, "i", 16,
             outer_tag="g.0", inner_tag="l.1")
-    knl = lp.split_dimension(knl, "j", 16,
+    knl = lp.split_iname(knl, "j", 16,
             outer_tag="g.1", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 16)
+    knl = lp.split_iname(knl, "k", 16)

     knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
     knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner", ])
@@ -250,11 +250,11 @@ def test_variable_size_matrix_mul(ctx_factory):

     ref_knl = knl

-    knl = lp.split_dimension(knl, "i", 16,
+    knl = lp.split_iname(knl, "i", 16,
             outer_tag="g.0", inner_tag="l.1")
-    knl = lp.split_dimension(knl, "j", 8,
+    knl = lp.split_iname(knl, "j", 8,
             outer_tag="g.1", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 32)
+    knl = lp.split_iname(knl, "k", 32)

     knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
     knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner"])
@@ -296,9 +296,9 @@ def test_rank_one(ctx_factory):
         return knl

     def variant_2(knl):
-        knl = lp.split_dimension(knl, "i", 16,
+        knl = lp.split_iname(knl, "i", 16,
                 outer_tag="g.0", inner_tag="l.0")
-        knl = lp.split_dimension(knl, "j", 16,
+        knl = lp.split_iname(knl, "j", 16,
                 outer_tag="g.1", inner_tag="l.1")

         knl = lp.add_prefetch(knl, "a")
@@ -306,9 +306,9 @@ def test_rank_one(ctx_factory):
         return knl

     def variant_3(knl):
-        knl = lp.split_dimension(knl, "i", 16,
+        knl = lp.split_iname(knl, "i", 16,
                 outer_tag="g.0", inner_tag="l.0")
-        knl = lp.split_dimension(knl, "j", 16,
+        knl = lp.split_iname(knl, "j", 16,
                 outer_tag="g.1", inner_tag="l.1")

         knl = lp.add_prefetch(knl, "a", ["i_inner"])
@@ -316,22 +316,22 @@ def test_rank_one(ctx_factory):
         return knl

     def variant_4(knl):
-        knl = lp.split_dimension(knl, "i", 256,
+        knl = lp.split_iname(knl, "i", 256,
                 outer_tag="g.0", slabs=(0, 1))
-        knl = lp.split_dimension(knl, "j", 256,
+        knl = lp.split_iname(knl, "j", 256,
                 outer_tag="g.1", slabs=(0, 1))

         knl = lp.add_prefetch(knl, "a", ["i_inner"], default_tag=None)
         knl = lp.add_prefetch(knl, "b", ["j_inner"], default_tag=None)

-        knl = lp.split_dimension(knl, "i_inner", 16,
+        knl = lp.split_iname(knl, "i_inner", 16,
                 inner_tag="l.0")
-        knl = lp.split_dimension(knl, "j_inner", 16,
+        knl = lp.split_iname(knl, "j_inner", 16,
                 inner_tag="l.1")

-        knl = lp.split_dimension(knl, "a_dim_0", 16,
+        knl = lp.split_iname(knl, "a_dim_0", 16,
                 outer_tag="l.1", inner_tag="l.0")
-        knl = lp.split_dimension(knl, "b_dim_0", 16,
+        knl = lp.split_iname(knl, "b_dim_0", 16,
                 outer_tag="l.1", inner_tag="l.0")
         return knl
@@ -374,11 +374,11 @@ def test_troublesome_premagma_fermi_matrix_mul(ctx_factory):
     j_reg = 2
     i_chunks = 16
     j_chunks = 16
-    knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
-    knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
-    knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "k", 16)
+    knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
+    knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
+    knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
+    knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
+    knl = lp.split_iname(knl, "k", 16)
     knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"])

     kernel_gen = lp.generate_loop_schedules(knl)
@@ -416,12 +416,12 @@ def test_intel_matrix_mul(ctx_factory):
     j_reg = 4
     i_chunks = 16
     j_chunks = 16
-    knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
-    knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
-    knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "k", 16)
-    #knl = lp.split_dimension(knl, "k_inner", 8, outer_tag="unr")
+    knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
+    knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
+    knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
+    knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
+    knl = lp.split_iname(knl, "k", 16)
+    #knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")

     knl = lp.add_prefetch(knl, 'a', ["i_inner_inner", "k_inner", "i_inner_outer"])
     knl = lp.add_prefetch(knl, 'b', ["j_inner_inner", "k_inner", "j_inner_outer"])
@@ -469,12 +469,12 @@ def test_magma_fermi_matrix_mul(ctx_factory):
     j_chunks = 16

-    knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
-    knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
-    knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
-    knl = lp.split_dimension(knl, "k", 16)
-    knl = lp.split_dimension(knl, "k_inner", 8, outer_tag="unr")
+    knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
+    knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
+    knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
+    knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
+    knl = lp.split_iname(knl, "k", 16)
+    knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")
     # FIXME
     #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"])
     #knl = lp.add_prefetch(knl, 'b', ["k_inner", ("j_inner_inner", "j_inner_outer"),])
@@ -511,9 +511,9 @@ def test_image_matrix_mul(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
-    knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 32)
+    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
+    knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 32)
     # conflict-free
     knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
     knl = lp.add_prefetch(knl, 'b', ["j_inner", "k_inner"])
@@ -549,11 +549,11 @@ def test_image_matrix_mul_ilp(ctx_factory):
     seq_knl = knl

     ilp = 4
-    knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
+    knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
     j_inner_split = 4
-    knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1")
-    knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 2)
+    knl = lp.split_iname(knl, "j", ilp*j_inner_split, outer_tag="g.1")
+    knl = lp.split_iname(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 2)
     # conflict-free?
     knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
     knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"])
@@ -586,8 +586,8 @@ def test_ilp_race_matmul(ctx_factory):
             ],
             name="matmul")

-    knl = lp.split_dimension(knl, "j", 2, outer_tag="ilp", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 2)
+    knl = lp.split_iname(knl, "j", 2, outer_tag="ilp", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 2)
     knl = lp.add_prefetch(knl, 'b', ["k_inner"])

     from loopy.check import WriteRaceConditionError
@@ -621,9 +621,9 @@ def test_fancy_matrix_mul(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
-    knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "k", 16, slabs=(0,1))
+    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
+    knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
+    knl = lp.split_iname(knl, "k", 16, slabs=(0,1))

     knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
     knl = lp.add_prefetch(knl, 'b', ["k_inner", "j_inner"])
diff --git a/test/test_loopy.py b/test/test_loopy.py
index fdcd58af1361cda34d4f7a55bc87f6119dd885e2..0203bb1d8fbd84e297415a6e6754e12c98870187 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -72,7 +72,7 @@ def test_multi_cse(ctx_factory):
             [lp.GlobalArg("a", np.float32, shape=(100,))],
             local_sizes={0: 16})

-    knl = lp.split_dimension(knl, "i", 16, inner_tag="l.0")
+    knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
     knl = lp.add_prefetch(knl, "a", [])

     kernel_gen = lp.generate_loop_schedules(knl)
@@ -111,8 +111,8 @@ def test_stencil(ctx_factory):
     ref_knl = knl

     def variant_1(knl):
-        knl = lp.split_dimension(knl, "i", 16, outer_tag="g.1", inner_tag="l.1")
-        knl = lp.split_dimension(knl, "j", 16, outer_tag="g.0", inner_tag="l.0")
+        knl = lp.split_iname(knl, "i", 16, outer_tag="g.1", inner_tag="l.1")
+        knl = lp.split_iname(knl, "j", 16, outer_tag="g.0", inner_tag="l.0")
         knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"])
         return knl
@@ -141,8 +141,8 @@ def test_eq_constraint(ctx_factory):
             lp.GlobalArg("b", np.float32, shape=(1000,))
             ])

-    knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0")
-    knl = lp.split_dimension(knl, "i_inner", 16, outer_tag=None, inner_tag="l.0")
+    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0")
+    knl = lp.split_iname(knl, "i_inner", 16, outer_tag=None, inner_tag="l.0")

     kernel_gen = lp.generate_loop_schedules(knl)
     kernel_gen = lp.check_kernels(kernel_gen)
@@ -397,7 +397,7 @@ def test_dependent_loop_bounds_2(ctx_factory):
             ],
             assumptions="n>=1 and row_len>=1")

-    knl = lp.split_dimension(knl, "i", 128, outer_tag="g.0",
+    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0",
             inner_tag="l.0")
     cknl = lp.CompiledKernel(ctx, knl)
     print "---------------------------------------------------"
@@ -434,7 +434,7 @@ def test_dependent_loop_bounds_3(ctx_factory):

     assert knl.parents_per_domain()[1] == 0

-    knl = lp.split_dimension(knl, "i", 128, outer_tag="g.0",
+    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0",
             inner_tag="l.0")

     cknl = lp.CompiledKernel(ctx, knl)
@@ -442,7 +442,7 @@ def test_dependent_loop_bounds_3(ctx_factory):
     cknl.print_code()
     print "---------------------------------------------------"

-    knl_bad = lp.split_dimension(knl, "jj", 128, outer_tag="g.1",
+    knl_bad = lp.split_iname(knl, "jj", 128, outer_tag="g.1",
             inner_tag="l.1")

     import pytest
@@ -473,9 +473,9 @@ def test_independent_multi_domain(ctx_factory):
             ])


-    knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0",
+    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0",
             inner_tag="l.0")
-    knl = lp.split_dimension(knl, "j", 16, outer_tag="g.0",
+    knl = lp.split_iname(knl, "j", 16, outer_tag="g.0",
             inner_tag="l.0")
     assert knl.parents_per_domain() == 2*[None]
@@ -544,8 +544,8 @@ def test_equality_constraints(ctx_factory):

     seq_knl = knl

-    knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
-    knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.1")
+    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
+    knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.1")
     #print knl
     #print knl.domains[0].detect_equalities()
diff --git a/test/test_nbody.py b/test/test_nbody.py
index 90859883b82e45bd39c3838f867bf4b1a4d01b87..931b460a5ffa0df0ae873ffe3e053be9746b49fe 100644
--- a/test/test_nbody.py
+++ b/test/test_nbody.py
@@ -30,28 +30,28 @@ def test_nbody(ctx_factory):
     seq_knl = knl

     def variant_1(knl):
-        knl = lp.split_dimension(knl, "i", 256,
+        knl = lp.split_iname(knl, "i", 256,
                 outer_tag="g.0", inner_tag="l.0",
                 slabs=(0,1))
-        knl = lp.split_dimension(knl, "j", 256, slabs=(0,1))
+        knl = lp.split_iname(knl, "j", 256, slabs=(0,1))
         return knl, []

     def variant_cpu(knl):
         knl = lp.expand_subst(knl)
-        knl = lp.split_dimension(knl, "i", 1024,
+        knl = lp.split_iname(knl, "i", 1024,
                 outer_tag="g.0", slabs=(0,1))
         knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)
         return knl, []

     def variant_gpu(knl):
         knl = lp.expand_subst(knl)
-        knl = lp.split_dimension(knl, "i", 256,
+        knl = lp.split_iname(knl, "i", 256,
                 outer_tag="g.0", inner_tag="l.0", slabs=(0,1))
-        knl = lp.split_dimension(knl, "j", 256, slabs=(0,1))
+        knl = lp.split_iname(knl, "j", 256, slabs=(0,1))
         knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)
         knl = lp.add_prefetch(knl, "x[j,k]", ["j_inner", "k"],
                 ["x_fetch_j", "x_fetch_k"])
-        knl = lp.tag_dimensions(knl, dict(x_fetch_k="unr"))
+        knl = lp.tag_inames(knl, dict(x_fetch_k="unr"))
         return knl, ["j_outer", "j_inner"]

     n = 3000
diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py
index 176f1d00462fdb7294564a8db016c74373b8e49e..38afd2489402e14e4314516a743ea7f3b7fb1c8c 100644
--- a/test/test_sem_reagan.py
+++ b/test/test_sem_reagan.py
@@ -52,7 +52,7 @@ def test_tim2d(ctx_factory):
     seq_knl = knl

     def variant_orig(knl):
-        knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1", e="g.0"))
+        knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0"))

         knl = lp.add_prefetch(knl, "D[:,:]")
         knl = lp.add_prefetch(knl, "u[e, :, :]")
@@ -66,8 +66,8 @@ def test_tim2d(ctx_factory):
         knl = lp.add_prefetch(knl, "G$x")
         knl = lp.add_prefetch(knl, "G$y")

-        knl = lp.tag_dimensions(knl, dict(o="unr"))
-        knl = lp.tag_dimensions(knl, dict(m="unr"))
+        knl = lp.tag_inames(knl, dict(o="unr"))
+        knl = lp.tag_inames(knl, dict(m="unr"))

         knl = lp.set_instruction_priority(knl, "D_fetch", 5)
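Note on the rename above: the old entry points (split_dimension, join_dimensions,
tag_dimensions) remain importable and are wrapped with pytools'
MovedFunctionDeprecationWrapper, so existing call sites keep working while new code
moves to split_iname, join_inames and tag_inames. As a rough sketch of what such a
wrapper does (an illustrative stand-in with assumed behavior, not pytools' actual
implementation, and the helper name below is hypothetical):

    import warnings
    from functools import wraps

    def moved_function_deprecation_wrapper(new_func):
        # Illustrative stand-in for pytools.MovedFunctionDeprecationWrapper:
        # forward every call to the renamed function, but warn that the old
        # name is deprecated.
        @wraps(new_func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                    "this function has been renamed; use '%s' instead"
                    % new_func.__name__,
                    DeprecationWarning, stacklevel=2)
            return new_func(*args, **kwargs)
        return wrapper

    # Mirrors the shims added in loopy/__init__.py, e.g.:
    #   split_dimension = MovedFunctionDeprecationWrapper(split_iname)

Callers can then migrate at their own pace, e.g.
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.0"),
as in the updated tests above.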