Skip to content
Snippets Groups Projects
Commit 5d87f11f authored by Andreas Klöckner's avatar Andreas Klöckner
Browse files

Rename *_dimension -> *_iname.

parent c0055227
No related branches found
No related tags found
No related merge requests found
......@@ -47,8 +47,14 @@ To-do
- Make xfail test for strided access.
- Test join_inames
- *_dimension -> *_iname
- Debug axpy nondet fail
- Make tests run on GPUs
Fixes:
- Group instructions by dependency/inames for scheduling, to
......
......@@ -103,8 +103,8 @@ Automatic local axes are chosen as follows:
#. Assign the low-stride iname to the available axis, splitting
the iname if it is too long for the available axis size.
If you need different behavior, use :func:`tag_dimensions` and
:func:`split_dimension` to change the assignment of `"l.auto"` axes
If you need different behavior, use :func:`tag_inames` and
:func:`split_iname` to change the assignment of `"l.auto"` axes
manually.
.. _creating-kernels:
......@@ -152,14 +152,14 @@ function, which takes the same arguments, but does some extra post-processing.
.. autofunction:: make_kernel
Wrangling dimensions
--------------------
Wrangling inames
----------------
.. autofunction:: split_dimension
.. autofunction:: split_iname
.. autofunction:: join_dimensions
.. autofunction:: join_inames
.. autofunction:: tag_dimensions
.. autofunction:: tag_inames
Dealing with Substitution Rules
-------------------------------
......
......@@ -48,11 +48,11 @@ def image_matrix_mul_ilp(ctx_factory=cl.create_some_context):
name="matmul")
ilp = 4
knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
j_inner_split = 16
knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1")
knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 2)
knl = lp.split_iname(knl, "j", ilp*j_inner_split, outer_tag="g.1")
knl = lp.split_iname(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 2)
knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"])
......
......@@ -11,6 +11,8 @@ register_mpz_with_pymbolic()
import islpy as isl
from islpy import dim_type
from pytools import MovedFunctionDeprecationWrapper
......@@ -50,8 +52,9 @@ __all__ = ["ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "Ima
"generate_loop_schedules",
"generate_code",
"CompiledKernel", "auto_test_vs_ref", "check_kernels",
"make_kernel", "split_dimension", "join_dimensions",
"tag_dimensions",
"make_kernel",
"split_iname", "join_inames", "tag_inames",
"split_dimension", "join_dimensions", "tag_dimensions",
"extract_subst", "expand_subst",
"precompute", "add_prefetch",
"split_arg_axis", "find_padding_multiple", "add_padding"
......@@ -62,9 +65,9 @@ class infer_type:
# }}}
# {{{ dimension split
# {{{ split inames
def split_dimension(kernel, split_iname, inner_length,
def split_iname(kernel, split_iname, inner_length,
outer_iname=None, inner_iname=None,
outer_tag=None, inner_tag=None,
slabs=(0, 0), do_tagged_check=True):
......@@ -168,16 +171,18 @@ def split_dimension(kernel, split_iname, inner_length,
))
if existing_tag is not None:
result = tag_dimensions(result,
result = tag_inames(result,
{outer_iname: existing_tag, inner_iname: existing_tag})
return tag_dimensions(result, {outer_iname: outer_tag, inner_iname: inner_tag})
return tag_inames(result, {outer_iname: outer_tag, inner_iname: inner_tag})
split_dimension = MovedFunctionDeprecationWrapper(split_iname)
# }}}
# {{{ dimension join
# {{{ join inames
def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
def join_inames(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
"""
:arg inames: fastest varying last
"""
......@@ -264,13 +269,15 @@ def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_map]
))
return tag_dimensions(result, {new_iname: tag})
return tag_inames(result, {new_iname: tag})
join_dimensions = MovedFunctionDeprecationWrapper(join_inames)
# }}}
# {{{ dimension tag
# {{{ tag inames
def tag_dimensions(kernel, iname_to_tag, force=False):
def tag_inames(kernel, iname_to_tag, force=False):
from loopy.kernel import parse_tag
iname_to_tag = dict((iname, parse_tag(tag))
......@@ -315,6 +322,8 @@ def tag_dimensions(kernel, iname_to_tag, force=False):
return kernel.copy(iname_to_tag=new_iname_to_tag)
tag_dimensions = MovedFunctionDeprecationWrapper(tag_inames)
# }}}
# {{{ convenience: add_prefetch
......
......@@ -27,8 +27,8 @@ def tag_reduction_inames_as_sequential(knl):
if tag is None:
new_iname_to_tag[iname] = ForceSequentialTag()
from loopy import tag_dimensions
return tag_dimensions(knl, new_iname_to_tag)
from loopy import tag_inames
return tag_inames(knl, new_iname_to_tag)
# {{{ sanity checking
......@@ -397,8 +397,8 @@ def make_kernel(*args, **kwargs):
from loopy.kernel import LoopKernel
knl = LoopKernel(*args, **kwargs)
from loopy import tag_dimensions
knl = tag_dimensions(
from loopy import tag_inames
knl = tag_inames(
knl.copy(iname_to_tag_requests=None),
knl.iname_to_tag_requests).copy(
iname_to_tag_requests=[])
......
......@@ -843,8 +843,8 @@ def precompute(kernel, subst_use, dtype, sweep_inames=[],
substitutions=new_substs,
temporary_variables=new_temporary_variables)
from loopy import tag_dimensions
return tag_dimensions(result, new_iname_to_tag)
from loopy import tag_inames
return tag_inames(result, new_iname_to_tag)
......
......@@ -152,10 +152,10 @@ def split_arg_axis(kernel, args_and_axes, count):
.map_expressions(aash)
.copy(args=new_args))
from loopy import split_dimension
from loopy import split_iname
for split_iname, (outer_iname, inner_iname) in split_vars.iteritems():
result = split_dimension(result, split_iname, count,
for iname, (outer_iname, inner_iname) in split_vars.iteritems():
result = split_iname(result, iname, count,
outer_iname=outer_iname, inner_iname=inner_iname)
return result
......
......@@ -557,8 +557,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn):
# {{{ assign automatic axes
def assign_automatic_axes(kernel, axis=0, local_size=None):
from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag,
UnrollTag)
from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag)
# Realize that at this point in time, axis lengths are already
# fixed. So we compute them once and pass them to our recursive
......@@ -615,13 +614,13 @@ def assign_automatic_axes(kernel, axis=0, local_size=None):
else:
new_tag = LocalIndexTag(axis)
if desired_length > local_size[axis]:
from loopy import split_dimension
from loopy import split_iname
# Don't be tempted to switch the outer tag to unroll--this may
# generate tons of code on some examples.
return assign_automatic_axes(
split_dimension(kernel, iname, inner_length=local_size[axis],
split_iname(kernel, iname, inner_length=local_size[axis],
outer_tag=None, inner_tag=new_tag,
do_tagged_check=False),
axis=recursion_axis, local_size=local_size)
......
......@@ -61,13 +61,13 @@ def test_dg_matrix_mul(ctx_factory):
name="dg_matmul")
#ilp = 4
knl = lp.split_dimension(knl, "i", 30, 32, outer_tag="g.0", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 16, outer_tag="g.1", inner_tag="l.1")
#knl = lp.split_dimension(knl, "k_inner", 16, outer_tag="ilp", inner_tag="l.1")
knl = lp.split_iname(knl, "i", 30, 32, outer_tag="g.0", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 16, outer_tag="g.1", inner_tag="l.1")
#knl = lp.split_iname(knl, "k_inner", 16, outer_tag="ilp", inner_tag="l.1")
assert Np % 2 == 0
#knl = lp.split_dimension(knl, "j", Np//2)
#knl = lp.split_dimension(knl, "k", 32)
#knl = lp.split_iname(knl, "j", Np//2)
#knl = lp.split_iname(knl, "k", 32)
#for mn in matrix_names:
#knl = lp.add_prefetch(knl, mn, ["j", "i_inner"])
......
......@@ -44,21 +44,21 @@ def test_laplacian_stiffness(ctx_factory):
],
name="lapquad", assumptions="Nc>=1")
knl = lp.tag_dimensions(knl, dict(ax_b="unr"))
knl = lp.tag_inames(knl, dict(ax_b="unr"))
seq_knl = knl
def variant_fig31(knl):
# This (mostly) reproduces Figure 3.1.
knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
knl = lp.tag_inames(knl, {"dx_axis": "unr"})
return knl, ["K", "i", "j", "q", "ax_b_insn"]
def variant_pg4(knl):
# This (mostly) reproduces the unlabeled code snippet on pg. 4.
knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
knl = lp.tag_inames(knl, {"dx_axis": "unr"})
Ncloc = 16
knl = lp.split_dimension(knl, "K", Ncloc,
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc")
return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"]
......@@ -66,21 +66,21 @@ def test_laplacian_stiffness(ctx_factory):
# This (mostly) reproduces Figure 3.2.
Ncloc = 16
knl = lp.split_dimension(knl, "K", Ncloc,
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc")
knl = lp.precompute(knl, "dPsi", np.float32, ["i", "q", "dx_axis"],
default_tag=None)
knl = lp.tag_dimensions(knl, {"dx_axis": "unr", "dxi": "unr"})
knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"})
return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"]
def variant_fig33(knl):
# This is meant to (mostly) reproduce Figure 3.3.
Ncloc = 16
knl = lp.split_dimension(knl, "K", Ncloc,
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc")
knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"], default_tag=None)
knl = lp.tag_dimensions(knl, {"j": "ilp.seq"})
knl = lp.tag_inames(knl, {"j": "ilp.seq"})
return knl, ["Ko", "Kloc"]
......@@ -91,12 +91,12 @@ def test_laplacian_stiffness(ctx_factory):
# to reverse-engineer what is going on there. Some discussion might
# help, too. :)
knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
knl = lp.tag_inames(knl, {"dx_axis": "unr"})
Ncloc = 16
knl = lp.split_dimension(knl, "K", Ncloc,
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc",
outer_tag="g.0")
knl = lp.tag_dimensions(knl, {"i": "l.1", "j": "l.0"})
knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
return knl, ["K", "i", "j", "q", "ax_b_insn"]
def variant_simple_gpu_prefetch(knl):
......@@ -106,12 +106,12 @@ def test_laplacian_stiffness(ctx_factory):
# for the upper bound of Kloc (it uses Nc). I'll investigate and
# fix that. (FIXME)
knl = lp.tag_dimensions(knl, {"dx_axis": "unr"})
knl = lp.tag_inames(knl, {"dx_axis": "unr"})
Ncloc = 16
knl = lp.split_dimension(knl, "K", Ncloc,
knl = lp.split_iname(knl, "K", Ncloc,
outer_iname="Ko", inner_iname="Kloc",
outer_tag="g.0")
knl = lp.tag_dimensions(knl, {"i": "l.1", "j": "l.0"})
knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
knl = lp.add_prefetch(knl, "w", ["q"])
knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2])
knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3])
......
......@@ -85,19 +85,19 @@ def test_laplacian(ctx_factory):
else:
seq_knl = knl
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
knl = lp.add_prefetch(knl, "D", ["m", "j"])
#knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
#print seq_knl
#print lp.preprocess_kernel(knl)
#1/0
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl,
loop_priority=["m_fetch_G", "i_fetch_u"])
......@@ -160,18 +160,18 @@ def test_laplacian_lmem(ctx_factory):
else:
seq_knl = knl
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
knl = lp.add_prefetch(knl, "D", ["m", "j"])
knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
#print seq_knl
#print lp.preprocess_kernel(knl)
#1/0
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
......@@ -227,8 +227,8 @@ def test_laplacian_lmem_ilp(ctx_factory):
# Must act on u first, otherwise stencil becomes crooked and
# footprint becomes non-convex.
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
knl = lp.add_prefetch(knl, "u", [1, 2, 3, "e_inner_inner"])
......@@ -242,7 +242,7 @@ def test_laplacian_lmem_ilp(ctx_factory):
#print seq_knl
#1/0
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
......@@ -330,9 +330,9 @@ def test_advect(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
......@@ -448,9 +448,9 @@ def test_advect_dealias(ctx_factory):
print knl
1/0
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
print knl
#1/0
......@@ -512,9 +512,9 @@ def test_interp_diff(ctx_factory):
print knl
1/0
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
print knl
#1/0
......
......@@ -85,19 +85,19 @@ def test_laplacian(ctx_factory):
else:
seq_knl = knl
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
knl = lp.add_prefetch(knl, "D", ["m", "j"])
#knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
#print seq_knl
#print lp.preprocess_kernel(knl)
#1/0
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl,
loop_priority=["m_fetch_G", "i_fetch_u"])
......@@ -154,7 +154,7 @@ def test_laplacian_lmem(ctx_factory):
knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"])
knl = lp.precompute(knl, "us", np.float32, ["a", "b", "c"])
knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"])
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.add_prefetch(knl, "D", ["m", "j", "k", "i"])
else:
# experiment
......@@ -163,7 +163,7 @@ def test_laplacian_lmem(ctx_factory):
knl = lp.precompute(knl, "ur", np.float32, ["b", "c"])
knl = lp.precompute(knl, "us", np.float32, ["b", "c"])
knl = lp.precompute(knl, "ut", np.float32, ["b", "c"])
knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.add_prefetch(knl, "D", ["m", "j", "k", "i"])
......@@ -173,7 +173,7 @@ def test_laplacian_lmem(ctx_factory):
#print knl
#1/0
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
# knl = lp.join_dimensions(knl, ["i", "j"], "i_and_j")
#print seq_knl
......@@ -182,7 +182,7 @@ def test_laplacian_lmem(ctx_factory):
# TW: turned this off since it generated:
# ValueError: cannot tag 'i_and_j'--not known
# knl = lp.tag_dimensions(knl, dict(i_and_j="l.0", k="l.1"))
# knl = lp.tag_inames(knl, dict(i_and_j="l.0", k="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
......@@ -238,8 +238,8 @@ def test_laplacian_lmem_ilp(ctx_factory):
# Must act on u first, otherwise stencil becomes crooked and
# footprint becomes non-convex.
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e_inner", 4, inner_tag="ilp")
knl = lp.add_prefetch(knl, "u", [1, 2, 3, "e_inner_inner"])
......@@ -253,7 +253,7 @@ def test_laplacian_lmem_ilp(ctx_factory):
#print seq_knl
#1/0
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
......@@ -341,9 +341,9 @@ def test_advect(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
kernel_gen = lp.generate_loop_schedules(knl)
......@@ -459,9 +459,9 @@ def test_advect_dealias(ctx_factory):
print knl
1/0
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
print knl
#1/0
......@@ -523,9 +523,9 @@ def test_interp_diff(ctx_factory):
print knl
1/0
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
print knl
#1/0
......
......@@ -53,11 +53,11 @@ def test_tim2d(ctx_factory):
knl = lp.add_prefetch(knl, "u", ["i", "j", "o"])
knl = lp.precompute(knl, "ur", np.float32, ["a", "b"])
knl = lp.precompute(knl, "us", np.float32, ["a", "b"])
knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_dimensions(knl, dict(o="unr"))
knl = lp.tag_dimensions(knl, dict(m="unr"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1"))
knl = lp.tag_inames(knl, dict(o="unr"))
knl = lp.tag_inames(knl, dict(m="unr"))
# knl = lp.add_prefetch(knl, "G", [2,3], default_tag=None) # axis/argument indices on G
......@@ -116,12 +116,12 @@ def test_red2d(ctx_factory):
knl = lp.precompute(knl, "ue", np.float32, ["a", "b", "m"])
knl = lp.precompute(knl, "ur", np.float32, ["a", "b"])
knl = lp.precompute(knl, "us", np.float32, ["a", "b"])
knl = lp.split_dimension(knl, "e", 2, outer_tag="g.0")
knl = lp.split_dimension(knl, "j", n, inner_tag="l.0")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "i", n, inner_tag="l.1")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 2, outer_tag="g.0")
knl = lp.split_iname(knl, "j", n, inner_tag="l.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "i", n, inner_tag="l.1")#, slabs=(0, 1))
knl = lp.tag_dimensions(knl, dict(o="unr"))
knl = lp.tag_dimensions(knl, dict(m="unr"))
knl = lp.tag_inames(knl, dict(o="unr"))
knl = lp.tag_inames(knl, dict(m="unr"))
knl = lp.add_prefetch(knl, "G", [2,3]) # axis/argument indices on G
......@@ -182,16 +182,16 @@ def test_tim3d(ctx_factory):
knl = lp.precompute(knl, "ur", np.float32, ["a", "b", "c"])
knl = lp.precompute(knl, "us", np.float32, ["a", "b", "c"])
knl = lp.precompute(knl, "ut", np.float32, ["a", "b", "c"])
knl = lp.split_dimension(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "k", n, inner_tag="l.2")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "j", n, inner_tag="l.1")#, slabs=(0, 1))
knl = lp.split_dimension(knl, "i", n, inner_tag="l.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "e", 1, outer_tag="g.0")#, slabs=(0, 1))
knl = lp.split_iname(knl, "k", n, inner_tag="l.2")#, slabs=(0, 1))
knl = lp.split_iname(knl, "j", n, inner_tag="l.1")#, slabs=(0, 1))
knl = lp.split_iname(knl, "i", n, inner_tag="l.0")#, slabs=(0, 1))
# knl = lp.tag_dimensions(knl, dict(k_nner="unr"))
# knl = lp.tag_inames(knl, dict(k_nner="unr"))
knl = lp.tag_dimensions(knl, dict(o="unr"))
knl = lp.tag_dimensions(knl, dict(m="unr"))
# knl = lp.tag_dimensions(knl, dict(i="unr"))
knl = lp.tag_inames(knl, dict(o="unr"))
knl = lp.tag_inames(knl, dict(m="unr"))
# knl = lp.tag_inames(knl, dict(i="unr"))
knl = lp.add_prefetch(knl, "G", [2,3,4]) # axis/argument indices on G
......
......@@ -124,15 +124,15 @@ def test_axpy(ctx_factory):
def variant_cpu(knl):
unroll = 16
block_size = unroll*4096
knl = lp.split_dimension(knl, "i", block_size, outer_tag="g.0", slabs=(0, 1))
knl = lp.split_dimension(knl, "i_inner", unroll, inner_tag="unr")
knl = lp.split_iname(knl, "i", block_size, outer_tag="g.0", slabs=(0, 1))
knl = lp.split_iname(knl, "i_inner", unroll, inner_tag="unr")
return knl
def variant_gpu(knl):
unroll = 4
block_size = 256
knl = lp.split_dimension(knl, "i", unroll*block_size, outer_tag="g.0", slabs=(0, 1))
knl = lp.split_dimension(knl, "i_inner", block_size, outer_tag="unr", inner_tag="l.0")
knl = lp.split_iname(knl, "i", unroll*block_size, outer_tag="g.0", slabs=(0, 1))
knl = lp.split_iname(knl, "i_inner", block_size, outer_tag="unr", inner_tag="l.0")
return knl
for variant in [variant_cpu, variant_gpu]:
......@@ -169,9 +169,9 @@ def test_transpose(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "i", 16,
knl = lp.split_iname(knl, "i", 16,
outer_tag="g.0", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16,
knl = lp.split_iname(knl, "j", 16,
outer_tag="g.1", inner_tag="l.0")
knl = lp.add_prefetch(knl, 'a', ["i_inner", "j_inner"])
......@@ -209,11 +209,11 @@ def test_plain_matrix_mul(ctx_factory):
ref_knl = knl
knl = lp.split_dimension(knl, "i", 16,
knl = lp.split_iname(knl, "i", 16,
outer_tag="g.0", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16,
knl = lp.split_iname(knl, "j", 16,
outer_tag="g.1", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 16)
knl = lp.split_iname(knl, "k", 16)
knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner", ])
......@@ -250,11 +250,11 @@ def test_variable_size_matrix_mul(ctx_factory):
ref_knl = knl
knl = lp.split_dimension(knl, "i", 16,
knl = lp.split_iname(knl, "i", 16,
outer_tag="g.0", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 8,
knl = lp.split_iname(knl, "j", 8,
outer_tag="g.1", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 32)
knl = lp.split_iname(knl, "k", 32)
knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"])
knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner"])
......@@ -296,9 +296,9 @@ def test_rank_one(ctx_factory):
return knl
def variant_2(knl):
knl = lp.split_dimension(knl, "i", 16,
knl = lp.split_iname(knl, "i", 16,
outer_tag="g.0", inner_tag="l.0")
knl = lp.split_dimension(knl, "j", 16,
knl = lp.split_iname(knl, "j", 16,
outer_tag="g.1", inner_tag="l.1")
knl = lp.add_prefetch(knl, "a")
......@@ -306,9 +306,9 @@ def test_rank_one(ctx_factory):
return knl
def variant_3(knl):
knl = lp.split_dimension(knl, "i", 16,
knl = lp.split_iname(knl, "i", 16,
outer_tag="g.0", inner_tag="l.0")
knl = lp.split_dimension(knl, "j", 16,
knl = lp.split_iname(knl, "j", 16,
outer_tag="g.1", inner_tag="l.1")
knl = lp.add_prefetch(knl, "a", ["i_inner"])
......@@ -316,22 +316,22 @@ def test_rank_one(ctx_factory):
return knl
def variant_4(knl):
knl = lp.split_dimension(knl, "i", 256,
knl = lp.split_iname(knl, "i", 256,
outer_tag="g.0", slabs=(0, 1))
knl = lp.split_dimension(knl, "j", 256,
knl = lp.split_iname(knl, "j", 256,
outer_tag="g.1", slabs=(0, 1))
knl = lp.add_prefetch(knl, "a", ["i_inner"], default_tag=None)
knl = lp.add_prefetch(knl, "b", ["j_inner"], default_tag=None)
knl = lp.split_dimension(knl, "i_inner", 16,
knl = lp.split_iname(knl, "i_inner", 16,
inner_tag="l.0")
knl = lp.split_dimension(knl, "j_inner", 16,
knl = lp.split_iname(knl, "j_inner", 16,
inner_tag="l.1")
knl = lp.split_dimension(knl, "a_dim_0", 16,
knl = lp.split_iname(knl, "a_dim_0", 16,
outer_tag="l.1", inner_tag="l.0")
knl = lp.split_dimension(knl, "b_dim_0", 16,
knl = lp.split_iname(knl, "b_dim_0", 16,
outer_tag="l.1", inner_tag="l.0")
return knl
......@@ -374,11 +374,11 @@ def test_troublesome_premagma_fermi_matrix_mul(ctx_factory):
j_reg = 2
i_chunks = 16
j_chunks = 16
knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_dimension(knl, "k", 16)
knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_iname(knl, "k", 16)
knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"])
kernel_gen = lp.generate_loop_schedules(knl)
......@@ -416,12 +416,12 @@ def test_intel_matrix_mul(ctx_factory):
j_reg = 4
i_chunks = 16
j_chunks = 16
knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_dimension(knl, "k", 16)
#knl = lp.split_dimension(knl, "k_inner", 8, outer_tag="unr")
knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_iname(knl, "k", 16)
#knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")
knl = lp.add_prefetch(knl, 'a', ["i_inner_inner", "k_inner", "i_inner_outer"])
knl = lp.add_prefetch(knl, 'b', ["j_inner_inner", "k_inner", "j_inner_outer"])
......@@ -469,12 +469,12 @@ def test_magma_fermi_matrix_mul(ctx_factory):
j_chunks = 16
knl = lp.split_dimension(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_dimension(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_dimension(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_dimension(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_dimension(knl, "k", 16)
knl = lp.split_dimension(knl, "k_inner", 8, outer_tag="unr")
knl = lp.split_iname(knl, "i", i_reg*i_chunks, outer_tag="g.0")
knl = lp.split_iname(knl, "i_inner", i_reg, outer_tag="l.0", inner_tag="ilp")
knl = lp.split_iname(knl, "j", j_reg*j_chunks, outer_tag="g.1")
knl = lp.split_iname(knl, "j_inner", j_reg, outer_tag="l.1", inner_tag="ilp")
knl = lp.split_iname(knl, "k", 16)
knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")
# FIXME
#knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"])
#knl = lp.add_prefetch(knl, 'b', ["k_inner", ("j_inner_inner", "j_inner_outer"),])
......@@ -511,9 +511,9 @@ def test_image_matrix_mul(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 32)
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 32)
# conflict-free
knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
knl = lp.add_prefetch(knl, 'b', ["j_inner", "k_inner"])
......@@ -549,11 +549,11 @@ def test_image_matrix_mul_ilp(ctx_factory):
seq_knl = knl
ilp = 4
knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
j_inner_split = 4
knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1")
knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 2)
knl = lp.split_iname(knl, "j", ilp*j_inner_split, outer_tag="g.1")
knl = lp.split_iname(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 2)
# conflict-free?
knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"])
......@@ -586,8 +586,8 @@ def test_ilp_race_matmul(ctx_factory):
],
name="matmul")
knl = lp.split_dimension(knl, "j", 2, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 2)
knl = lp.split_iname(knl, "j", 2, outer_tag="ilp", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 2)
knl = lp.add_prefetch(knl, 'b', ["k_inner"])
from loopy.check import WriteRaceConditionError
......@@ -621,9 +621,9 @@ def test_fancy_matrix_mul(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
knl = lp.split_dimension(knl, "k", 16, slabs=(0,1))
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.0")
knl = lp.split_iname(knl, "k", 16, slabs=(0,1))
knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
knl = lp.add_prefetch(knl, 'b', ["k_inner", "j_inner"])
......
......@@ -72,7 +72,7 @@ def test_multi_cse(ctx_factory):
[lp.GlobalArg("a", np.float32, shape=(100,))],
local_sizes={0: 16})
knl = lp.split_dimension(knl, "i", 16, inner_tag="l.0")
knl = lp.split_iname(knl, "i", 16, inner_tag="l.0")
knl = lp.add_prefetch(knl, "a", [])
kernel_gen = lp.generate_loop_schedules(knl)
......@@ -111,8 +111,8 @@ def test_stencil(ctx_factory):
ref_knl = knl
def variant_1(knl):
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.1", inner_tag="l.1")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.0", inner_tag="l.0")
knl = lp.split_iname(knl, "i", 16, outer_tag="g.1", inner_tag="l.1")
knl = lp.split_iname(knl, "j", 16, outer_tag="g.0", inner_tag="l.0")
knl = lp.add_prefetch(knl, "a", ["i_inner", "j_inner"])
return knl
......@@ -141,8 +141,8 @@ def test_eq_constraint(ctx_factory):
lp.GlobalArg("b", np.float32, shape=(1000,))
])
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0")
knl = lp.split_dimension(knl, "i_inner", 16, outer_tag=None, inner_tag="l.0")
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0")
knl = lp.split_iname(knl, "i_inner", 16, outer_tag=None, inner_tag="l.0")
kernel_gen = lp.generate_loop_schedules(knl)
kernel_gen = lp.check_kernels(kernel_gen)
......@@ -397,7 +397,7 @@ def test_dependent_loop_bounds_2(ctx_factory):
],
assumptions="n>=1 and row_len>=1")
knl = lp.split_dimension(knl, "i", 128, outer_tag="g.0",
knl = lp.split_iname(knl, "i", 128, outer_tag="g.0",
inner_tag="l.0")
cknl = lp.CompiledKernel(ctx, knl)
print "---------------------------------------------------"
......@@ -434,7 +434,7 @@ def test_dependent_loop_bounds_3(ctx_factory):
assert knl.parents_per_domain()[1] == 0
knl = lp.split_dimension(knl, "i", 128, outer_tag="g.0",
knl = lp.split_iname(knl, "i", 128, outer_tag="g.0",
inner_tag="l.0")
cknl = lp.CompiledKernel(ctx, knl)
......@@ -442,7 +442,7 @@ def test_dependent_loop_bounds_3(ctx_factory):
cknl.print_code()
print "---------------------------------------------------"
knl_bad = lp.split_dimension(knl, "jj", 128, outer_tag="g.1",
knl_bad = lp.split_iname(knl, "jj", 128, outer_tag="g.1",
inner_tag="l.1")
import pytest
......@@ -473,9 +473,9 @@ def test_independent_multi_domain(ctx_factory):
])
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0",
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0",
inner_tag="l.0")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.0",
knl = lp.split_iname(knl, "j", 16, outer_tag="g.0",
inner_tag="l.0")
assert knl.parents_per_domain() == 2*[None]
......@@ -544,8 +544,8 @@ def test_equality_constraints(ctx_factory):
seq_knl = knl
knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.1")
knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.1")
#print knl
#print knl.domains[0].detect_equalities()
......
......@@ -30,28 +30,28 @@ def test_nbody(ctx_factory):
seq_knl = knl
def variant_1(knl):
knl = lp.split_dimension(knl, "i", 256,
knl = lp.split_iname(knl, "i", 256,
outer_tag="g.0", inner_tag="l.0",
slabs=(0,1))
knl = lp.split_dimension(knl, "j", 256, slabs=(0,1))
knl = lp.split_iname(knl, "j", 256, slabs=(0,1))
return knl, []
def variant_cpu(knl):
knl = lp.expand_subst(knl)
knl = lp.split_dimension(knl, "i", 1024,
knl = lp.split_iname(knl, "i", 1024,
outer_tag="g.0", slabs=(0,1))
knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)
return knl, []
def variant_gpu(knl):
knl = lp.expand_subst(knl)
knl = lp.split_dimension(knl, "i", 256,
knl = lp.split_iname(knl, "i", 256,
outer_tag="g.0", inner_tag="l.0", slabs=(0,1))
knl = lp.split_dimension(knl, "j", 256, slabs=(0,1))
knl = lp.split_iname(knl, "j", 256, slabs=(0,1))
knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)
knl = lp.add_prefetch(knl, "x[j,k]", ["j_inner", "k"],
["x_fetch_j", "x_fetch_k"])
knl = lp.tag_dimensions(knl, dict(x_fetch_k="unr"))
knl = lp.tag_inames(knl, dict(x_fetch_k="unr"))
return knl, ["j_outer", "j_inner"]
n = 3000
......
......@@ -52,7 +52,7 @@ def test_tim2d(ctx_factory):
seq_knl = knl
def variant_orig(knl):
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1", e="g.0"))
knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0"))
knl = lp.add_prefetch(knl, "D[:,:]")
knl = lp.add_prefetch(knl, "u[e, :, :]")
......@@ -66,8 +66,8 @@ def test_tim2d(ctx_factory):
knl = lp.add_prefetch(knl, "G$x")
knl = lp.add_prefetch(knl, "G$y")
knl = lp.tag_dimensions(knl, dict(o="unr"))
knl = lp.tag_dimensions(knl, dict(m="unr"))
knl = lp.tag_inames(knl, dict(o="unr"))
knl = lp.tag_inames(knl, dict(m="unr"))
knl = lp.set_instruction_priority(knl, "D_fetch", 5)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment