diff --git a/MEMO b/MEMO index 694b49a929d7d6e25c522fec3552c5f6abd6d288..aeeb733a0eb40fd76a4e40e0798fb308337452ba 100644 --- a/MEMO +++ b/MEMO @@ -53,16 +53,12 @@ To-do - Add dependencies after the fact -- Fix all tests - - Scalar insn priority - If finding a maximum proves troublesome, move parameters into the domain - : (as in, Matlab full-slice) in prefetches -- ScalarArg is a bad name - Future ideas ^^^^^^^^^^^^ @@ -115,6 +111,9 @@ Future ideas Dealt with ^^^^^^^^^^ +- ScalarArg is a bad name + -> renamed to ValueArg + - What to do about constants in codegen? (...f suffix, complex types) -> dealt with by type contexts diff --git a/doc/reference.rst b/doc/reference.rst index 6915549f681e72c2f31b58ee7a23c64aed603e85..69b1b0fe2405ce3c7a6c6e9f2be1cb8d3a1c03b5 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -117,7 +117,7 @@ Creating Kernels Arguments ^^^^^^^^^ -.. autoclass:: ScalarArg +.. autoclass:: ValueArg :members: :undoc-members: diff --git a/loopy/__init__.py b/loopy/__init__.py index eb89d65707a60b1be41f5fbbc57945fc3536f1e5..43c13f8f6242a37e265726f74b1bf48ef06cd156 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -19,7 +19,7 @@ class LoopyAdvisory(UserWarning): # {{{ imported user interface -from loopy.kernel import ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg +from loopy.kernel import ValueArg, ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg from loopy.kernel import (AutoFitLocalIndexTag, get_dot_dependency_graph, LoopKernel, Instruction, @@ -35,7 +35,7 @@ from loopy.codegen import generate_code from loopy.compiled import CompiledKernel, drive_timing_run, auto_test_vs_ref from loopy.check import check_kernels -__all__ = ["ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", +__all__ = ["ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", "LoopKernel", "Instruction", "default_function_mangler", "single_arg_function_mangler", diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index e653feb93ff50331c62fb4a8d5138e03683a8aeb..e25f0af365f62137e35657617a8e7fcfe55ff7b2 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -228,7 +228,7 @@ def generate_code(kernel, with_annotation=False, has_image = False - from loopy.kernel import GlobalArg, ConstantArg, ImageArg, ScalarArg + from loopy.kernel import GlobalArg, ConstantArg, ImageArg, ValueArg args = [] for arg in kernel.args: @@ -250,7 +250,7 @@ def generate_code(kernel, with_annotation=False, arg_decl = CLImage(arg.dimensions, mode, arg.name) has_image = True - elif isinstance(arg, ScalarArg): + elif isinstance(arg, ValueArg): arg_decl = Const(POD(arg.dtype, arg.name)) else: raise ValueError("argument type not understood: '%s'" % type(arg)) diff --git a/loopy/compiled.py b/loopy/compiled.py index 8cf223f471b1de357539547e4764ecbc5ad2bc77..44497d97dc9f57aab30edc1a2d9dff065761f426 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -108,11 +108,11 @@ class CompiledKernel: print "[Loopy] ----------------------------------------------------" raise - from loopy.kernel import ScalarArg + from loopy.kernel import ValueArg arg_types = [] for arg in kernel.args: - if isinstance(arg, ScalarArg): + if isinstance(arg, ValueArg): arg_types.append(arg.dtype) else: arg_types.append(None) @@ -307,7 +307,7 @@ def fill_rand(ary): def make_ref_args(kernel, queue, parameters, fill_value): - from loopy.kernel import ScalarArg, GlobalArg, ImageArg + from loopy.kernel import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -316,7 +316,7 @@ def make_ref_args(kernel, queue, parameters, output_arrays = [] for arg in kernel.args: - if isinstance(arg, ScalarArg): + if isinstance(arg, ValueArg): arg_value = parameters[arg.name] try: @@ -373,14 +373,14 @@ def make_ref_args(kernel, queue, parameters, def make_args(queue, kernel, ref_input_arrays, parameters, fill_value): - from loopy.kernel import ScalarArg, GlobalArg, ImageArg + from loopy.kernel import ValueArg, GlobalArg, ImageArg from pymbolic import evaluate result = [] output_arrays = [] for arg in kernel.args: - if isinstance(arg, ScalarArg): + if isinstance(arg, ValueArg): arg_value = parameters[arg.name] try: diff --git a/loopy/kernel.py b/loopy/kernel.py index f7b2f1590b0ba1e5d434a190a586ee3736dc4039..a6d8095bb27614a5d700d91c21121d7d55f4b6cc 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -211,14 +211,22 @@ class ImageArg(object): return "<ImageArg '%s' of type %s>" % (self.name, self.dtype) -class ScalarArg(object): +class ValueArg(object): def __init__(self, name, dtype, approximately=None): self.name = name self.dtype = np.dtype(dtype) self.approximately = approximately def __repr__(self): - return "<ScalarArg '%s' of type %s>" % (self.name, self.dtype) + return "<ValueArg '%s' of type %s>" % (self.name, self.dtype) + +class ScalarArg(ValueArg): + def __init__(self, name, dtype, approximately=None): + from warnings import warn + warn("ScalarArg is a deprecated name of ValueArg", + DeprecationWarning, stacklevel=2) + + ValueArg.__init__(self, name, dtype, approximately) # }}} @@ -854,7 +862,7 @@ class LoopKernel(Record): domains = [domains] ctx = isl.Context() - scalar_arg_names = set(arg.name for arg in args if isinstance(arg, ScalarArg)) + scalar_arg_names = set(arg.name for arg in args if isinstance(arg, ValueArg)) var_names = ( set(temporary_variables) | set(insn.get_assignee_var_name() @@ -1310,7 +1318,7 @@ class LoopKernel(Record): from pytools import flatten loop_arg_names = list(flatten(dom.get_var_names(dim_type.param) for dom in self.domains)) - return [arg.name for arg in self.args if isinstance(arg, ScalarArg) + return [arg.name for arg in self.args if isinstance(arg, ValueArg) if arg.name in loop_arg_names] @memoize_method diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 8492e9f9bc22d23763120a4ee671861cf6946182..71a936924c36313f80d1d7ab7a008309c10a75e4 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -492,12 +492,12 @@ def limit_boostability(kernel): # {{{ rank inames by stride def get_auto_axis_iname_ranking_by_stride(kernel, insn): - from loopy.kernel import ImageArg, ScalarArg + from loopy.kernel import ImageArg, ValueArg approximate_arg_values = dict( (arg.name, arg.approximately) for arg in kernel.args - if isinstance(arg, ScalarArg)) + if isinstance(arg, ValueArg)) # {{{ find all array accesses in insn diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 7d71d76c77cefaef98e3c48193774f78e9d21bf6..c3823d504a41db981d35f5e3900e6edf441f781a 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -40,7 +40,7 @@ def test_laplacian_stiffness(ctx_factory): lp.GlobalArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order), lp.ConstantArg("w", dtype, shape=(Nq,), order=order), lp.GlobalArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order), - lp.ScalarArg("Nc", np.int32, approximately=1000), + lp.ValueArg("Nc", np.int32, approximately=1000), ], name="lapquad", assumptions="Nc>=1") diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index 6ee04953d65df1fdee103cccb77fb9941c2b59b4..888ecdd2b88702b47326910fef9786c594acb670 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -56,7 +56,7 @@ def test_laplacian(ctx_factory): lp.GlobalArg("lap", dtype, shape=field_shape, order=order), lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -144,7 +144,7 @@ def test_laplacian_lmem(ctx_factory): lp.GlobalArg("lap", dtype, shape=field_shape, order=order), lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -219,7 +219,7 @@ def test_laplacian_lmem_ilp(ctx_factory): lp.GlobalArg("lap", dtype, shape=field_shape, order=order), lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -321,7 +321,7 @@ def test_advect(ctx_factory): lp.GlobalArg("Nw", dtype, shape=field_shape, order=order), lp.GlobalArg("G", dtype, shape=(9,)+field_shape, order=order), lp.GlobalArg("D", dtype, shape=(N, N), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -441,7 +441,7 @@ def test_advect_dealias(ctx_factory): lp.GlobalArg("D", dtype, shape=(M,M), order=order), lp.GlobalArg("I", dtype, shape=(M, N), order=order), lp.GlobalArg("V", dtype, shape=(N, M), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -505,7 +505,7 @@ def test_interp_diff(ctx_factory): lp.GlobalArg("I", dtype, shape=(M, N), order=order), lp.GlobalArg("V", dtype, shape=(N, M), order=order), lp.GlobalArg("Pu", dtype, shape=field_shape, order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_lap_precon", assumptions="K>=1") diff --git a/proto-tests/test_sem_reagan.py b/proto-tests/test_sem_reagan.py index c147c20b99287136b86fdd233e5a476caf733c5d..f625c3b04e9b39b1005c1577c5990bd10b4de55c 100644 --- a/proto-tests/test_sem_reagan.py +++ b/proto-tests/test_sem_reagan.py @@ -45,7 +45,7 @@ def test_tim2d(ctx_factory): # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap2D", assumptions="K>=1") diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 8d1c03e7c9d7fa0775a39d3bca3b082bd1f28f30..04d6aeed44f14c10796453de539ff7f89a311906 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -56,7 +56,7 @@ def test_laplacian(ctx_factory): lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -142,7 +142,7 @@ def test_laplacian_lmem(ctx_factory): lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -230,7 +230,7 @@ def test_laplacian_lmem_ilp(ctx_factory): lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -332,7 +332,7 @@ def test_advect(ctx_factory): lp.ArrayArg("Nw", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(9,)+field_shape, order=order), lp.ArrayArg("D", dtype, shape=(N, N), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -452,7 +452,7 @@ def test_advect_dealias(ctx_factory): lp.ArrayArg("D", dtype, shape=(M,M), order=order), lp.ArrayArg("I", dtype, shape=(M, N), order=order), lp.ArrayArg("V", dtype, shape=(N, M), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -516,7 +516,7 @@ def test_interp_diff(ctx_factory): lp.ArrayArg("I", dtype, shape=(M, N), order=order), lp.ArrayArg("V", dtype, shape=(N, M), order=order), lp.ArrayArg("Pu", dtype, shape=field_shape, order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="sem_lap_precon", assumptions="K>=1") diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index f5a8545d817a5fbc1f02f0a32efed8f70a342056..84523146dd586c634c382a2c1019a04b358c3974 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -42,7 +42,7 @@ def test_tim2d(ctx_factory): # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap2D", assumptions="K>=1") @@ -104,7 +104,7 @@ def test_red2d(ctx_factory): lp.ArrayArg("lap", dtype, shape=field_shape, order=order), lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap2D", assumptions="K>=1") @@ -172,7 +172,7 @@ def test_tim3d(ctx_factory): # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), - lp.ScalarArg("K", np.int32, approximately=1000), + lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap3D", assumptions="K>=1") diff --git a/test/test_linalg.py b/test/test_linalg.py index d4db1941ba144f890732510c058fb03e6694279a..74bd1c501df478b60d0a832d94fb6ee46d23ff0c 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -110,12 +110,12 @@ def test_axpy(ctx_factory): "z[i] = a*x[i]+b*y[i]" ], [ - lp.ScalarArg("a", dtype), + lp.ValueArg("a", dtype), lp.GlobalArg("x", dtype, shape="n,"), - lp.ScalarArg("b", dtype), + lp.ValueArg("b", dtype), lp.GlobalArg("y", dtype, shape="n,"), lp.GlobalArg("z", dtype, shape="n,"), - lp.ScalarArg("n", np.int32, approximately=n), + lp.ValueArg("n", np.int32, approximately=n), ], name="axpy", assumptions="n>=1") @@ -246,7 +246,7 @@ def test_variable_size_matrix_mul(ctx_factory): lp.GlobalArg("a", dtype, shape=(n, n), order=order), lp.GlobalArg("b", dtype, shape=(n, n), order=order), lp.GlobalArg("c", dtype, shape=(n, n), order=order), - lp.ScalarArg("n", np.int32, approximately=n), + lp.ValueArg("n", np.int32, approximately=n), ], name="matmul", assumptions="n >= 16") @@ -297,7 +297,7 @@ def test_rank_one(ctx_factory): lp.GlobalArg("a", dtype, shape=(n,), order=order), lp.GlobalArg("b", dtype, shape=(n,), order=order), lp.GlobalArg("c", dtype, shape=(n, n), order=order), - lp.ScalarArg("n", np.int32, approximately=n), + lp.ValueArg("n", np.int32, approximately=n), ], name="rank_one", assumptions="n >= 16") @@ -652,7 +652,7 @@ def test_fancy_matrix_mul(ctx_factory): lp.GlobalArg("a", dtype, shape="(n, n)", order=order), lp.GlobalArg("b", dtype, shape="(n, n)", order=order), lp.GlobalArg("c", dtype, shape="(n, n)", order=order), - lp.ScalarArg("n", np.int32, approximately=1000), + lp.ValueArg("n", np.int32, approximately=1000), ], name="fancy_matmul", assumptions="n>=1") knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0", inner_tag="l.1") diff --git a/test/test_loopy.py b/test/test_loopy.py index e4644bcf1de36cf5bc4416f2de2f6d998409edc9..0639bbfec03f3f5b0432dc4d075778df2b042db8 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -254,7 +254,7 @@ def test_fuzz_code_generator(ctx_factory): [lp.Instruction(None, "value", expr)], [lp.GlobalArg("value", np.complex128, shape=())] + [ - lp.ScalarArg(name, get_dtype(val)) + lp.ValueArg(name, get_dtype(val)) for name, val in var_values.iteritems() ]) ck = lp.CompiledKernel(ctx, knl) @@ -322,7 +322,7 @@ def test_nested_dependent_reduction(ctx_factory): "a[i] = sum(j, j)", ], [ - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), lp.GlobalArg("a", dtype, ("n",)), lp.GlobalArg("l", np.int32, ("n",)), ]) @@ -361,7 +361,7 @@ def test_dependent_loop_bounds(ctx_factory): lp.GlobalArg("a_values", dtype), lp.GlobalArg("x", dtype), lp.GlobalArg("ax", dtype), - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), ], assumptions="n>=1 and row_len>=1") @@ -393,7 +393,7 @@ def test_dependent_loop_bounds_2(ctx_factory): lp.GlobalArg("a_values", dtype), lp.GlobalArg("x", dtype), lp.GlobalArg("ax", dtype), - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), ], assumptions="n>=1 and row_len>=1") @@ -429,7 +429,7 @@ def test_dependent_loop_bounds_3(ctx_factory): [ lp.GlobalArg("a_row_lengths", np.int32), lp.GlobalArg("a", dtype, shape=("n,n"), order="C"), - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), ]) assert knl.parents_per_domain()[1] == 0 @@ -469,7 +469,7 @@ def test_independent_multi_domain(ctx_factory): [ lp.GlobalArg("a", dtype, shape=("n"), order="C"), lp.GlobalArg("b", dtype, shape=("n"), order="C"), - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), ]) @@ -507,7 +507,7 @@ def test_bare_data_dependency(ctx_factory): ], [ lp.GlobalArg("a", dtype, shape=("n"), order="C"), - lp.ScalarArg("n", np.int32), + lp.ValueArg("n", np.int32), ]) cknl = lp.CompiledKernel(ctx, knl) diff --git a/test/test_nbody.py b/test/test_nbody.py index d3a38cc71368b7a7b8897e2ef86e01387314c0ff..aa9812527e39a866cab533ae88c9d35da03d5974 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -23,7 +23,7 @@ def test_nbody(ctx_factory): [ lp.GlobalArg("x", dtype, shape="N,3", order="C"), lp.GlobalArg("pot", dtype, shape="N", order="C"), - lp.ScalarArg("N", np.int32), + lp.ValueArg("N", np.int32), ], name="nbody", assumptions="N>=1")