From 9b751a81b0125e582322eec16bacba078c2c4e97 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Tue, 17 Apr 2012 20:22:20 -0400 Subject: [PATCH] Rename ArrayArg -> GlobalArg, ConstantArrayArg -> ConstantArg. --- MEMO | 6 +-- doc/reference.rst | 4 +- examples/matrix-mul.py | 2 +- examples/quadrature.py | 105 ------------------------------------ loopy/__init__.py | 6 +-- loopy/check.py | 4 +- loopy/codegen/__init__.py | 11 ++-- loopy/codegen/expression.py | 2 +- loopy/compiled.py | 14 ++--- loopy/kernel.py | 21 +++++--- test/test_fem_assembly.py | 10 ++-- test/test_linalg.py | 52 +++++++++--------- test/test_loopy.py | 12 ++--- test/test_nbody.py | 4 +- test/test_sem.py | 68 +++++++++++------------ 15 files changed, 110 insertions(+), 211 deletions(-) delete mode 100644 examples/quadrature.py diff --git a/MEMO b/MEMO index a0f7f32ff..02eaf2264 100644 --- a/MEMO +++ b/MEMO @@ -46,8 +46,6 @@ To-do - Add dependencies after the fact -- bug? with fetching only g[j,*] inside j loop - - Expose iname-duplicate-and-rename as a primitive. - Allow parameters to be varying during run-time, substituting values @@ -61,7 +59,9 @@ To-do - If finding a maximum proves troublesome, move parameters into the domain -- : (as in, Matlab full-sclice) in prefetches +- : (as in, Matlab full-slice) in prefetches + +- ScalarArg is a bad name Future ideas ^^^^^^^^^^^^ diff --git a/doc/reference.rst b/doc/reference.rst index 1f19611d1..6915549f6 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -121,11 +121,11 @@ Arguments :members: :undoc-members: -.. autoclass:: ArrayArg +.. autoclass:: GlobalArg :members: :undoc-members: -.. autoclass:: ConstantArrayArg +.. autoclass:: ConstantArg :members: :undoc-members: diff --git a/examples/matrix-mul.py b/examples/matrix-mul.py index 82c8c4e33..d0f11c261 100644 --- a/examples/matrix-mul.py +++ b/examples/matrix-mul.py @@ -43,7 +43,7 @@ def image_matrix_mul_ilp(ctx_factory=cl.create_some_context): [ lp.ImageArg("a", dtype, 2), lp.ImageArg("b", dtype, 2), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") diff --git a/examples/quadrature.py b/examples/quadrature.py deleted file mode 100644 index 5df05e031..000000000 --- a/examples/quadrature.py +++ /dev/null @@ -1,105 +0,0 @@ - -import numpy as np -import pyopencl as cl -import pyopencl.array as cl_array -import loopy as lp - - - - -def make_well_conditioned_dev_matrix(queue, shape, dtype=np.float32, - order="C", ran_factor=1, id_factor=5, inc_factor=0, od=0): - if isinstance(shape, int): - shape = (shape, shape) - l = max(shape) - eye_ish = id_factor*np.eye(l, k=od) - if inc_factor: - eye_ish[np.arange(l), np.arange(l)] = inc_factor*np.arange(l) - ary = np.asarray( - ran_factor*np.random.randn(*shape) - + eye_ish[:shape[0], :shape[1]], - dtype=dtype, order=order) - - return cl_array.to_device(queue, ary) - - - - -def build_mass_mat_maker(ctx_factory=cl.create_some_context): - dtype = np.float32 - ctx = ctx_factory() - queue = cl.CommandQueue(ctx, - properties=cl.command_queue_properties.PROFILING_ENABLE) - - Nb = 3 - Nv = 3 - Nq = 3*3 - Nc = 1600 - - knl = lp.LoopKernel(ctx.devices[0], - "[ncells] -> {[c,i,j,q]: 0<=c<ncells and 0 <= i < %(Nv)s " - "and 0<=j<%(Nb)s and 0<=q<%(Nq)s}" % dict( - Nv=Nv, Nb=Nb, Nq=Nq), - [ - "m[c,i,j] = w[q]*det_j[c]*phi[i,q]*phi[j,q]", - ], - [ - lp.ArrayArg("m", dtype, shape=(Nc, Nv, Nb)), - lp.ArrayArg("w", dtype, shape=(Nq,)), - lp.ArrayArg("det_j", dtype, shape=(Nc,)), - lp.ArrayArg("phi", dtype, shape=(Nv, Nq,)), - lp.ScalarArg("ncells", np.int32, approximately=1000), - ], - name="mass_mat", - iname_to_tag=dict(i="l.0", j="l.1"), - assumptions="ncells >= 1" - ) - knl = lp.split_dimension(knl, "c", 8, inner_tag="l.2", - outer_slab_increments=(0,0)) - knl = lp.split_dimension(knl, "c_outer", 8, outer_tag="g.0", - outer_slab_increments=(0,0)) - - # fix reg prefetch - knl = lp.add_prefetch(knl, "det_j", ["c_inner"], - loc_fetch_axes={0: (0, 1)}) - - #ilp = 4 - #knl = lp.split_dimension(knl, "i", 2, outer_tag="g.0", inner_tag="l.1") - #j_inner_split = 16 - #knl = lp.split_dimension(knl, "j", ilp*j_inner_split, outer_tag="g.1") - #knl = lp.split_dimension(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0") - #knl = lp.split_dimension(knl, "k", 2) - - #knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"]) - #knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"]) - #assert knl.get_problems({})[0] <= 2 - - kernel_gen = (knl - #kernel_gen = (lp.insert_register_prefetches(knl) - for knl in lp.generate_loop_schedules(knl)) - - if False: - a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order, - ran_factor=1, id_factor=5) - b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order, - ran_factor=1, id_factor=5, inc_factor=0) - c = cl_array.empty_like(a) - a_img = cl.image_from_array(ctx, a.get(), 1) - b_img = cl.image_from_array(ctx, b.get(), 1) - - def launcher(kernel, gsize, lsize, check): - 1/0 - evt = kernel(queue, gsize(), lsize(), a_img, b_img, c.data, - g_times_l=True) - - return evt - - from pyopencl.characterize import get_fast_inaccurate_build_options - lp.drive_timing_run(kernel_gen, queue, launcher, flop_count=0, - options=get_fast_inaccurate_build_options(ctx.devices[0])) - - - - -if __name__ == "__main__": - build_mass_mat_maker() diff --git a/loopy/__init__.py b/loopy/__init__.py index d23cc6707..189aabd4c 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -10,7 +10,6 @@ register_mpz_with_pymbolic() import islpy as isl from islpy import dim_type -import numpy as np @@ -20,7 +19,7 @@ class LoopyAdvisory(UserWarning): # {{{ imported user interface -from loopy.kernel import ScalarArg, ArrayArg, ConstantArrayArg, ImageArg +from loopy.kernel import ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg from loopy.kernel import (AutoFitLocalIndexTag, get_dot_dependency_graph, LoopKernel, Instruction) @@ -33,7 +32,8 @@ from loopy.codegen import generate_code from loopy.compiled import CompiledKernel, drive_timing_run, auto_test_vs_ref from loopy.check import check_kernels -__all__ = ["ScalarArg", "ArrayArg", "ConstantArrayArg", "ImageArg", "LoopKernel", +__all__ = ["ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", + "LoopKernel", "Instruction", "make_kernel", "get_dot_dependency_graph", "preprocess_kernel", "realize_reduction", diff --git a/loopy/check.py b/loopy/check.py index 454f3450f..148595636 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -293,10 +293,10 @@ def get_problems(kernel, parameters): msg(4, "using more local memory than available--" "possibly OK due to cache nature") - from loopy.kernel import ArrayArg + from loopy.kernel import ConstantArg const_arg_count = sum( 1 for arg in kernel.args - if isinstance(arg, ArrayArg) and arg.constant_mem) + if isinstance(arg, ConstantArg)) if const_arg_count > kernel.device.max_constant_args: msg(5, "too many constant arguments") diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index f2c04e2aa..4653adc53 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -223,19 +223,16 @@ def generate_code(kernel, with_annotation=False, has_double = False has_image = False - from loopy.kernel import ArrayArg, ConstantArrayArg, ImageArg, ScalarArg + from loopy.kernel import GlobalArg, ConstantArg, ImageArg, ScalarArg args = [] for arg in kernel.args: - if isinstance(arg, (ConstantArrayArg, ArrayArg)): + if isinstance(arg, (ConstantArg, GlobalArg)): arg_decl = restrict_ptr_if_not_nvidia( POD(arg.dtype, arg.name)) if arg_decl.name not in kernel.get_written_variables(): - if arg.constant_mem: - arg_decl = CLConstant(Const(arg_decl)) - else: - arg_decl = Const(arg_decl) - if isinstance(arg, ConstantArrayArg): + arg_decl = Const(arg_decl) + if isinstance(arg, ConstantArg): arg_decl = CLConstant(arg_decl) else: arg_decl = CLGlobal(arg_decl) diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py index 46f8aa6ed..5435a3b4f 100644 --- a/loopy/codegen/expression.py +++ b/loopy/codegen/expression.py @@ -166,7 +166,7 @@ class LoopyCCodeMapper(CCodeMapper): "non-floating-point images not supported for now") else: - # ArrayArg + # GlobalArg index_expr = expr.index if not isinstance(expr.index, tuple): index_expr = (index_expr,) diff --git a/loopy/compiled.py b/loopy/compiled.py index c23e0b4f3..977237d50 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -11,7 +11,7 @@ import numpy as np def _arg_matches_spec(arg, val, other_args): import loopy as lp - if isinstance(arg, lp.ArrayArg): + if isinstance(arg, lp.GlobalArg): from pymbolic import evaluate shape = evaluate(arg.shape, other_args) @@ -168,7 +168,7 @@ class CompiledKernel: assert _arg_matches_spec(arg, val, kwargs) # automatically transfer host-side arrays - if isinstance(arg, lp.ArrayArg): + if isinstance(arg, lp.GlobalArg): if isinstance(val, np.ndarray): # synchronous, so nothing to worry about val = cl_array.to_device(queue, val, allocator=allocator) @@ -178,7 +178,7 @@ class CompiledKernel: if is_written: outputs.append(val) - if isinstance(arg, lp.ArrayArg): + if isinstance(arg, lp.GlobalArg): args.append(val.data) else: args.append(val) @@ -287,7 +287,7 @@ def fill_rand(ary): def make_ref_args(kernel, queue, parameters, fill_value): - from loopy.kernel import ScalarArg, ArrayArg, ImageArg + from loopy.kernel import ScalarArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -309,7 +309,7 @@ def make_ref_args(kernel, queue, parameters, result.append(arg_value) - elif isinstance(arg, (ArrayArg, ImageArg)): + elif isinstance(arg, (GlobalArg, ImageArg)): if arg.shape is None: raise ValueError("arrays need known shape to use automatic " "testing") @@ -353,7 +353,7 @@ def make_ref_args(kernel, queue, parameters, def make_args(queue, kernel, ref_input_arrays, parameters, fill_value): - from loopy.kernel import ScalarArg, ArrayArg, ImageArg + from loopy.kernel import ScalarArg, GlobalArg, ImageArg from pymbolic import evaluate @@ -373,7 +373,7 @@ def make_args(queue, kernel, ref_input_arrays, parameters, result.append(arg_value) - elif isinstance(arg, (ArrayArg, ImageArg)): + elif isinstance(arg, (GlobalArg, ImageArg)): if arg.name in kernel.get_written_variables(): if isinstance(arg, ImageArg): raise RuntimeError("write-mode images not supported in " diff --git a/loopy/kernel.py b/loopy/kernel.py index 753744974..7fc58debb 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -113,9 +113,9 @@ def parse_tag(tag): # {{{ arguments -class ArrayArg: +class _ShapedArg: def __init__(self, name, dtype, strides=None, shape=None, order="C", - offset=0, constant_mem=False): + offset=0): """ All of the following are optional. Specify either strides or shape. @@ -170,18 +170,25 @@ class ArrayArg: self.shape = shape self.order = order - self.constant_mem = constant_mem - @property def dimensions(self): return len(self.shape) +class GlobalArg(_ShapedArg): def __repr__(self): - return "<ArrayArg '%s' of type %s and shape (%s)>" % ( + return "<GlobalArg '%s' of type %s and shape (%s)>" % ( self.name, self.dtype, ",".join(str(i) for i in self.shape)) -class ConstantArrayArg(ArrayArg): - pass +class ArrayArg(GlobalArg): + def __init__(self, *args, **kwargs): + from warnings import warn + warn("ArrayArg is a deprecated name of GlobalArg") + GlobalArg.__init__(self, *args, **kwargs) + +class ConstantArg(_ShapedArg): + def __repr__(self): + return "<ConstantArg '%s' of type %s and shape (%s)>" % ( + self.name, self.dtype, ",".join(str(i) for i in self.shape)) class ImageArg: def __init__(self, name, dtype, dimensions=None, shape=None): diff --git a/test/test_fem_assembly.py b/test/test_fem_assembly.py index d5e439514..7d71d76c7 100644 --- a/test/test_fem_assembly.py +++ b/test/test_fem_assembly.py @@ -35,11 +35,11 @@ def test_laplacian_stiffness(ctx_factory): "sum_float32(dx_axis, dPsi$one(i,dx_axis)*dPsi$two(j,dx_axis))))" ], [ - lp.ArrayArg("jacInv", dtype, shape=(dim, dim, Nc_sym, Nq), order=order), - lp.ConstantArrayArg("DPsi", dtype, shape=(dim, Nb, Nq), order=order), - lp.ArrayArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order), - lp.ConstantArrayArg("w", dtype, shape=(Nq,), order=order), - lp.ArrayArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order), + lp.GlobalArg("jacInv", dtype, shape=(dim, dim, Nc_sym, Nq), order=order), + lp.ConstantArg("DPsi", dtype, shape=(dim, Nb, Nq), order=order), + lp.GlobalArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order), + lp.ConstantArg("w", dtype, shape=(Nq,), order=order), + lp.GlobalArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order), lp.ScalarArg("Nc", np.int32, approximately=1000), ], name="lapquad", assumptions="Nc>=1") diff --git a/test/test_linalg.py b/test/test_linalg.py index ffd99a6c7..a81094336 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -111,10 +111,10 @@ def test_axpy(ctx_factory): ], [ lp.ScalarArg("a", dtype), - lp.ArrayArg("x", dtype, shape="n,"), + lp.GlobalArg("x", dtype, shape="n,"), lp.ScalarArg("b", dtype), - lp.ArrayArg("y", dtype, shape="n,"), - lp.ArrayArg("z", dtype, shape="n,"), + lp.GlobalArg("y", dtype, shape="n,"), + lp.GlobalArg("z", dtype, shape="n,"), lp.ScalarArg("n", np.int32, approximately=n), ], name="axpy", assumptions="n>=1", @@ -163,8 +163,8 @@ def test_transpose(ctx_factory): "b[i, j] = a[j, i]" ], [ - lp.ArrayArg("a", dtype, shape=(n, n), order=order), - lp.ArrayArg("b", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n, n), order=order), + lp.GlobalArg("b", dtype, shape=(n, n), order=order), ], name="transpose") @@ -202,9 +202,9 @@ def test_plain_matrix_mul(ctx_factory): "c[i, j] = %s(k, a[i, k]*b[k, j])" % reduction_func ], [ - lp.ArrayArg("a", dtype, shape=(n, n), order=order), - lp.ArrayArg("b", dtype, shape=(n, n), order=order), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n, n), order=order), + lp.GlobalArg("b", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -244,9 +244,9 @@ def test_variable_size_matrix_mul(ctx_factory): "label: c[i, j] = sum_float32(k, a[i, k]*b[k, j])" ], [ - lp.ArrayArg("a", dtype, shape=(n, n), order=order), - lp.ArrayArg("b", dtype, shape=(n, n), order=order), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n, n), order=order), + lp.GlobalArg("b", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), lp.ScalarArg("n", np.int32, approximately=n), ], name="matmul", assumptions="n >= 16") @@ -295,9 +295,9 @@ def test_rank_one(ctx_factory): "label: c[i, j] = a[i]*b[j]" ], [ - lp.ArrayArg("a", dtype, shape=(n,), order=order), - lp.ArrayArg("b", dtype, shape=(n,), order=order), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n,), order=order), + lp.GlobalArg("b", dtype, shape=(n,), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), lp.ScalarArg("n", np.int32, approximately=n), ], name="rank_one", assumptions="n >= 16") @@ -376,9 +376,9 @@ def test_troublesome_premagma_fermi_matrix_mul(ctx_factory): "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" ], [ - lp.ArrayArg("a", dtype, shape=(n, n), order=order), - lp.ArrayArg("b", dtype, shape=(n, n), order=order), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n, n), order=order), + lp.GlobalArg("b", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -430,9 +430,9 @@ def test_intel_matrix_mul(ctx_factory): "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" ], [ - lp.ArrayArg("a", dtype, shape=(n, n), order=order), - lp.ArrayArg("b", dtype, shape=(n, n), order=order), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("a", dtype, shape=(n, n), order=order), + lp.GlobalArg("b", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -497,7 +497,7 @@ def test_magma_fermi_matrix_mul(ctx_factory): [ lp.ImageArg("a", dtype, 2), lp.ImageArg("b", dtype, 2), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -557,7 +557,7 @@ def test_image_matrix_mul(ctx_factory): [ lp.ImageArg("a", dtype, 2), lp.ImageArg("b", dtype, 2), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -607,7 +607,7 @@ def test_image_matrix_mul_ilp(ctx_factory): [ lp.ImageArg("a", dtype, shape=(n, n)), lp.ImageArg("b", dtype, shape=(n, n)), - lp.ArrayArg("c", dtype, shape=(n, n), order=order), + lp.GlobalArg("c", dtype, shape=(n, n), order=order), ], name="matmul") @@ -650,9 +650,9 @@ def test_fancy_matrix_mul(ctx_factory): "c[i, j] = sum_float32(k, a[i, k]*b[k, j])" ], [ - lp.ArrayArg("a", dtype, shape="(n, n)", order=order), - lp.ArrayArg("b", dtype, shape="(n, n)", order=order), - lp.ArrayArg("c", dtype, shape="(n, n)", order=order), + lp.GlobalArg("a", dtype, shape="(n, n)", order=order), + lp.GlobalArg("b", dtype, shape="(n, n)", order=order), + lp.GlobalArg("c", dtype, shape="(n, n)", order=order), lp.ScalarArg("n", np.int32, approximately=1000), ], name="fancy_matmul", assumptions="n>=1") diff --git a/test/test_loopy.py b/test/test_loopy.py index 2eeea576f..d412b4861 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -22,7 +22,7 @@ def test_owed_barriers(ctx_factory): [ "[i:l.0] <float32> z[i] = a[i]" ], - [lp.ArrayArg("a", np.float32, shape=(100,))] + [lp.GlobalArg("a", np.float32, shape=(100,))] ) kernel_gen = lp.generate_loop_schedules(knl) @@ -43,7 +43,7 @@ def test_wg_too_small(ctx_factory): [ "[i:l.0] <float32> z[i] = a[i]" ], - [lp.ArrayArg("a", np.float32, shape=(100,))], + [lp.GlobalArg("a", np.float32, shape=(100,))], local_sizes={0: 16}) kernel_gen = lp.generate_loop_schedules(knl) @@ -69,7 +69,7 @@ def test_multi_cse(ctx_factory): [ "[i] <float32> z[i] = a[i] + a[i]**2" ], - [lp.ArrayArg("a", np.float32, shape=(100,))], + [lp.GlobalArg("a", np.float32, shape=(100,))], local_sizes={0: 16}) knl = lp.split_dimension(knl, "i", 16, inner_tag="l.0") @@ -99,7 +99,7 @@ def test_stencil(ctx_factory): " + a[i+1,j]" ], [ - lp.ArrayArg("a", np.float32, shape=(32,32,)) + lp.GlobalArg("a", np.float32, shape=(32,32,)) ]) @@ -134,8 +134,8 @@ def test_eq_constraint(ctx_factory): "a[i] = b[i]" ], [ - lp.ArrayArg("a", np.float32, shape=(1000,)), - lp.ArrayArg("b", np.float32, shape=(1000,)) + lp.GlobalArg("a", np.float32, shape=(1000,)), + lp.GlobalArg("b", np.float32, shape=(1000,)) ]) knl = lp.split_dimension(knl, "i", 16, outer_tag="g.0") diff --git a/test/test_nbody.py b/test/test_nbody.py index f1641256f..13cfc02ba 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -21,8 +21,8 @@ def test_nbody(ctx_factory): "pot[i] = sum_float32(j, if(i != j, invdist, 0))", ], [ - lp.ArrayArg("x", dtype, shape="N,3", order="C"), - lp.ArrayArg("pot", dtype, shape="N", order="C"), + lp.GlobalArg("x", dtype, shape="N,3", order="C"), + lp.GlobalArg("pot", dtype, shape="N", order="C"), lp.ScalarArg("N", np.int32), ], name="nbody", assumptions="N>=1") diff --git a/test/test_sem.py b/test/test_sem.py index 8a93d11c0..6ee04953d 100644 --- a/test/test_sem.py +++ b/test/test_sem.py @@ -52,10 +52,10 @@ def test_laplacian(ctx_factory): "+ sum_float32(m, D[m,k]*Gw(i,j,m))" ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), - lp.ArrayArg("D", dtype, shape=(n, n), order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("lap", dtype, shape=field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -140,10 +140,10 @@ def test_laplacian_lmem(ctx_factory): "+ sum_float32(m, D[m,k]*(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)))" ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), - lp.ArrayArg("D", dtype, shape=(n, n), order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("lap", dtype, shape=field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -215,10 +215,10 @@ def test_laplacian_lmem_ilp(ctx_factory): "+ sum_float32(m, D[m,k]*(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)))" ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), - lp.ArrayArg("D", dtype, shape=(n, n), order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("lap", dtype, shape=field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="semlap", assumptions="K>=1") @@ -313,14 +313,14 @@ def test_advect(ctx_factory): "Nw[e,i,j,k] = Vr(i,j,k)*wr(i,j,k)+Vs(i,j,k)*ws(i,j,k)+Vt(i,j,k)*wt(i,j,k)", ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("v", dtype, shape=field_shape, order=order), - lp.ArrayArg("w", dtype, shape=field_shape, order=order), - lp.ArrayArg("Nu", dtype, shape=field_shape, order=order), - lp.ArrayArg("Nv", dtype, shape=field_shape, order=order), - lp.ArrayArg("Nw", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(9,)+field_shape, order=order), - lp.ArrayArg("D", dtype, shape=(N, N), order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("v", dtype, shape=field_shape, order=order), + lp.GlobalArg("w", dtype, shape=field_shape, order=order), + lp.GlobalArg("Nu", dtype, shape=field_shape, order=order), + lp.GlobalArg("Nv", dtype, shape=field_shape, order=order), + lp.GlobalArg("Nw", dtype, shape=field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(9,)+field_shape, order=order), + lp.GlobalArg("D", dtype, shape=(N, N), order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -432,15 +432,15 @@ def test_advect_dealias(ctx_factory): ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("v", dtype, shape=field_shape, order=order), - lp.ArrayArg("w", dtype, shape=field_shape, order=order), - lp.ArrayArg("INu", dtype, shape=field_shape, order=order), - lp.ArrayArg("INv", dtype, shape=field_shape, order=order), - lp.ArrayArg("INw", dtype, shape=field_shape, order=order), - lp.ArrayArg("D", dtype, shape=(M,M), order=order), - lp.ArrayArg("I", dtype, shape=(M, N), order=order), - lp.ArrayArg("V", dtype, shape=(N, M), order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("v", dtype, shape=field_shape, order=order), + lp.GlobalArg("w", dtype, shape=field_shape, order=order), + lp.GlobalArg("INu", dtype, shape=field_shape, order=order), + lp.GlobalArg("INv", dtype, shape=field_shape, order=order), + lp.GlobalArg("INw", dtype, shape=field_shape, order=order), + lp.GlobalArg("D", dtype, shape=(M,M), order=order), + lp.GlobalArg("I", dtype, shape=(M, N), order=order), + lp.GlobalArg("V", dtype, shape=(N, M), order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="sem_advect", assumptions="K>=1") @@ -500,11 +500,11 @@ def test_interp_diff(ctx_factory): "Pu[ip,jp,kp,e] = sum_float32(i, V[ip,i]*Pu[i ,jp,kp,e])", ], [ - lp.ArrayArg("u", dtype, shape=field_shape, order=order), - lp.ArrayArg("P", dtype, shape=interim_field_shape, order=order), - lp.ArrayArg("I", dtype, shape=(M, N), order=order), - lp.ArrayArg("V", dtype, shape=(N, M), order=order), - lp.ArrayArg("Pu", dtype, shape=field_shape, order=order), + lp.GlobalArg("u", dtype, shape=field_shape, order=order), + lp.GlobalArg("P", dtype, shape=interim_field_shape, order=order), + lp.GlobalArg("I", dtype, shape=(M, N), order=order), + lp.GlobalArg("V", dtype, shape=(N, M), order=order), + lp.GlobalArg("Pu", dtype, shape=field_shape, order=order), lp.ScalarArg("K", np.int32, approximately=1000), ], name="sem_lap_precon", assumptions="K>=1") -- GitLab