diff --git a/examples/python/ispc-harness.py b/examples/python/ispc-harness.py index 7b29340cf2ea73b3afb726da6fb81799ab3c5c2d..f18bc1c4bb9e5cf6f82ed0506a96b0a3924194dc 100644 --- a/examples/python/ispc-harness.py +++ b/examples/python/ispc-harness.py @@ -62,6 +62,8 @@ def build_ispc_shared_lib( # }}} +# {{{ numpy address munging + def address_from_numpy(obj): ary_intf = getattr(obj, "__array_interface__", None) if ary_intf is None: @@ -111,38 +113,101 @@ def empty_aligned(shape, dtype, order='C', n=64): return array +# }}} + + +def transform(knl, vars, stream_dtype): + vars = [v.strip() for v in vars.split(",")] + knl = lp.assume(knl, "n>0") + knl = lp.split_iname( + knl, "i", 2**18, outer_tag="g.0", slabs=(0, 1)) + knl = lp.split_iname(knl, "i_inner", 8, inner_tag="l.0") + + knl = lp.add_and_infer_dtypes(knl, { + var: stream_dtype + for var in vars + }) + + knl = lp.set_argument_order(knl, vars + ["n"]) + + return knl + + +def gen_code(knl): + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_scheduled_kernel(knl) + ispc_code, arg_info = lp.generate_code(knl) + + return ispc_code + + +NRUNS = 10 +ALIGN_TO = 4096 +ARRAY_SIZE = 2**28 + +if 0: + STREAM_DTYPE = np.float64 + STREAM_CTYPE = ctypes.c_double +else: + STREAM_DTYPE = np.float32 + STREAM_CTYPE = ctypes.c_float + +if 1: + INDEX_DTYPE = np.int32 + INDEX_CTYPE = ctypes.c_int +else: + INDEX_DTYPE = np.int64 + INDEX_CTYPE = ctypes.c_longlong + def main(): with open("tasksys.cpp", "r") as ts_file: tasksys_source = ts_file.read() - stream_dtype = np.float64 - stream_ctype = ctypes.c_double - index_dtype = np.int32 - - from loopy.target.ispc import ISPCTarget - stream_knl = lp.make_kernel( - "{[i]: 0<=i0") - stream_knl = lp.split_iname(stream_knl, - "i", 2**18, outer_tag="g.0", slabs=(0, 1)) - stream_knl = lp.split_iname(stream_knl, "i_inner", 8, inner_tag="l.0") - stream_knl = lp.preprocess_kernel(stream_knl) - stream_knl = lp.get_one_scheduled_kernel(stream_knl) - stream_knl = lp.set_argument_order(stream_knl, "n,a,x,y,z") - ispc_code, arg_info = lp.generate_code(stream_knl) + if 0: + from loopy.target.ispc import ISPCTarget + stream_knl = lp.make_kernel( + "{[i]: 0<=i0") + stream_knl = lp.split_iname(stream_knl, + "i", 2**18, outer_tag="g.0", slabs=(0, 1)) + stream_knl = lp.split_iname(stream_knl, "i_inner", 8, inner_tag="l.0") + stream_knl = lp.preprocess_kernel(stream_knl) + stream_knl = lp.get_one_scheduled_kernel(stream_knl) + stream_knl = lp.set_argument_order(stream_knl, "n,a,x,y,z") + ispc_code, arg_info = lp.generate_code(stream_knl) + + def make_knl(name, insn, vars): + knl = lp.make_kernel( + "{[i]: 0<=i