diff --git a/examples/python/ispc-harness.py b/examples/python/ispc-stream-harness.py
similarity index 62%
rename from examples/python/ispc-harness.py
rename to examples/python/ispc-stream-harness.py
index f18bc1c4bb9e5cf6f82ed0506a96b0a3924194dc..199a4dd038b1b79e3364c9a0c053300c5407bff0 100644
--- a/examples/python/ispc-harness.py
+++ b/examples/python/ispc-stream-harness.py
@@ -7,113 +7,8 @@ import os
 from time import time
 from tempfile import TemporaryDirectory
 
-
-# {{{ build_ispc_shared_lib
-
-def build_ispc_shared_lib(
-        cwd, ispc_sources, cxx_sources,
-        ispc_options=[], cxx_options=[],
-        ispc_bin="ispc",
-        cxx_bin="g++",
-        quiet=True):
-    from os.path import join
-
-    ispc_source_names = []
-    for name, contents in ispc_sources:
-        ispc_source_names.append(name)
-
-        with open(join(cwd, name), "w") as srcf:
-            srcf.write(contents)
-
-    cxx_source_names = []
-    for name, contents in cxx_sources:
-        cxx_source_names.append(name)
-
-        with open(join(cwd, name), "w") as srcf:
-            srcf.write(contents)
-
-    from subprocess import check_call
-
-    ispc_cmd = ([ispc_bin,
-                "--pic",
-                "-o", "ispc.o"]
-            + ispc_options
-            + list(ispc_source_names))
-    if not quiet:
-        print(" ".join(ispc_cmd))
-
-    check_call(ispc_cmd, cwd=cwd)
-
-    cxx_cmd = ([
-                cxx_bin,
-                "-shared", "-Wl,--export-dynamic",
-                "-fPIC",
-                "-oshared.so",
-                "ispc.o",
-                ]
-            + cxx_options
-            + list(cxx_source_names))
-
-    check_call(cxx_cmd, cwd=cwd)
-
-    if not quiet:
-        print(" ".join(cxx_cmd))
-
-# }}}
-
-
-# {{{ numpy address munging
-
-def address_from_numpy(obj):
-    ary_intf = getattr(obj, "__array_interface__", None)
-    if ary_intf is None:
-        raise RuntimeError("no array interface")
-
-    buf_base, is_read_only = ary_intf["data"]
-    return buf_base + ary_intf.get("offset", 0)
-
-
-def cptr_from_numpy(obj):
-    return ctypes.c_void_p(address_from_numpy(obj))
-
-
-# https://github.com/hgomersall/pyFFTW/blob/master/pyfftw/utils.pxi#L172
-def empty_aligned(shape, dtype, order='C', n=64):
-    '''empty_aligned(shape, dtype='float64', order='C', n=None)
-    Function that returns an empty numpy array that is n-byte aligned,
-    where ``n`` is determined by inspecting the CPU if it is not
-    provided.
-    The alignment is given by the final optional argument, ``n``. If
-    ``n`` is not provided then this function will inspect the CPU to
-    determine alignment. The rest of the arguments are as per
-    :func:`numpy.empty`.
-    '''
-    itemsize = np.dtype(dtype).itemsize
-
-    # Apparently there is an issue with numpy.prod wrapping around on 32-bits
-    # on Windows 64-bit. This shouldn't happen, but the following code
-    # alleviates the problem.
-    if not isinstance(shape, (int, np.integer)):
-        array_length = 1
-        for each_dimension in shape:
-            array_length *= each_dimension
-
-    else:
-        array_length = shape
-
-    base_ary = np.empty(array_length*itemsize+n, dtype=np.int8)
-
-    # We now need to know how to offset base_ary
-    # so it is correctly aligned
-    _array_aligned_offset = (n-address_from_numpy(base_ary)) % n
-
-    array = np.frombuffer(
-            base_ary[_array_aligned_offset:_array_aligned_offset-n].data,
-            dtype=dtype).reshape(shape, order=order)
-
-    return array
-
-# }}}
+from loopy.tools import (empty_aligned, address_from_numpy,
+        build_ispc_shared_lib, cptr_from_numpy)
 
 
 def transform(knl, vars, stream_dtype):
@@ -219,6 +114,7 @@ def main():
                     #"-g", "--no-omit-frame-pointer",
                     "--target=avx2-i32x8",
                     "--opt=force-aligned-memory",
+                    "--opt=disable-loop-unroll",
                     #"--opt=fast-math",
                     #"--opt=disable-fma",
                     ]
diff --git a/examples/python/run-ispc-harness.sh b/examples/python/run-ispc-harness.sh
index dfed8c221058f7b89e0239221bcb22761547154f..f39a51fefee478b8023ded65a2ce3c8de018c852 100755
--- a/examples/python/run-ispc-harness.sh
+++ b/examples/python/run-ispc-harness.sh
@@ -1,3 +1,3 @@
-#! /bin/sh
+#! /bin/bash
 
-OMP_PLACES=cores OMP_DISPLAY_ENV=true OMP_SCHEDULE=static python ispc-harness.py
+OMP_PLACES=cores OMP_DISPLAY_ENV=true OMP_SCHEDULE=static python "$@"
diff --git a/loopy/tools.py b/loopy/tools.py
index 55b177bda4e6be03a985286fd4faf6322e257824..777532e7af92bc62e6878f564c8c5545f4cb2c4a 100644
--- a/loopy/tools.py
+++ b/loopy/tools.py
@@ -233,6 +233,142 @@ def remove_common_indentation(code, require_leading_newline=True,
 # }}}
 
 
+# {{{ build_ispc_shared_lib
+
+# DO NOT RELY ON THESE: THEY WILL GO AWAY
+
+def build_ispc_shared_lib(
+        cwd, ispc_sources, cxx_sources,
+        ispc_options=None, cxx_options=None,
+        ispc_bin="ispc",
+        cxx_bin="g++",
+        quiet=True):
+    """Compile ISPC and C++ sources into ``shared.so`` inside *cwd*.
+
+    :arg cwd: directory in which the sources are written and both
+        compilers are run.
+    :arg ispc_sources: iterable of ``(file_name, contents)`` pairs that
+        are written to *cwd* and passed to *ispc_bin*.
+    :arg cxx_sources: iterable of ``(file_name, contents)`` pairs that
+        are written to *cwd* and passed to *cxx_bin*.
+    :arg ispc_options: extra command line arguments for *ispc_bin*.
+    :arg cxx_options: extra command line arguments for *cxx_bin*.
+    :arg quiet: if false, print each command line before running it.
+
+    Raises :class:`subprocess.CalledProcessError` if a compiler exits
+    with a non-zero status.
+    """
+    from os.path import join
+    from subprocess import check_call
+
+    # None rather than a shared mutable [] default; list() also accepts
+    # tuples and keeps the caller's list from being aliased.
+    ispc_options = list(ispc_options or [])
+    cxx_options = list(cxx_options or [])
+
+    def write_sources(sources):
+        # Write every (name, contents) pair into cwd, return the names.
+        names = []
+        for name, contents in sources:
+            names.append(name)
+            with open(join(cwd, name), "w") as srcf:
+                srcf.write(contents)
+        return names
+
+    ispc_source_names = write_sources(ispc_sources)
+    cxx_source_names = write_sources(cxx_sources)
+
+    ispc_cmd = (
+            [ispc_bin, "--pic", "-o", "ispc.o"]
+            + ispc_options
+            + ispc_source_names)
+    if not quiet:
+        print(" ".join(ispc_cmd))
+
+    check_call(ispc_cmd, cwd=cwd)
+
+    cxx_cmd = (
+            [
+                cxx_bin,
+                "-shared", "-Wl,--export-dynamic",
+                "-fPIC",
+                "-oshared.so",
+                "ispc.o",
+                ]
+            + cxx_options
+            + cxx_source_names)
+    # Echo before running, as for ISPC, so a failing command is visible.
+    if not quiet:
+        print(" ".join(cxx_cmd))
+
+    check_call(cxx_cmd, cwd=cwd)
+
+# }}}
+
+
+# {{{ numpy address munging
+
+# DO NOT RELY ON THESE: THEY WILL GO AWAY
+
+def address_from_numpy(obj):
+    """Return the integer address of the data buffer of *obj*.
+
+    Raises :class:`RuntimeError` if *obj* has no ``__array_interface__``.
+    """
+    ary_intf = getattr(obj, "__array_interface__", None)
+    if ary_intf is None:
+        raise RuntimeError("no array interface")
+
+    # "data" is an (address, read-only flag) pair; the flag is not needed.
+    buf_base, _ = ary_intf["data"]
+    return buf_base + ary_intf.get("offset", 0)
+
+
+def cptr_from_numpy(obj):
+    """Return the data address of *obj* as a :class:`ctypes.c_void_p`."""
+    import ctypes
+    return ctypes.c_void_p(address_from_numpy(obj))
+
+
+# https://github.com/hgomersall/pyFFTW/blob/master/pyfftw/utils.pxi#L172
+def empty_aligned(shape, dtype, order='C', n=64):
+    '''Return an uninitialized numpy array whose data is *n*-byte aligned.
+
+    :arg shape: an integer or a sequence of integers.
+    :arg dtype: anything accepted by :class:`numpy.dtype` (required).
+    :arg order: passed on to :meth:`numpy.ndarray.reshape`.
+    :arg n: alignment in bytes. Defaults to 64; the CPU is not inspected.
+    '''
+    itemsize = np.dtype(dtype).itemsize
+
+    # Apparently there is an issue with numpy.prod wrapping around on 32-bits
+    # on Windows 64-bit. This shouldn't happen, but the following code
+    # alleviates the problem.
+    if not isinstance(shape, (int, np.integer)):
+        array_length = 1
+        for each_dimension in shape:
+            array_length *= each_dimension
+
+    else:
+        array_length = shape
+
+    # Over-allocate by n bytes so an aligned window of the requested size
+    # always fits inside the buffer.
+    base_ary = np.empty(array_length*itemsize+n, dtype=np.int8)
+
+    # We now need to know how to offset base_ary
+    # so it is correctly aligned
+    _array_aligned_offset = (n-address_from_numpy(base_ary)) % n
+
+    array = np.frombuffer(
+            base_ary[_array_aligned_offset:_array_aligned_offset-n].data,
+            dtype=dtype).reshape(shape, order=order)
+
+    return array
+
+# }}}
+
+
 def is_interned(s):
     return s is None or intern(s) is s
 