From 28c0b84ae5d7fb0712141f6e2cf81dbe5345ba4a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 12:53:23 -0600 Subject: [PATCH 01/10] Fix broken fold nesting --- loopy/target/execution.py | 1 + loopy/target/pyopencl_execution.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 4d6c0378c..503158e1e 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -649,6 +649,7 @@ class ExecutionWrapperGeneratorBase(object): return gen.get_function() +# }}} # }}} diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 0da502fba..cc0b48a6a 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -76,6 +76,8 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen("") + # }}} + # {{{ handle allocation of unspecified arguements def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): -- GitLab From 729dec731e42a70c3dd4378593953e982d289592 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 12:53:49 -0600 Subject: [PATCH 02/10] Remove @memoize_method from get_typed_and_scheduled_kernel --- loopy/target/execution.py | 1 - 1 file changed, 1 deletion(-) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 503158e1e..fbacf11a7 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -729,7 +729,6 @@ class KernelExecutorBase(object): return kernel - @memoize_method def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): from loopy import CACHING_ENABLED -- GitLab From 929398414e86c38dfc49330f4463061910eacc17 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 12:54:17 -0600 Subject: [PATCH 03/10] Reinstate process_dtype into KernelExecutorBase.get_code --- loopy/target/execution.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index fbacf11a7..0304ec6f0 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -784,8 +784,18 @@ class KernelExecutorBase(object): return get_highlighted_code(code) def get_code(self, arg_to_dtype=None): + def process_dtype(dtype): + if isinstance(dtype, type) and issubclass(dtype, np.generic): + dtype = np.dtype(dtype) + if isinstance(dtype, np.dtype): + from loopy.types import NumpyType + dtype = NumpyType(dtype, self.kernel.target) + + return dtype + if arg_to_dtype is not None: - arg_to_dtype = frozenset(six.iteritems(arg_to_dtype)) + arg_to_dtype = frozenset( + (k, process_dtype(v)) for k, v in six.iteritems(arg_to_dtype)) kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) -- GitLab From b08c65f2745b97c09375207ecb2a44b5f3f7738a Mon Sep 17 00:00:00 2001 From: Nick Date: Mon, 11 Dec 2017 17:37:54 -0500 Subject: [PATCH 04/10] fix pickling / caching issue for ctypes.CDLL --- loopy/target/c/c_execution.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 36c4b769d..1345b58da 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -227,7 +227,6 @@ class CCompiler(object): """Build temporary filename path in tempdir.""" return os.path.join(self.tempdir, name) - @memoize_method def build(self, name, code, debug=False, wait_on_error=None, debug_recompile=True): """Compile code, build and load shared library.""" @@ -235,7 +234,7 @@ class CCompiler(object): c_fname = self._tempname('code.' + self.source_suffix) # build object - checksum, mod_name, ext_file, recompiled = \ + _, mod_name, ext_file, recompiled = \ compile_from_string(self.toolchain, name, code, c_fname, self.tempdir, debug, wait_on_error, debug_recompile, False) @@ -244,7 +243,7 @@ class CCompiler(object): logger.debug('Kernel {} compiled from source'.format(name)) # and return compiled - return checksum, ctypes.CDLL(ext_file) + return ctypes.CDLL(ext_file) class CPlusPlusCompiler(CCompiler): @@ -306,8 +305,7 @@ class CompiledCKernel(object): # get code and build self.code = dev_code self.comp = comp - self.checksum, self.dll = self.comp.build( - self.name, self.code) + self.dll = self.comp.build(self.name, self.code) # get the function declaration for interface with ctypes func_decl = IDIToCDLL(self.target) -- GitLab From 4179590ffb4c0edd711c52826ed990fb667c2838 Mon Sep 17 00:00:00 2001 From: Nick Date: Mon, 11 Dec 2017 17:43:22 -0500 Subject: [PATCH 05/10] incorrect sense --- loopy/target/c/c_execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 1345b58da..de8a0b780 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -239,7 +239,7 @@ class CCompiler(object): self.tempdir, debug, wait_on_error, debug_recompile, False) - if not recompiled: + if recompiled: logger.debug('Kernel {} compiled from source'.format(name)) # and return compiled -- GitLab From a74486964e17e782fff1e92e884c2b6e3318d14c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 17:03:15 -0600 Subject: [PATCH 06/10] Split test_preamble_with_separate_temporaries into separate file for picklability --- test/test_loopy.py | 105 ++++++--------------------------------------- test/testlib.py | 100 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 93 deletions(-) create mode 100644 test/testlib.py diff --git a/test/test_loopy.py b/test/test_loopy.py index 53fe1c150..e33412001 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2689,97 +2689,6 @@ def test_preamble_with_separate_temporaries(ctx_factory): from loopy.kernel.data import temp_var_scope as scopes # create a function mangler - func_name = 'indirect' - func_arg_dtypes = (np.int32, np.int32, np.int32) - func_result_dtypes = (np.int32,) - - def __indirectmangler(kernel, name, arg_dtypes): - """ - A function that will return a :class:`loopy.kernel.data.CallMangleInfo` - to interface with the calling :class:`loopy.LoopKernel` - """ - if name != func_name: - return None - - from loopy.types import to_loopy_type - from loopy.kernel.data import CallMangleInfo - - def __compare(d1, d2): - # compare dtypes ignoring atomic - return to_loopy_type(d1, for_atomic=True) == \ - to_loopy_type(d2, for_atomic=True) - - # check types - if len(arg_dtypes) != len(arg_dtypes): - raise Exception('Unexpected number of arguments provided to mangler ' - '{}, expected {}, got {}'.format( - func_name, len(func_arg_dtypes), len(arg_dtypes))) - - for i, (d1, d2) in enumerate(zip(func_arg_dtypes, arg_dtypes)): - if not __compare(d1, d2): - raise Exception('Argument at index {} for mangler {} does not ' - 'match expected dtype. Expected {}, got {}'. - format(i, func_name, str(d1), str(d2))) - - # get target for creation - target = arg_dtypes[0].target - return CallMangleInfo( - target_name=func_name, - result_dtypes=tuple(to_loopy_type(x, target=target) for x in - func_result_dtypes), - arg_dtypes=arg_dtypes) - - # create the preamble generator - def create_preamble(arr): - def __indirectpreamble(preamble_info): - # find a function matching our name - func_match = next( - (x for x in preamble_info.seen_functions - if x.name == func_name), None) - desc = 'custom_funcs_indirect' - if func_match is not None: - from loopy.types import to_loopy_type - # check types - if tuple(to_loopy_type(x) for x in func_arg_dtypes) == \ - func_match.arg_dtypes: - # if match, create our temporary - var = lp.TemporaryVariable( - 'lookup', initializer=arr, dtype=arr.dtype, shape=arr.shape, - scope=scopes.GLOBAL, read_only=True) - # and code - code = """ - int {name}(int start, int end, int match) - {{ - int result = start; - for (int i = start + 1; i < end; ++i) - {{ - if (lookup[i] == match) - result = i; - }} - return result; - }} - """.format(name=func_name) - - # generate temporary variable code - from cgen import Initializer - from loopy.target.c import generate_array_literal - codegen_state = preamble_info.codegen_state.copy( - is_generating_device_code=True) - kernel = preamble_info.kernel - ast_builder = codegen_state.ast_builder - target = kernel.target - decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype) - decl = ast_builder.wrap_global_constant( - ast_builder.get_temporary_decl( - codegen_state, None, var, - decl_info)) - if var.initializer is not None: - decl = Initializer(decl, generate_array_literal( - codegen_state, var, var.initializer)) - # return generated code - yield (desc, '\n'.join([str(decl), code])) - return __indirectpreamble - # and finally create a test n = 10 # for each entry come up with a random number of data points @@ -2809,9 +2718,19 @@ def test_preamble_with_separate_temporaries(ctx_factory): lp.GlobalArg('data', shape=(data.size,), dtype=np.float64)], ) # fixt params, and add manglers / preamble + from testlib import SeparateTemporariesPreambleTestHelper + preamble_with_sep_helper = SeparateTemporariesPreambleTestHelper( + func_name='indirect', + func_arg_dtypes=(np.int32, np.int32, np.int32), + func_result_dtypes=(np.int32,), + arr=lookup + ) + kernel = lp.fix_parameters(kernel, **{'n': n}) - kernel = lp.register_preamble_generators(kernel, [create_preamble(lookup)]) - kernel = lp.register_function_manglers(kernel, [__indirectmangler]) + kernel = lp.register_preamble_generators( + kernel, [preamble_with_sep_helper.preamble_gen]) + kernel = lp.register_function_manglers( + kernel, [preamble_with_sep_helper.mangler]) print(lp.generate_code(kernel)[0]) # and call (functionality unimportant, more that it compiles) diff --git a/test/testlib.py b/test/testlib.py new file mode 100644 index 000000000..0d32e0b51 --- /dev/null +++ b/test/testlib.py @@ -0,0 +1,100 @@ +import loopy as lp + +# {{{ test_preamble_with_separate_temporaries + +class SeparateTemporariesPreambleTestHelper: + def __init__(self, func_name, func_arg_dtypes, func_result_dtypes, arr): + self.func_name = func_name + self.func_arg_dtypes = func_arg_dtypes + self.func_result_dtypes = func_result_dtypes + self.arr = arr + + def mangler(self, kernel, name, arg_dtypes): + """ + A function that will return a :class:`loopy.kernel.data.CallMangleInfo` + to interface with the calling :class:`loopy.LoopKernel` + """ + if name != self.func_name: + return None + + from loopy.types import to_loopy_type + from loopy.kernel.data import CallMangleInfo + + def __compare(d1, d2): + # compare dtypes ignoring atomic + return to_loopy_type(d1, for_atomic=True) == \ + to_loopy_type(d2, for_atomic=True) + + # check types + if len(arg_dtypes) != len(arg_dtypes): + raise Exception('Unexpected number of arguments provided to mangler ' + '{}, expected {}, got {}'.format( + self.func_name, len(self.func_arg_dtypes), + len(arg_dtypes))) + + for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)): + if not __compare(d1, d2): + raise Exception('Argument at index {} for mangler {} does not ' + 'match expected dtype. Expected {}, got {}'. + format(i, self.func_name, str(d1), str(d2))) + + # get target for creation + target = arg_dtypes[0].target + return CallMangleInfo( + target_name=self.func_name, + result_dtypes=tuple(to_loopy_type(x, target=target) for x in + self.func_result_dtypes), + arg_dtypes=arg_dtypes) + + def preamble_gen(self, preamble_info): + from loopy.kernel.data import temp_var_scope as scopes + + # find a function matching our name + func_match = next( + (x for x in preamble_info.seen_functions + if x.name == self.func_name), None) + desc = 'custom_funcs_indirect' + if func_match is not None: + from loopy.types import to_loopy_type + # check types + if tuple(to_loopy_type(x) for x in self.func_arg_dtypes) == \ + func_match.arg_dtypes: + # if match, create our temporary + var = lp.TemporaryVariable( + 'lookup', initializer=self.arr, dtype=self.arr.dtype, + shape=self.arr.shape, + scope=scopes.GLOBAL, read_only=True) + # and code + code = """ + int {name}(int start, int end, int match) + {{ + int result = start; + for (int i = start + 1; i < end; ++i) + {{ + if (lookup[i] == match) + result = i; + }} + return result; + }} + """.format(name=self.func_name) + + # generate temporary variable code + from cgen import Initializer + from loopy.target.c import generate_array_literal + codegen_state = preamble_info.codegen_state.copy( + is_generating_device_code=True) + kernel = preamble_info.kernel + ast_builder = codegen_state.ast_builder + target = kernel.target + decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype) + decl = ast_builder.wrap_global_constant( + ast_builder.get_temporary_decl( + codegen_state, None, var, + decl_info)) + if var.initializer is not None: + decl = Initializer(decl, generate_array_literal( + codegen_state, var, var.initializer)) + # return generated code + yield (desc, '\n'.join([str(decl), code])) + +# }}} -- GitLab From 0225cce99b8ea10242d9cc031a67185df3c76c13 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 17:06:09 -0600 Subject: [PATCH 07/10] Flake8 fix --- test/testlib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/testlib.py b/test/testlib.py index 0d32e0b51..3fae05a38 100644 --- a/test/testlib.py +++ b/test/testlib.py @@ -1,5 +1,6 @@ import loopy as lp + # {{{ test_preamble_with_separate_temporaries class SeparateTemporariesPreambleTestHelper: -- GitLab From eae9953e14dcf93b928a24e47ffaa2624b8eddd2 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 17:21:27 -0600 Subject: [PATCH 08/10] Fix format C exec strings for Py2.6 --- loopy/target/c/c_execution.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index de8a0b780..c382e2deb 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -53,7 +53,7 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): def python_dtype_str(self, dtype): if np.dtype(str(dtype)).isbuiltin: return "_lpy_np."+dtype.name - raise Exception('dtype: {} not recognized'.format(dtype)) + raise Exception('dtype: {0} not recognized'.format(dtype)) # {{{ handle non numpy arguements @@ -240,7 +240,7 @@ class CCompiler(object): debug_recompile, False) if recompiled: - logger.debug('Kernel {} compiled from source'.format(name)) + logger.debug('Kernel {0} compiled from source'.format(name)) # and return compiled return ctypes.CDLL(ext_file) -- GitLab From d522316e339416cbe2e2fbfae9d9162832858516 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 17:22:06 -0600 Subject: [PATCH 09/10] Fix with-cache-twice CI script --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 62cbf57e1..7506a0a6b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,6 +94,7 @@ Python 3.6 POCL Twice With Cache: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python3.6 -- GitLab From e5cdbee5d33c4b592729a0ca2051ebdcceabb66c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 11 Dec 2017 17:49:19 -0600 Subject: [PATCH 10/10] Fix^2 with-cache-twice CI script --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7506a0a6b..c76590f62 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,6 +94,7 @@ Python 3.6 POCL Twice With Cache: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" + - "cd .." - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: -- GitLab