diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 62cbf57e18d8e4ae11525699916b8719a4747546..c76590f62d4383d611e8afb40fdaddf21b148f07 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -94,6 +94,8 @@ Python 3.6 POCL Twice With Cache: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" + - "cd .." + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python3.6 diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 36c4b769d02a75df056757b6240103108377f8b0..c382e2debc8986de43355b34f9114a65bbbaf881 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -53,7 +53,7 @@ class CExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): def python_dtype_str(self, dtype): if np.dtype(str(dtype)).isbuiltin: return "_lpy_np."+dtype.name - raise Exception('dtype: {} not recognized'.format(dtype)) + raise Exception('dtype: {0} not recognized'.format(dtype)) # {{{ handle non numpy arguements @@ -227,7 +227,6 @@ class CCompiler(object): """Build temporary filename path in tempdir.""" return os.path.join(self.tempdir, name) - @memoize_method def build(self, name, code, debug=False, wait_on_error=None, debug_recompile=True): """Compile code, build and load shared library.""" @@ -235,16 +234,16 @@ class CCompiler(object): c_fname = self._tempname('code.' 
+ self.source_suffix) # build object - checksum, mod_name, ext_file, recompiled = \ + _, mod_name, ext_file, recompiled = \ compile_from_string(self.toolchain, name, code, c_fname, self.tempdir, debug, wait_on_error, debug_recompile, False) - if not recompiled: - logger.debug('Kernel {} compiled from source'.format(name)) + if recompiled: + logger.debug('Kernel {0} compiled from source'.format(name)) # and return compiled - return checksum, ctypes.CDLL(ext_file) + return ctypes.CDLL(ext_file) class CPlusPlusCompiler(CCompiler): @@ -306,8 +305,7 @@ class CompiledCKernel(object): # get code and build self.code = dev_code self.comp = comp - self.checksum, self.dll = self.comp.build( - self.name, self.code) + self.dll = self.comp.build(self.name, self.code) # get the function declaration for interface with ctypes func_decl = IDIToCDLL(self.target) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 4d6c0378cd6322e21909514a1d3c7d9d4cbe96de..0304ec6f09eb2b014bb01a7b30889e24910e0dd9 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -649,6 +649,7 @@ class ExecutionWrapperGeneratorBase(object): return gen.get_function() +# }}} # }}} @@ -728,7 +729,6 @@ class KernelExecutorBase(object): return kernel - @memoize_method def get_typed_and_scheduled_kernel(self, arg_to_dtype_set): from loopy import CACHING_ENABLED @@ -784,8 +784,18 @@ class KernelExecutorBase(object): return get_highlighted_code(code) def get_code(self, arg_to_dtype=None): + def process_dtype(dtype): + if isinstance(dtype, type) and issubclass(dtype, np.generic): + dtype = np.dtype(dtype) + if isinstance(dtype, np.dtype): + from loopy.types import NumpyType + dtype = NumpyType(dtype, self.kernel.target) + + return dtype + if arg_to_dtype is not None: - arg_to_dtype = frozenset(six.iteritems(arg_to_dtype)) + arg_to_dtype = frozenset( + (k, process_dtype(v)) for k, v in six.iteritems(arg_to_dtype)) kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype) diff --git 
a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 0da502fbab8aa45ed58a0491e0f43323ecf9aeff..cc0b48a6ac17e23f318c5489d45fca6710bb3392 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -76,6 +76,8 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen("") + # }}} + # {{{ handle allocation of unspecified arguements def handle_alloc(self, gen, arg, kernel_arg, strify, skip_arg_checks): diff --git a/test/test_loopy.py b/test/test_loopy.py index 53fe1c1504db3df6f5d5a30f8f3a20a7e997f43c..e33412001573a56ecf008b26e5849ba6c457dbeb 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2689,97 +2689,6 @@ def test_preamble_with_separate_temporaries(ctx_factory): from loopy.kernel.data import temp_var_scope as scopes # create a function mangler - func_name = 'indirect' - func_arg_dtypes = (np.int32, np.int32, np.int32) - func_result_dtypes = (np.int32,) - - def __indirectmangler(kernel, name, arg_dtypes): - """ - A function that will return a :class:`loopy.kernel.data.CallMangleInfo` - to interface with the calling :class:`loopy.LoopKernel` - """ - if name != func_name: - return None - - from loopy.types import to_loopy_type - from loopy.kernel.data import CallMangleInfo - - def __compare(d1, d2): - # compare dtypes ignoring atomic - return to_loopy_type(d1, for_atomic=True) == \ - to_loopy_type(d2, for_atomic=True) - - # check types - if len(arg_dtypes) != len(arg_dtypes): - raise Exception('Unexpected number of arguments provided to mangler ' - '{}, expected {}, got {}'.format( - func_name, len(func_arg_dtypes), len(arg_dtypes))) - - for i, (d1, d2) in enumerate(zip(func_arg_dtypes, arg_dtypes)): - if not __compare(d1, d2): - raise Exception('Argument at index {} for mangler {} does not ' - 'match expected dtype. Expected {}, got {}'. 
- format(i, func_name, str(d1), str(d2))) - - # get target for creation - target = arg_dtypes[0].target - return CallMangleInfo( - target_name=func_name, - result_dtypes=tuple(to_loopy_type(x, target=target) for x in - func_result_dtypes), - arg_dtypes=arg_dtypes) - - # create the preamble generator - def create_preamble(arr): - def __indirectpreamble(preamble_info): - # find a function matching our name - func_match = next( - (x for x in preamble_info.seen_functions - if x.name == func_name), None) - desc = 'custom_funcs_indirect' - if func_match is not None: - from loopy.types import to_loopy_type - # check types - if tuple(to_loopy_type(x) for x in func_arg_dtypes) == \ - func_match.arg_dtypes: - # if match, create our temporary - var = lp.TemporaryVariable( - 'lookup', initializer=arr, dtype=arr.dtype, shape=arr.shape, - scope=scopes.GLOBAL, read_only=True) - # and code - code = """ - int {name}(int start, int end, int match) - {{ - int result = start; - for (int i = start + 1; i < end; ++i) - {{ - if (lookup[i] == match) - result = i; - }} - return result; - }} - """.format(name=func_name) - - # generate temporary variable code - from cgen import Initializer - from loopy.target.c import generate_array_literal - codegen_state = preamble_info.codegen_state.copy( - is_generating_device_code=True) - kernel = preamble_info.kernel - ast_builder = codegen_state.ast_builder - target = kernel.target - decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype) - decl = ast_builder.wrap_global_constant( - ast_builder.get_temporary_decl( - codegen_state, None, var, - decl_info)) - if var.initializer is not None: - decl = Initializer(decl, generate_array_literal( - codegen_state, var, var.initializer)) - # return generated code - yield (desc, '\n'.join([str(decl), code])) - return __indirectpreamble - # and finally create a test n = 10 # for each entry come up with a random number of data points @@ -2809,9 +2718,19 @@ def 
test_preamble_with_separate_temporaries(ctx_factory): lp.GlobalArg('data', shape=(data.size,), dtype=np.float64)], ) # fixt params, and add manglers / preamble + from testlib import SeparateTemporariesPreambleTestHelper + preamble_with_sep_helper = SeparateTemporariesPreambleTestHelper( + func_name='indirect', + func_arg_dtypes=(np.int32, np.int32, np.int32), + func_result_dtypes=(np.int32,), + arr=lookup + ) + + kernel = lp.fix_parameters(kernel, **{'n': n}) - kernel = lp.register_preamble_generators(kernel, [create_preamble(lookup)]) - kernel = lp.register_function_manglers(kernel, [__indirectmangler]) + kernel = lp.register_preamble_generators( + kernel, [preamble_with_sep_helper.preamble_gen]) + kernel = lp.register_function_manglers( + kernel, [preamble_with_sep_helper.mangler]) print(lp.generate_code(kernel)[0]) # and call (functionality unimportant, more that it compiles)
diff --git a/test/testlib.py b/test/testlib.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fae05a38ad0f0c414f42a182e36ed26c5b50da5
--- /dev/null
+++ b/test/testlib.py
@@ -0,0 +1,128 @@
+import loopy as lp
+
+
+# {{{ test_preamble_with_separate_temporaries
+
+class SeparateTemporariesPreambleTestHelper(object):
+    """Mangler and preamble generator for one custom C lookup function.
+
+    ``test_preamble_with_separate_temporaries`` registers :meth:`mangler`
+    and :meth:`preamble_gen` of an instance on its kernel.
+
+    :arg func_name: name of the custom function, as called in the kernel
+        and as defined in the emitted preamble.
+    :arg func_arg_dtypes: expected argument dtypes, in call order.
+    :arg func_result_dtypes: result dtypes reported for the function.
+    :arg arr: array supplying initializer, dtype and shape of the global
+        ``lookup`` temporary read by the emitted function.
+    """
+
+    def __init__(self, func_name, func_arg_dtypes, func_result_dtypes, arr):
+        self.func_name = func_name
+        self.func_arg_dtypes = func_arg_dtypes
+        self.func_result_dtypes = func_result_dtypes
+        self.arr = arr
+
+    def mangler(self, kernel, name, arg_dtypes):
+        """Return a :class:`loopy.kernel.data.CallMangleInfo` for calls to
+        *self.func_name*, or *None* for any other function *name*.
+
+        :raises Exception: if the number of *arg_dtypes*, or any single
+            dtype (ignoring atomic), differs from *self.func_arg_dtypes*.
+        """
+        if name != self.func_name:
+            return None
+
+        from loopy.types import to_loopy_type
+        from loopy.kernel.data import CallMangleInfo
+
+        def dtypes_match(d1, d2):
+            # compare dtypes ignoring atomic
+            return to_loopy_type(d1, for_atomic=True) == \
+                to_loopy_type(d2, for_atomic=True)
+
+        # check the count against the expected signature first: the zip()
+        # below would silently truncate to the shorter sequence
+        if len(arg_dtypes) != len(self.func_arg_dtypes):
+            raise Exception('Unexpected number of arguments provided to mangler '
+                            '{}, expected {}, got {}'.format(
+                                self.func_name, len(self.func_arg_dtypes),
+                                len(arg_dtypes)))
+
+        for i, (d1, d2) in enumerate(zip(self.func_arg_dtypes, arg_dtypes)):
+            if not dtypes_match(d1, d2):
+                raise Exception('Argument at index {} for mangler {} does not '
+                                'match expected dtype. Expected {}, got {}'.
+                                format(i, self.func_name, str(d1), str(d2)))
+
+        # get target for creation
+        target = arg_dtypes[0].target
+        return CallMangleInfo(
+            target_name=self.func_name,
+            result_dtypes=tuple(to_loopy_type(x, target=target) for x in
+                                self.func_result_dtypes),
+            arg_dtypes=arg_dtypes)
+
+    def preamble_gen(self, preamble_info):
+        """Yield one ``(description, code)`` preamble entry holding the
+        ``lookup`` array and the C definition of *self.func_name*.
+
+        Yields nothing unless *preamble_info.seen_functions* contains a
+        function named *self.func_name* whose argument dtypes equal
+        *self.func_arg_dtypes*.
+        """
+        from loopy.kernel.data import temp_var_scope as scopes
+
+        # find a function matching our name
+        func_match = next(
+            (x for x in preamble_info.seen_functions
+             if x.name == self.func_name), None)
+        if func_match is None:
+            return
+
+        # check types
+        from loopy.types import to_loopy_type
+        expected_dtypes = tuple(
+            to_loopy_type(x) for x in self.func_arg_dtypes)
+        if expected_dtypes != func_match.arg_dtypes:
+            return
+
+        # if match, create our temporary
+        var = lp.TemporaryVariable(
+            'lookup', initializer=self.arr, dtype=self.arr.dtype,
+            shape=self.arr.shape,
+            scope=scopes.GLOBAL, read_only=True)
+        # and code
+        code = """
+        int {name}(int start, int end, int match)
+        {{
+            int result = start;
+            for (int i = start + 1; i < end; ++i)
+            {{
+                if (lookup[i] == match)
+                    result = i;
+            }}
+            return result;
+        }}
+        """.format(name=self.func_name)
+
+        # generate temporary variable code
+        from cgen import Initializer
+        from loopy.target.c import generate_array_literal
+        codegen_state = preamble_info.codegen_state.copy(
+            is_generating_device_code=True)
+        kernel = preamble_info.kernel
+        ast_builder = codegen_state.ast_builder
+        target = kernel.target
+        decl_info, = var.decl_info(target, index_dtype=kernel.index_dtype)
+        decl = ast_builder.wrap_global_constant(
+            ast_builder.get_temporary_decl(
+                codegen_state, None, var,
+                decl_info))
+        if var.initializer is not None:
+            decl = Initializer(decl, generate_array_literal(
+                codegen_state, var, var.initializer))
+        # return generated code
+        yield ('custom_funcs_indirect', '\n'.join([str(decl), code]))
+
+# }}}