diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a6f81e5b6e19af6362b51aec26f277a8d83bb829..c11e507ee79cdc6f1567acbf6c12bbd7ed22f1cc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,38 +17,36 @@ Python 2.7 POCL: junit: test/pytest.xml -Python 2.7 with legacy PyOpenCL: +Python 3 POCL: script: - - export PY_EXE=python2.7 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="pybind11 numpy mako" - - export REQUIREMENTS_TXT="requirements-old-pyopencl.txt" - export LOOPY_NO_CACHE=1 - - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python2.7 + - python3 - pocl except: - tags - retry: 2 artifacts: reports: junit: test/pytest.xml - -Python 3 POCL: +Python 3 Intel: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=intel - export EXTRA_INSTALL="pybind11 numpy mako" - export LOOPY_NO_CACHE=1 + - export LOOPY_INTEL_CL_OK_FOR_TEST_REF=1 + - source /opt/enable-intel-cl.sh - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python3 - - pocl + - intel-cl-cpu except: - tags artifacts: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6e0979a776b49556b69407b9d0d3717ca0d7761c..0dfb2455568b275b40e699683071da3a1cd2f483 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -97,9 +97,18 @@ jobs: - script: | set -e - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + sed 's/python=3/python=3.7/' .test-conda-env-py3.yml > .test-conda-env.yml + CONDA_ENVIRONMENT=.test-conda-env.yml USE_CONDA_BUILD=1 curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh . ./prepare-and-run-pylint.sh loopy test/test_*.py displayName: 'Pylint' + +schedules: +- + cron: "0 0 * * 0" + displayName: Weekly build + branches: + include: + - master diff --git a/loopy/__init__.py b/loopy/__init__.py index d69a57bf1a5435adfb067df5cfb2080633cac765..b60de6e2dcd35c1c167bf5e303401f2c6242ebec 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -142,7 +142,7 @@ from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, parse_fortran) from loopy.target import TargetBase, ASTBuilderBase -from loopy.target.c import CTarget, ExecutableCTarget, generate_header +from loopy.target.c import CFamilyTarget, CTarget, ExecutableCTarget, generate_header from loopy.target.cuda import CudaTarget from loopy.target.opencl import OpenCLTarget from loopy.target.pyopencl import PyOpenCLTarget @@ -271,7 +271,7 @@ __all__ = [ "LoopyError", "LoopyWarning", "TargetBase", - "CTarget", "ExecutableCTarget", "generate_header", + "CFamilyTarget", "CTarget", "ExecutableCTarget", "generate_header", "CudaTarget", "OpenCLTarget", "PyOpenCLTarget", "ISPCTarget", "NumbaTarget", "NumbaCudaTarget", @@ -473,7 +473,8 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None): ) result = make_kernel(set_str, "output[%s] = input[%s]" - % (commad_indices, commad_indices)) + % (commad_indices, commad_indices), + lang_version=MOST_RECENT_LANGUAGE_VERSION) result = tag_array_axes(result, "input", old_dim_tags) result = tag_array_axes(result, "output", new_dim_tags) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 66b3cb0bde00dee3ae46fcf63052219cc3664360..6837b99a026debf32b12aceef00ed3863c620639 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -23,9 +23,11 @@ THE SOFTWARE. """ from six.moves import range, zip -from pytools import Record +import os from warnings import warn +from pytools import Record + import numpy as np import loopy as lp @@ -321,7 +323,7 @@ def _default_check_result(result, ref_result): # {{{ find device for reference test -def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): +def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors, need_image_support): import pyopencl as cl noncpu_devs = [] @@ -336,8 +338,17 @@ def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): for bl in blacklist_ref_vendors): continue + if need_image_support: + if not dev.image_support: + continue + if pf.vendor == "The pocl project": + # Hahaha, no. + continue + if dev.type & cl.device_type.CPU: - if "Intel" in dev.platform.vendor: + if ("Intel" in dev.platform.vendor + and os.environ.get("LOOPY_INTEL_CL_OK_FOR_TEST_REF") + is None): # Sorry, Intel, your CPU CL has gotten too crashy of late. # (Feb 2016) continue @@ -427,7 +438,12 @@ def auto_test_vs_ref( ref_errors = [] - for dev in _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): + from loopy.kernel.data import ImageArg + need_ref_image_support = any(isinstance(arg, ImageArg) for arg in ref_knl.args) + + for dev in _enumerate_cl_devices_for_ref_test( + blacklist_ref_vendors, need_ref_image_support): + ref_ctx = cl.Context([dev]) ref_queue = cl.CommandQueue(ref_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) diff --git a/loopy/check.py b/loopy/check.py index 81b384a38a18f92c76fda9695b8d340c6f2dcc17..cc87ad9872668bf5323aefd79944e3bbd71b1153 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -362,8 +362,9 @@ class _AccessCheckMapper(WalkMapper): if not access_range.is_subset(shape_domain): raise LoopyError("'%s' in instruction '%s' " - "accesses out-of-bounds array element" - % (expr, self.insn_id)) + "accesses out-of-bounds array element (could not" + " establish '%s' is a subset of '%s')." + % (expr, self.insn_id, access_range, shape_domain)) def check_bounds(kernel): diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 58f055b7b5042ff28f7bf9674b0e7dc5ff1b6269..128e4fbc85a2a03e25da3f88b200e67eb41756d3 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -364,8 +364,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): # {{{ find bounds - aligned_domain = isl.align_spaces(domain, slab, across_dim_types=True, - obj_bigger_ok=True) + aligned_domain = isl.align_spaces(domain, slab, obj_bigger_ok=True) dom_and_slab = aligned_domain & slab @@ -389,8 +388,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): impl_domain = isl.align_spaces( codegen_state.implemented_domain, dom_and_slab, - obj_bigger_ok=True, - across_dim_types=True + obj_bigger_ok=True ).params() lbound = ( diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 42d7c0f1e2062f84ee171c8c6274a0aa74601a4a..80a7ad03101bc67f39c89c6089aa6533d1886185 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -633,7 +633,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): result = dom else: aligned_dom, aligned_result = isl.align_two( - dom, result, across_dim_types=True) + dom, result) result = aligned_result & aligned_dom return result diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 9cead8db8c9d3093ed3f179e1790d765fc4c07d2..3735b2d510aef1ada1643bce9ff07e797bfb210c 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -619,7 +619,7 @@ class ArrayBase(ImmutableRecord): If an integer N is given, the array would be declared with ``__attribute__((aligned(N)))`` in code generation for - :class:`loopy.CTarget`. + :class:`loopy.CFamilyTarget`. .. versionadded:: 2018.1 diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 2f32d94f662fe59d23ffb06dd13ade4a057304aa..157099df5a2133baa109f24e8216d63577b5dcb4 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1868,7 +1868,7 @@ def infer_arg_is_output_only(kernel): from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg new_args = [] for arg in kernel.args: - if isinstance(arg, (ArrayArg, ImageArg, ValueArg)): + if isinstance(arg, ArrayArg): if arg.is_output_only is not None: assert isinstance(arg.is_output_only, bool) new_args.append(arg) @@ -1877,7 +1877,7 @@ def infer_arg_is_output_only(kernel): new_args.append(arg.copy(is_output_only=True)) else: new_args.append(arg.copy(is_output_only=False)) - elif isinstance(arg, ConstantArg): + elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): new_args.append(arg) else: raise NotImplementedError("Unkonwn argument type %s." % type(arg)) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 2658b8cd743d335323dab7dd9aebd82ef5830652..53d05a28e7245e381be769af12d6066ffb486541 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -448,8 +448,8 @@ def parse_reduction_op(name): def reduction_function_mangler(kernel, func_id, arg_dtypes): if isinstance(func_id, ArgExtOp): - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): + from loopy.target.opencl import CFamilyTarget + if not isinstance(kernel.target, CFamilyTarget): raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op @@ -470,8 +470,8 @@ def reduction_function_mangler(kernel, func_id, arg_dtypes): ) elif isinstance(func_id, SegmentedOp): - from loopy.target.opencl import CTarget - if not isinstance(kernel.target, CTarget): + from loopy.target.opencl import CFamilyTarget + if not isinstance(kernel.target, CFamilyTarget): raise LoopyError("%s: only C-like targets supported for now" % func_id) op = func_id.reduction_op diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 2afcd3db4331d57e1e61c48ba521ebaa296ddbb2..c0eb91ea60317ef8cad1c594571d46bba2d1a671 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -376,18 +376,15 @@ def _check_reduction_is_triangular(kernel, expr, scan_param): sweep_lower_bound = isl.align_spaces( scan_param.sweep_lower_bound, - affs[0], - across_dim_types=True) + affs[0]) sweep_upper_bound = isl.align_spaces( scan_param.sweep_upper_bound, - affs[0], - across_dim_types=True) + affs[0]) scan_lower_bound = isl.align_spaces( scan_param.scan_lower_bound, - affs[0], - across_dim_types=True) + affs[0]) from itertools import product @@ -799,7 +796,7 @@ def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel): TemporaryVariable( name=new_assignee_name, dtype=None, - scope=AddressSpace.PRIVATE)) + address_space=AddressSpace.PRIVATE)) from pymbolic import var new_assignee = var(new_assignee_name) @@ -985,7 +982,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, name=name, shape=(), dtype=None, - scope=AddressSpace.PRIVATE) + address_space=AddressSpace.PRIVATE) from pymbolic import var temp_vars = tuple(var(n) for n in temp_var_names) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8f5337533fd9e96c77b56154c1848f5ac419b425..ccac5e199d2b53e202dd735ffd8dfe20a7dc29a2 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -247,7 +247,7 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase): """ def __init__(self): - super(EqualityPreservingStringifyMapper, self).__init__(constant_mapper=repr) + super(EqualityPreservingStringifyMapper, self).__init__() def map_constant(self, expr, enclosing_prec): if isinstance(expr, np.generic): @@ -257,8 +257,15 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase): return "%s(%s)" % (type(expr).__name__, repr(expr)) else: - return super(EqualityPreservingStringifyMapper, self).map_constant( - expr, enclosing_prec) + result = repr(expr) + + from pymbolic.mapper.stringifier import PREC_SUM + if not (result.startswith("(") and result.endswith(")")) \ + and ("-" in result or "+" in result) \ + and (enclosing_prec > PREC_SUM): + return self.parenthesize(result) + else: + return result class UnidirectionalUnifier(UnidirectionalUnifierBase): diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index a81354e2fd7b52ba514af936441c7a2d980c77b5..73d2a6328af87cb51fb90d43efcde34d39aa8299 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -31,6 +31,7 @@ __doc__ = """ .. autoclass:: TargetBase .. autoclass:: ASTBuilderBase +.. autoclass:: CFamilyTarget .. autoclass:: CTarget .. autoclass:: ExecutableCTarget .. autoclass:: CudaTarget diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 25b190809fdc38341c811ede15a8baae693a3116..6e3602eda11d5f65e8a6af2977966e946c72a718 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -77,6 +77,11 @@ class DTypeRegistryWrapper(object): # {{{ preamble generator +def c99_preamble_generator(preamble_info): + if any(dtype.is_integral() for dtype in preamble_info.seen_dtypes): + yield("10_stdint", "#include <stdint.h>") + + def _preamble_generator(preamble_info): integer_type_names = ["int8", "int16", "int32", "int64"] @@ -314,8 +319,10 @@ class CExpression(object): # }}} -class CTarget(TargetBase): - """A target for plain "C", without any parallel extensions. +class CFamilyTarget(TargetBase): + """A target for "least-common denominator C", without any parallel + extensions, and without use of any C99 specifics. Intended to be + usable as a common base for C99, C++, OpenCL, CUDA, and the like. """ hash_fields = TargetBase.hash_fields + ("fortran_abi",) @@ -323,7 +330,7 @@ class CTarget(TargetBase): def __init__(self, fortran_abi=False): self.fortran_abi = fortran_abi - super(CTarget, self).__init__() + super(CFamilyTarget, self).__init__() def split_kernel_at_global_barriers(self): return False @@ -332,7 +339,7 @@ class CTarget(TargetBase): return DummyHostASTBuilder(self) def get_device_ast_builder(self): - return CASTBuilder(self) + return CFamilyASTBuilder(self) # {{{ types @@ -368,29 +375,6 @@ class CTarget(TargetBase): # }}} -# {{{ executable c target - -class ExecutableCTarget(CTarget): - """ - An executable CTarget that uses (by default) JIT compilation of C-code - """ - - def __init__(self, compiler=None, fortran_abi=False): - super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi) - from loopy.target.c.c_execution import CCompiler - self.compiler = compiler or CCompiler() - - def get_kernel_executor(self, knl, *args, **kwargs): - from loopy.target.c.c_execution import CKernelExecutor - return CKernelExecutor(knl, compiler=self.compiler) - - def get_host_ast_builder(self): - # enable host code generation - return CASTBuilder(self) - -# }}} - - class _ConstRestrictPointer(Pointer): def get_decl_pair(self): sub_tp, sub_decl = self.subdecl.get_decl_pair() @@ -484,24 +468,24 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): # }}} -class CASTBuilder(ASTBuilderBase): +class CFamilyASTBuilder(ASTBuilderBase): # {{{ library def function_manglers(self): return ( - super(CASTBuilder, self).function_manglers() + [ + super(CFamilyASTBuilder, self).function_manglers() + [ c_math_mangler ]) def symbol_manglers(self): return ( - super(CASTBuilder, self).symbol_manglers() + [ + super(CFamilyASTBuilder, self).symbol_manglers() + [ c_symbol_mangler ]) def preamble_generators(self): return ( - super(CASTBuilder, self).preamble_generators() + [ + super(CFamilyASTBuilder, self).preamble_generators() + [ _preamble_generator, ]) @@ -1064,7 +1048,7 @@ def generate_header(kernel, codegen_result=None): functions. """ - if not isinstance(kernel.target, CTarget): + if not isinstance(kernel.target, CFamilyTarget): raise LoopyError( 'Header generation for non C-based languages are not implemented') @@ -1080,4 +1064,57 @@ def generate_header(kernel, codegen_result=None): # }}} + +# {{{ C99 target + +class CTarget(CFamilyTarget): + """This target may emit code using all features of C99. + For a target base supporting "least-common-denominator" C, + see :class:`CFamilyTarget`. + """ + + def get_device_ast_builder(self): + return CASTBuilder(self) + + @memoize_method + def get_dtype_registry(self): + from loopy.target.c.compyte.dtypes import ( + DTypeRegistry, fill_registry_with_c99_stdint_types) + result = DTypeRegistry() + fill_registry_with_c99_stdint_types(result) + return DTypeRegistryWrapper(result) + + +class CASTBuilder(CFamilyASTBuilder): + def preamble_generators(self): + return ( + super(CASTBuilder, self).preamble_generators() + [ + c99_preamble_generator, + ]) + +# }}} + + +# {{{ executable c target + +class ExecutableCTarget(CTarget): + """ + An executable CFamilyTarget that uses (by default) JIT compilation of C-code + """ + + def __init__(self, compiler=None, fortran_abi=False): + super(ExecutableCTarget, self).__init__(fortran_abi=fortran_abi) + from loopy.target.c.c_execution import CCompiler + self.compiler = compiler or CCompiler() + + def get_kernel_executor(self, knl, *args, **kwargs): + from loopy.target.c.c_execution import CKernelExecutor + return CKernelExecutor(knl, compiler=self.compiler) + + def get_host_ast_builder(self): + # enable host code generation + return CFamilyASTBuilder(self) + +# }}} + # vim: foldmethod=marker diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 60947c7f77d09582868304ded121386bbb3aab68..698507978f7c20d6d594fd3e03626e7b12012a94 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -306,7 +306,8 @@ class IDIToCDLL(object): """ def __init__(self, target): self.target = target - self.registry = target.get_dtype_registry().wrapped_registry + from loopy.target.c import CFamilyTarget + self.registry = CFamilyTarget().get_dtype_registry().wrapped_registry def __call__(self, knl, idi): # next loop through the implemented data info to get the arg data diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index 11dc00352423cddd71f09e809d0a22ab1c3ea7a5..25ee8b48fd0c7d9f0bd987c6862cdb1884fb1372 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit 11dc00352423cddd71f09e809d0a22ab1c3ea7a5 +Subproject commit 25ee8b48fd0c7d9f0bd987c6862cdb1884fb1372 diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 8f14738c307ce7f2d98a47ef0dc086b4c69f7910..50fd1026f7bd15ce72915d0d5d5e60f6da4e264c 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -28,7 +28,7 @@ import numpy as np from pytools import memoize_method -from loopy.target.c import CTarget, CASTBuilder +from loopy.target.c import CFamilyTarget, CFamilyASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from loopy.diagnostic import LoopyError from loopy.types import NumpyType @@ -169,7 +169,7 @@ class ExpressionToCudaCExpressionMapper(ExpressionToCExpressionMapper): # {{{ target -class CudaTarget(CTarget): +class CudaTarget(CFamilyTarget): """A target for Nvidia's CUDA GPU programming language.""" def __init__(self, extern_c=True): @@ -216,7 +216,7 @@ class CudaTarget(CTarget): # {{{ ast builder -class CUDACASTBuilder(CASTBuilder): +class CUDACASTBuilder(CFamilyASTBuilder): # {{{ library def function_manglers(self): diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index cccee2301e44b16e2454bda5e98af7db7893c003..eb0157bf86d478901fb5a07bbac28aa7a11bcec9 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -26,7 +26,7 @@ THE SOFTWARE. import numpy as np # noqa -from loopy.target.c import CTarget, CASTBuilder +from loopy.target.c import CFamilyTarget, CFamilyASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from loopy.diagnostic import LoopyError from loopy.symbolic import Literal @@ -154,7 +154,7 @@ def fill_registry_with_ispc_types(reg, respect_windows, include_bool=True): # }}} -class ISPCTarget(CTarget): +class ISPCTarget(CFamilyTarget): """A code generation target for Intel's `ISPC <https://ispc.github.io/>`_ SPMD programming language, to target Intel's Knight's hardware and modern Intel CPUs with wide vector units. @@ -200,7 +200,7 @@ class ISPCTarget(CTarget): # }}} -class ISPCASTBuilder(CASTBuilder): +class ISPCASTBuilder(CFamilyASTBuilder): def _arg_names_and_decls(self, codegen_state): implemented_data_info = codegen_state.implemented_data_info arg_names = [iai.name for iai in implemented_data_info] diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 9bdfa8ded76b182ea1552680d65392d4bff219ec..4569be50367b3063999656bcd1de9d76f98e8c0a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -26,7 +26,7 @@ THE SOFTWARE. import numpy as np -from loopy.target.c import CTarget, CASTBuilder +from loopy.target.c import CFamilyTarget, CFamilyASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method from loopy.diagnostic import LoopyError @@ -304,7 +304,7 @@ class ExpressionToOpenCLCExpressionMapper(ExpressionToCExpressionMapper): # {{{ target -class OpenCLTarget(CTarget): +class OpenCLTarget(CFamilyTarget): """A target for the OpenCL C heterogeneous compute programming language. """ @@ -362,7 +362,7 @@ class OpenCLTarget(CTarget): # {{{ ast builder -class OpenCLCASTBuilder(CASTBuilder): +class OpenCLCASTBuilder(CFamilyASTBuilder): # {{{ library def function_manglers(self): diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index 63d3a40fb6c6967cac5e6149d5cf51bb7c2efbb9..7f4779cc7c0af3fa228ca51a3f8d45944ec21bff 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -378,7 +378,7 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, dtype=var_descr.dtype, base_indices=(0,)*len(abm.non1_storage_shape), shape=tuple(abm.non1_storage_shape), - scope=temporary_scope) + address_space=temporary_scope) new_temporary_variables[buf_var_name] = temp_var diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 7fbc595f27ea34f9056b36e3d6f0e168b182c24e..a6a2d7b4fe4ba94caa8cbe112a5cf90719ceb643 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -696,7 +696,7 @@ def set_temporary_scope(kernel, temp_var_names, scope): except KeyError: raise LoopyError("temporary '%s' not found" % tv_name) - new_temp_vars[tv_name] = tv.copy(scope=scope) + new_temp_vars[tv_name] = tv.copy(address_space=scope) return kernel.copy(temporary_variables=new_temp_vars) diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index d4dcb3701f4f23a5b1c66b1559bf6c4879425902..21e61075596bc2b795434716ba8a4347f5cfb173 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -37,7 +37,7 @@ from loopy.diagnostic import LoopyError # {{{ diff mapper -def func_map(i, func, args): +def func_map(i, func, args, allowed_nonsmoothness): if func.name == "exp": return var("exp")(*args) elif func.name == "log": @@ -62,8 +62,17 @@ def func_map(i, func, args): class LoopyDiffMapper(DifferentiationMapper, RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, diff_context, diff_inames): + def __init__(self, rule_mapping_context, diff_context, diff_inames, + allowed_nonsmoothness=None): RuleAwareIdentityMapper.__init__(self, rule_mapping_context) + DifferentiationMapper.__init__( + self, + + # This is actually ignored because we + # override map_variable below. + variable=None, + + allowed_nonsmoothness=None) self.diff_context = diff_context self.diff_inames = diff_inames self.diff_iname_exprs = tuple(var(diname) for diname in diff_inames) diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 49e30a7516cbbf00a07aace34831eb857a877432..70ad2406aabdd63ee21c448aac1091999247925e 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -209,9 +209,9 @@ def _fuse_two_kernels(knla, knlb): knlb = _apply_renames_in_exprs(knlb, b_var_renames) from pymbolic.imperative.transform import \ - fuse_instruction_streams_with_unique_ids + fuse_statement_streams_with_unique_ids new_instructions, old_b_id_to_new_b_id = \ - fuse_instruction_streams_with_unique_ids( + fuse_statement_streams_with_unique_ids( knla.instructions, knlb.instructions) # {{{ fuse assumptions diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index f2b184a4119485e53d7dee14b1a322be45a0bfe3..9f426f76bc6902fd09bd7685c73f187df935be1e 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -956,13 +956,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ set up temp variable import loopy as lp - if dtype is None: - dtype = lp.auto - else: + if dtype is not None: dtype = np.dtype(dtype) - import loopy as lp - if temporary_address_space is None: temporary_address_space = lp.auto diff --git a/loopy/transform/save.py b/loopy/transform/save.py index cca62bc522bb110ec4aeb190b538e5b6e8583abf..baa558a72861f31c5ce707329ea84786b96eb6d2 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -228,7 +228,7 @@ class TemporarySaver(object): return TemporaryVariable( name=self.name, dtype=temporary.dtype, - scope=AddressSpace.GLOBAL, + address_space=AddressSpace.GLOBAL, shape=self.new_shape) @property diff --git a/loopy/version.py b/loopy/version.py index 66c7a0cf46f3b40a8cc0af345d55c40e4e7d7f4f..29abbc2de889b884de93e5fe39a1d996811c93c9 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -29,7 +29,7 @@ if os.environ.get("AKPYTHON_EXEC_IMPORT_UNAVAILABLE") is not None: _git_rev = None else: - import loopy._git_rev as _git_rev_mod + import loopy._git_rev as _git_rev_mod # pylint: disable=no-name-in-module,import-error # noqa: E501 _git_rev = _git_rev_mod.GIT_REVISION # If we're running from a dev tree, the last install (and hence the most diff --git a/requirements-old-pyopencl.txt b/requirements-old-pyopencl.txt deleted file mode 100644 index 1f1f16075cd9178209a0e601af4177eb3480a517..0000000000000000000000000000000000000000 --- a/requirements-old-pyopencl.txt +++ /dev/null @@ -1,11 +0,0 @@ -git+https://github.com/inducer/pytools.git -git+https://github.com/inducer/islpy.git -git+https://github.com/inducer/cgen.git -git+https://github.com/inducer/pyopencl.git@deprecated-boost-python -git+https://github.com/inducer/pymbolic.git -git+https://github.com/inducer/genpy.git - -hg+https://bitbucket.org/inducer/f2py - -# Optional, needed for using the C preprocessor on Fortran -ply>=3.6 diff --git a/setup.py b/setup.py index c579bccf1aef27680dc23fa8291390b68923fcb6..75d8b340e8ad98794a244f7e5da89e079870bd2b 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ setup(name="loo.py", install_requires=[ "pytools>=2018.4", - "pymbolic>=2019.1", + "pymbolic>=2019.2", "genpy>=2016.1.2", "cgen>=2016.1", "islpy>=2019.1", diff --git a/test/test_apps.py b/test/test_apps.py index e7f4004fa0f2285920bdf9a0848c0d400e2c31b7..e07262dbdda8ad3c24522f7d0eb4dba8422bf0ce 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -619,7 +619,8 @@ def test_poisson_fem(ctx_factory): def test_domain_tree_nesting(): # From https://github.com/inducer/loopy/issues/78 - from loopy.kernel.data import temp_var_scope as scopes + + AS = lp.AddressSpace # noqa out_map = np.array([1, 2], dtype=np.int32) if_val = np.array([-1, 0], dtype=np.int32) @@ -651,12 +652,13 @@ def test_domain_tree_nesting(): end """, [ - TV('out_map', initializer=out_map, read_only=True, scope=scopes.PRIVATE), - TV('if_val', initializer=if_val, read_only=True, scope=scopes.PRIVATE), - TV('vals', initializer=vals, read_only=True, scope=scopes.PRIVATE), - TV('num_vals', initializer=num_vals, read_only=True, scope=scopes.PRIVATE), + TV('out_map', initializer=out_map, read_only=True, address_space=AS.PRIVATE), + TV('if_val', initializer=if_val, read_only=True, address_space=AS.PRIVATE), + TV('vals', initializer=vals, read_only=True, address_space=AS.PRIVATE), + TV('num_vals', initializer=num_vals, read_only=True, + address_space=AS.PRIVATE), TV('num_vals_offset', initializer=num_vals_offset, read_only=True, - scope=scopes.PRIVATE), + address_space=AS.PRIVATE), lp.GlobalArg('B', shape=(100, 31), dtype=np.float64), lp.GlobalArg('out', shape=(100, 12), dtype=np.float64)]) diff --git a/test/test_c_execution.py b/test/test_c_execution.py index bf168c11d838248947a2806123053e63c13ccbeb..b0ca7ade25d3077c7f868f366cb9ff6bb011af33 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -275,7 +275,7 @@ def test_c_execution_with_global_temporaries(): # global constant temporaries is None from loopy.target.c import ExecutableCTarget - from loopy.kernel.data import temp_var_scope as scopes + AS = lp.AddressSpace # noqa n = 10 knl = lp.make_kernel('{[i]: 0 <= i < n}', @@ -287,7 +287,7 @@ def test_c_execution_with_global_temporaries(): initializer=np.arange(n, dtype=np.int32), dtype=np.int32, read_only=True, - scope=scopes.GLOBAL)], + address_space=AS.GLOBAL)], target=ExecutableCTarget()) knl = lp.fix_parameters(knl, n=n) diff --git a/test/test_expression.py b/test/test_expression.py index 752d9ab2da3e2865891da8eedb4d5295b8b35826..41a8de656efcfc44fe404fa4722572d36c974409 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -411,9 +411,13 @@ def test_indexof_vec(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - if ctx.devices[0].platform.name.startswith("Portable"): - # Accurate as of 2015-10-08 - pytest.skip("POCL miscompiles vector code") + if ( + # Accurate as of 2015-10-08 + ctx.devices[0].platform.name.startswith("Portable") + or + # Accurate as of 2019-11-04 + ctx.devices[0].platform.name.startswith("Intel")): + pytest.skip("target ICD miscompiles vector code") knl = lp.make_kernel( ''' { [i,j,k]: 0<=i,j,k<4 } ''', diff --git a/test/test_linalg.py b/test/test_linalg.py index fec6cd5e7f3ffbb823f36d2de2b17ff9190273d6..f075d3493195ec3364c4de0d26f92c4a987e7187 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -27,7 +27,8 @@ import pytest import sys import numpy as np import pyopencl as cl -import pyopencl.array as cl_array +import pyopencl.array as cl_array # noqa: F401 +import pyopencl.cltypes as cltypes import loopy as lp import logging @@ -71,15 +72,14 @@ def test_axpy(ctx_factory): n = 3145182 - vec = cl_array.vec - if ctx.devices[0].platform.vendor.startswith("Advanced Micro"): pytest.skip("crashes on AMD 15.12") for dtype, check, a, b in [ (np.complex64, None, 5, 7), - (vec.float4, check_float4, - vec.make_float4(1, 2, 3, 4), vec.make_float4(6, 7, 8, 9)), + (cltypes.float4, check_float4, # pylint:disable=no-member + cltypes.make_float4(1, 2, 3, 4), # pylint:disable=no-member + cltypes.make_float4(6, 7, 8, 9)), # pylint:disable=no-member (np.float32, None, 5, 7), ]: knl = lp.make_kernel( @@ -163,7 +163,7 @@ def test_plain_matrix_mul(ctx_factory): n = get_suitable_size(ctx) for dtype, check, vec_size in [ - (cl_array.vec.float4, check_float4, 4), + (cltypes.float4, check_float4, 4), # pylint:disable=no-member (np.float32, None, 1), ]: knl = lp.make_kernel( diff --git a/test/test_loopy.py b/test/test_loopy.py index 36dc35688472fea444a46e03cf076ddde5cf18a4..203ebb3922d3cc7f41b56abc31202b8974b88117 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -107,9 +107,7 @@ def test_complicated_subst(ctx_factory): assert substs_with_letter == how_many -def test_type_inference_no_artificial_doubles(ctx_factory): - ctx = ctx_factory() - +def test_type_inference_no_artificial_doubles(): knl = lp.make_kernel( "{[i]: 0<=i<n}", """ @@ -124,7 +122,7 @@ def test_type_inference_no_artificial_doubles(ctx_factory): ], assumptions="n>=1") - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) for k in lp.generate_loop_schedules(knl): code = lp.generate_code(k) assert "double" not in code @@ -186,7 +184,7 @@ def test_simple_side_effect(ctx_factory): [lp.GlobalArg("a", np.float32, shape=(100,))] ) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) kernel_gen = lp.generate_loop_schedules(knl) for gen_knl in kernel_gen: @@ -208,7 +206,7 @@ def test_owed_barriers(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0")) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) kernel_gen = lp.generate_loop_schedules(knl) for gen_knl in kernel_gen: @@ -229,7 +227,7 @@ def test_wg_too_small(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0")) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) kernel_gen = lp.generate_loop_schedules(knl) import pytest @@ -252,7 +250,7 @@ def test_multi_cse(ctx_factory): knl = lp.split_iname(knl, "i", 16, inner_tag="l.0") knl = lp.add_prefetch(knl, "a", []) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) kernel_gen = lp.generate_loop_schedules(knl) for gen_knl in kernel_gen: @@ -278,9 +276,8 @@ def test_bare_data_dependency(ctx_factory): lp.ValueArg("n", np.int32), ]) - cknl = lp.CompiledKernel(ctx, knl) n = 20000 - evt, (a,) = cknl(queue, n=n, out_host=True) + evt, (a,) = knl(queue, n=n, out_host=True) assert a.shape == (n,) assert (a == 1).all() @@ -288,10 +285,7 @@ def test_bare_data_dependency(ctx_factory): # {{{ test race detection -@pytest.mark.skipif("sys.version_info < (2,6)") -def test_ilp_write_race_detection_global(ctx_factory): - ctx = ctx_factory() - +def test_ilp_write_race_detection_global(): knl = lp.make_kernel( "[n] -> {[i,j]: 0<=i,j<n }", [ @@ -305,7 +299,7 @@ def test_ilp_write_race_detection_global(ctx_factory): knl = lp.tag_inames(knl, dict(j="ilp")) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) with lp.CacheMode(False): from loopy.diagnostic import WriteRaceConditionWarning @@ -317,9 +311,7 @@ def test_ilp_write_race_detection_global(ctx_factory): for w in warn_list) -def test_ilp_write_race_avoidance_local(ctx_factory): - ctx = ctx_factory() - +def test_ilp_write_race_avoidance_local(): knl = lp.make_kernel( "{[i,j]: 0<=i<16 and 0<=j<17 }", [ @@ -329,14 +321,12 @@ def test_ilp_write_race_avoidance_local(ctx_factory): knl = lp.tag_inames(knl, dict(i="l.0", j="ilp")) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) for k in lp.generate_loop_schedules(knl): assert k.temporary_variables["a"].shape == (16, 17) -def test_ilp_write_race_avoidance_private(ctx_factory): - ctx = ctx_factory() - +def test_ilp_write_race_avoidance_private(): knl = lp.make_kernel( "{[j]: 0<=j<16 }", [ @@ -346,7 +336,7 @@ def test_ilp_write_race_avoidance_private(ctx_factory): knl = lp.tag_inames(knl, dict(j="ilp")) - knl = lp.preprocess_kernel(knl, ctx.devices[0]) + knl = lp.preprocess_kernel(knl) for k in lp.generate_loop_schedules(knl): assert k.temporary_variables["a"].shape == (16,) @@ -494,9 +484,7 @@ def test_offsets_and_slicing(ctx_factory): assumptions="n>=1 and m>=1", default_offset=lp.auto) - knl = lp.tag_data_axes(knl, "a,b", "stride:auto,stride:1") - - cknl = lp.CompiledKernel(ctx, knl) + knl = lp.tag_array_axes(knl, "a,b", "stride:auto,stride:1") a_full = cl.clrandom.rand(queue, (n, n), np.float64) a_full_h = a_full.get() @@ -511,8 +499,10 @@ def test_offsets_and_slicing(ctx_factory): b_full_h[b_sub] = 2*a_full_h[a_sub] - print(cknl.get_highlighted_code({"a": a.dtype})) - cknl(queue, a=a, b=b) + #print(cknl.get_highlighted_code({"a": a.dtype})) + knl = lp.set_options(knl, write_cl=True) + + knl(queue, a=a, b=b) import numpy.linalg as la assert la.norm(b_full.get() - b_full_h) < 1e-13 @@ -657,7 +647,7 @@ def test_vector_types(ctx_factory, vec_len): ref_knl = knl - knl = lp.tag_data_axes(knl, "out", "c,vec") + knl = lp.tag_array_axes(knl, "out", "c,vec") knl = lp.tag_inames(knl, dict(j="unr")) knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") @@ -947,7 +937,7 @@ def test_atomic_load(ctx_factory, dtype): lp.GlobalArg("a", dtype, shape=lp.auto), lp.GlobalArg("b", dtype, shape=lp.auto), lp.TemporaryVariable('temp', dtype, for_atomic=True, - scope=AddressSpace.LOCAL), + address_space=AddressSpace.LOCAL), "..." ], silenced_warnings=["write_race(init)", "write_race(temp_sum)"]) @@ -1030,7 +1020,7 @@ def test_literal_local_barrier(ctx_factory): def test_local_barrier_mem_kind(): - def __test_type(mtype, expected): + def _test_type(mtype, expected): insn = '... lbarrier' if mtype: insn += '{mem_kind=%s}' % mtype @@ -1046,9 +1036,9 @@ def test_local_barrier_mem_kind(): cgr = lp.generate_code_v2(knl) assert 'barrier(%s)' % expected in cgr.device_code() - __test_type('', 'CLK_LOCAL_MEM_FENCE') - __test_type('global', 'CLK_GLOBAL_MEM_FENCE') - __test_type('local', 'CLK_LOCAL_MEM_FENCE') + _test_type('', 'CLK_LOCAL_MEM_FENCE') + _test_type('global', 'CLK_GLOBAL_MEM_FENCE') + _test_type('local', 'CLK_LOCAL_MEM_FENCE') def test_kernel_splitting(ctx_factory): @@ -1862,13 +1852,13 @@ def test_temp_initializer(ctx_factory, src_order, tmp_order): lp.TemporaryVariable("tmp", initializer=a, shape=lp.auto, - scope=lp.AddressSpace.PRIVATE, + address_space=lp.AddressSpace.PRIVATE, read_only=True, order=tmp_order), "..." ]) - knl = lp.set_options(knl, write_cl=True, highlight_cl=True) + knl = lp.set_options(knl, write_cl=True) knl = lp.fix_parameters(knl, n=a.shape[0]) evt, (a2,) = knl(queue, out_host=True) @@ -1887,7 +1877,7 @@ def test_const_temp_with_initializer_not_saved(): lp.TemporaryVariable("tmp", initializer=np.arange(10), shape=lp.auto, - scope=lp.AddressSpace.PRIVATE, + address_space=lp.AddressSpace.PRIVATE, read_only=True), "..." ], @@ -2036,6 +2026,13 @@ def test_tight_loop_bounds(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + if (queue.device.platform.vendor == "Intel(R) Corporation" + and queue.device.driver_version in [ + "2019.8.7.0", + "2019.8.8.0", + ]): + pytest.skip("Intel CL miscompiles this kernel") + knl = lp.make_kernel( ["{ [i] : 0 <= i <= 5 }", "[i] -> { [j] : 2 * i - 2 < j <= 2 * i and 0 <= j <= 9 }"], @@ -2050,6 +2047,8 @@ def test_tight_loop_bounds(ctx_factory): knl = lp.split_iname(knl, "i", 5, inner_tag="l.0", outer_tag="g.0") + knl = lp.set_options(knl, write_cl=True) + evt, (out,) = knl(queue, out_host=True) assert (out == np.arange(10)).all() @@ -2110,7 +2109,7 @@ def test_integer_reduction(ctx_factory): var_int = np.random.randint(1000, size=n).astype(vtype) var_lp = lp.TemporaryVariable('var', initializer=var_int, read_only=True, - scope=lp.AddressSpace.PRIVATE, + address_space=lp.AddressSpace.PRIVATE, dtype=to_loopy_type(vtype), shape=lp.auto) @@ -2169,11 +2168,11 @@ def test_complicated_argmin_reduction(ctx_factory): and qbx_forced_limit * center_side[ictr] > 0) ) - <> post_dist_sq = if(matches, dist_sq, HUGE) + <> post_dist_sq = dist_sq if matches else HUGE end <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq) - tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1) + tgt_to_qbx_center[itgt] = min_ictr if min_dist_sq < HUGE else -1 end """) @@ -2301,7 +2300,7 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): end """, [lp.TemporaryVariable("a", np.float32, shape=(10,), order='C', - scope=lp.AddressSpace.LOCAL), + address_space=lp.AddressSpace.LOCAL), lp.GlobalArg("b", np.float32, shape=(11,), order='C')], seq_dependencies=True) @@ -2553,7 +2552,7 @@ def test_preamble_with_separate_temporaries(ctx_factory): [lp.GlobalArg('out', shape=('n',)), lp.TemporaryVariable( 'offsets', shape=(offsets.size,), initializer=offsets, - scope=lp.AddressSpace.GLOBAL, + address_space=lp.AddressSpace.GLOBAL, read_only=True), lp.GlobalArg('data', shape=(data.size,), dtype=np.float64)], ) @@ -2686,8 +2685,8 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): a[%s] = 13 {id=second,dep=first} """ % second_index, [ - lp.TemporaryVariable("a", lp.auto, shape=(256,), - scope=lp.AddressSpace.LOCAL), + lp.TemporaryVariable("a", dtype=None, shape=(256,), + address_space=lp.AddressSpace.LOCAL), ]) knl = lp.tag_inames(knl, "i:l.0") @@ -2705,7 +2704,7 @@ def test_half_complex_conditional(ctx_factory): knl = lp.make_kernel( "{[i]: 0 <= i < 10}", """ - tmp[i] = if(i < 5, 0, 0j) + tmp[i] = 0 if i < 5 else 0j """) knl(queue) @@ -2769,6 +2768,15 @@ def test_backwards_dep_printing_and_error(): def test_dump_binary(ctx_factory): ctx = ctx_factory() + device = ctx.devices[0] + + if (device.platform.vendor == "Intel(R) Corporation" + and device.driver_version in [ + "2019.8.7.0", + "2019.8.8.0", + ]): + pytest.skip("Intel CL doesn't implement Kernel.program") + knl = lp.make_kernel( "{ [i]: 0<=i<n }", """ diff --git a/test/test_reduction.py b/test/test_reduction.py index ef229d5cd08554d6656d23d83bc0c6b66ee77b9f..4ce06345499480e521f7dc12a6620271d7b99522 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -240,7 +240,7 @@ def test_global_parallel_reduction(ctx_factory, size): knl = reduction_arg_to_subst_rule(knl, "i_outer") knl = lp.precompute(knl, "red_i_outer_arg", "i_outer", - temporary_scope=lp.temp_var_scope.GLOBAL, + temporary_address_space=lp.AddressSpace.GLOBAL, default_tag="l.auto") knl = lp.realize_reduction(knl) knl = lp.tag_inames(knl, "i_outer_0:g.0") @@ -285,7 +285,7 @@ def test_global_mc_parallel_reduction(ctx_factory, size): from loopy.transform.data import reduction_arg_to_subst_rule knl = reduction_arg_to_subst_rule(knl, "i_outer") knl = lp.precompute(knl, "red_i_outer_arg", "i_outer", - temporary_scope=lp.temp_var_scope.GLOBAL, + temporary_address_space=lp.AddressSpace.GLOBAL, default_tag="l.auto") knl = lp.realize_reduction(knl) knl = lp.add_dependency( diff --git a/test/test_transform.py b/test/test_transform.py index 3ee67b703964d1f7773b10a9199687d78b883a60..cdc0c14b8bacc4fe5279d000461c0ea2244af021 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -135,7 +135,7 @@ def test_to_batched_temp(ctx_factory): "cnst", dtype=np.float32, shape=(), - scope=lp.temp_var_scope.PRIVATE), '...']) + address_space=lp.AddressSpace.PRIVATE), '...']) knl = lp.add_and_infer_dtypes(knl, dict(out=np.float32, x=np.float32, a=np.float32)) diff --git a/test/testlib.py b/test/testlib.py index ad290ee7c60297aadd4a6baa0814b8976403cb53..67c5ba04fefde9a7516f22bce679744ce61a4f20 100644 --- a/test/testlib.py +++ b/test/testlib.py @@ -80,7 +80,6 @@ class SeparateTemporariesPreambleTestMangler( class SeparateTemporariesPreambleTestPreambleGenerator( SeparateTemporariesPreambleTestDataHolder): def __call__(self, preamble_info): - from loopy.kernel.data import temp_var_scope as scopes # find a function matching our name func_match = next( @@ -96,7 +95,7 @@ class SeparateTemporariesPreambleTestPreambleGenerator( var = lp.TemporaryVariable( 'lookup', initializer=self.arr, dtype=self.arr.dtype, shape=self.arr.shape, - scope=scopes.GLOBAL, read_only=True) + address_space=lp.AddressSpace.GLOBAL, read_only=True) # and code code = """ int {name}(int start, int end, int match)