diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a6f81e5b6e19af6362b51aec26f277a8d83bb829..c11e507ee79cdc6f1567acbf6c12bbd7ed22f1cc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,38 +17,36 @@ Python 2.7 POCL: junit: test/pytest.xml -Python 2.7 with legacy PyOpenCL: +Python 3 POCL: script: - - export PY_EXE=python2.7 + - export PY_EXE=python3 - export PYOPENCL_TEST=portable - export EXTRA_INSTALL="pybind11 numpy mako" - - export REQUIREMENTS_TXT="requirements-old-pyopencl.txt" - export LOOPY_NO_CACHE=1 - - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - - python2.7 + - python3 - pocl except: - tags - retry: 2 artifacts: reports: junit: test/pytest.xml - -Python 3 POCL: +Python 3 Intel: script: - export PY_EXE=python3 - - export PYOPENCL_TEST=portable + - export PYOPENCL_TEST=intel - export EXTRA_INSTALL="pybind11 numpy mako" - export LOOPY_NO_CACHE=1 + - export LOOPY_INTEL_CL_OK_FOR_TEST_REF=1 + - source /opt/enable-intel-cl.sh - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: - python3 - - pocl + - intel-cl-cpu except: - tags artifacts: diff --git a/loopy/__init__.py b/loopy/__init__.py index a1c97d2f402656affabb97d1484dc34572240f39..b60de6e2dcd35c1c167bf5e303401f2c6242ebec 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -473,7 +473,8 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None): ) result = make_kernel(set_str, "output[%s] = input[%s]" - % (commad_indices, commad_indices)) + % (commad_indices, commad_indices), + lang_version=MOST_RECENT_LANGUAGE_VERSION) result = tag_array_axes(result, "input", old_dim_tags) result = tag_array_axes(result, "output", new_dim_tags) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 66b3cb0bde00dee3ae46fcf63052219cc3664360..6837b99a026debf32b12aceef00ed3863c620639 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -23,9 +23,11 @@ THE SOFTWARE. """ from six.moves import range, zip -from pytools import Record +import os from warnings import warn +from pytools import Record + import numpy as np import loopy as lp @@ -321,7 +323,7 @@ def _default_check_result(result, ref_result): # {{{ find device for reference test -def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): +def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors, need_image_support): import pyopencl as cl noncpu_devs = [] @@ -336,8 +338,17 @@ def _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): for bl in blacklist_ref_vendors): continue + if need_image_support: + if not dev.image_support: + continue + if pf.vendor == "The pocl project": + # Hahaha, no. + continue + if dev.type & cl.device_type.CPU: - if "Intel" in dev.platform.vendor: + if ("Intel" in dev.platform.vendor + and os.environ.get("LOOPY_INTEL_CL_OK_FOR_TEST_REF") + is None): # Sorry, Intel, your CPU CL has gotten too crashy of late. # (Feb 2016) continue @@ -427,7 +438,12 @@ def auto_test_vs_ref( ref_errors = [] - for dev in _enumerate_cl_devices_for_ref_test(blacklist_ref_vendors): + from loopy.kernel.data import ImageArg + need_ref_image_support = any(isinstance(arg, ImageArg) for arg in ref_knl.args) + + for dev in _enumerate_cl_devices_for_ref_test( + blacklist_ref_vendors, need_ref_image_support): + ref_ctx = cl.Context([dev]) ref_queue = cl.CommandQueue(ref_ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 2f32d94f662fe59d23ffb06dd13ade4a057304aa..157099df5a2133baa109f24e8216d63577b5dcb4 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1868,7 +1868,7 @@ def infer_arg_is_output_only(kernel): from loopy.kernel.data import ArrayArg, ValueArg, ConstantArg, ImageArg new_args = [] for arg in kernel.args: - if isinstance(arg, (ArrayArg, ImageArg, ValueArg)): + if isinstance(arg, ArrayArg): if arg.is_output_only is not None: assert isinstance(arg.is_output_only, bool) new_args.append(arg) @@ -1877,7 +1877,7 @@ def infer_arg_is_output_only(kernel): new_args.append(arg.copy(is_output_only=True)) else: new_args.append(arg.copy(is_output_only=False)) - elif isinstance(arg, ConstantArg): + elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): new_args.append(arg) else: raise NotImplementedError("Unkonwn argument type %s." % type(arg)) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 8f5337533fd9e96c77b56154c1848f5ac419b425..ccac5e199d2b53e202dd735ffd8dfe20a7dc29a2 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -247,7 +247,7 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase): """ def __init__(self): - super(EqualityPreservingStringifyMapper, self).__init__(constant_mapper=repr) + super(EqualityPreservingStringifyMapper, self).__init__() def map_constant(self, expr, enclosing_prec): if isinstance(expr, np.generic): @@ -257,8 +257,15 @@ class EqualityPreservingStringifyMapper(StringifyMapperBase): return "%s(%s)" % (type(expr).__name__, repr(expr)) else: - return super(EqualityPreservingStringifyMapper, self).map_constant( - expr, enclosing_prec) + result = repr(expr) + + from pymbolic.mapper.stringifier import PREC_SUM + if not (result.startswith("(") and result.endswith(")")) \ + and ("-" in result or "+" in result) \ + and (enclosing_prec > PREC_SUM): + return self.parenthesize(result) + else: + return result class UnidirectionalUnifier(UnidirectionalUnifierBase): diff --git a/loopy/transform/fusion.py b/loopy/transform/fusion.py index 49e30a7516cbbf00a07aace34831eb857a877432..70ad2406aabdd63ee21c448aac1091999247925e 100644 --- a/loopy/transform/fusion.py +++ b/loopy/transform/fusion.py @@ -209,9 +209,9 @@ def _fuse_two_kernels(knla, knlb): knlb = _apply_renames_in_exprs(knlb, b_var_renames) from pymbolic.imperative.transform import \ - fuse_instruction_streams_with_unique_ids + fuse_statement_streams_with_unique_ids new_instructions, old_b_id_to_new_b_id = \ - fuse_instruction_streams_with_unique_ids( + fuse_statement_streams_with_unique_ids( knla.instructions, knlb.instructions) # {{{ fuse assumptions diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index f2b184a4119485e53d7dee14b1a322be45a0bfe3..9f426f76bc6902fd09bd7685c73f187df935be1e 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -956,13 +956,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ set up temp variable import loopy as lp - if dtype is None: - dtype = lp.auto - else: + if dtype is not None: dtype = np.dtype(dtype) - import loopy as lp - if temporary_address_space is None: temporary_address_space = lp.auto diff --git a/requirements-old-pyopencl.txt b/requirements-old-pyopencl.txt deleted file mode 100644 index 1f1f16075cd9178209a0e601af4177eb3480a517..0000000000000000000000000000000000000000 --- a/requirements-old-pyopencl.txt +++ /dev/null @@ -1,11 +0,0 @@ -git+https://github.com/inducer/pytools.git -git+https://github.com/inducer/islpy.git -git+https://github.com/inducer/cgen.git -git+https://github.com/inducer/pyopencl.git@deprecated-boost-python -git+https://github.com/inducer/pymbolic.git -git+https://github.com/inducer/genpy.git - -hg+https://bitbucket.org/inducer/f2py - -# Optional, needed for using the C preprocessor on Fortran -ply>=3.6 diff --git a/test/test_expression.py b/test/test_expression.py index 752d9ab2da3e2865891da8eedb4d5295b8b35826..41a8de656efcfc44fe404fa4722572d36c974409 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -411,9 +411,13 @@ def test_indexof_vec(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) - if ctx.devices[0].platform.name.startswith("Portable"): - # Accurate as of 2015-10-08 - pytest.skip("POCL miscompiles vector code") + if ( + # Accurate as of 2015-10-08 + ctx.devices[0].platform.name.startswith("Portable") + or + # Accurate as of 2019-11-04 + ctx.devices[0].platform.name.startswith("Intel")): + pytest.skip("target ICD miscompiles vector code") knl = lp.make_kernel( ''' { [i,j,k]: 0<=i,j,k<4 } ''', diff --git a/test/test_linalg.py b/test/test_linalg.py index fec6cd5e7f3ffbb823f36d2de2b17ff9190273d6..f075d3493195ec3364c4de0d26f92c4a987e7187 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -27,7 +27,8 @@ import pytest import sys import numpy as np import pyopencl as cl -import pyopencl.array as cl_array +import pyopencl.array as cl_array # noqa: F401 +import pyopencl.cltypes as cltypes import loopy as lp import logging @@ -71,15 +72,14 @@ def test_axpy(ctx_factory): n = 3145182 - vec = cl_array.vec - if ctx.devices[0].platform.vendor.startswith("Advanced Micro"): pytest.skip("crashes on AMD 15.12") for dtype, check, a, b in [ (np.complex64, None, 5, 7), - (vec.float4, check_float4, - vec.make_float4(1, 2, 3, 4), vec.make_float4(6, 7, 8, 9)), + (cltypes.float4, check_float4, # pylint:disable=no-member + cltypes.make_float4(1, 2, 3, 4), # pylint:disable=no-member + cltypes.make_float4(6, 7, 8, 9)), # pylint:disable=no-member (np.float32, None, 5, 7), ]: knl = lp.make_kernel( @@ -163,7 +163,7 @@ def test_plain_matrix_mul(ctx_factory): n = get_suitable_size(ctx) for dtype, check, vec_size in [ - (cl_array.vec.float4, check_float4, 4), + (cltypes.float4, check_float4, 4), # pylint:disable=no-member (np.float32, None, 1), ]: knl = lp.make_kernel( diff --git a/test/test_loopy.py b/test/test_loopy.py index c06d1d5e199a093e5153f2ad6268827a209e425e..6b78ac26b78d8c85dab3cd41af0ce1d99d52ec07 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -107,9 +107,7 @@ def test_complicated_subst(ctx_factory): assert substs_with_letter == how_many -def test_type_inference_no_artificial_doubles(ctx_factory): - ctx = ctx_factory() - +def test_type_inference_no_artificial_doubles(): knl = lp.make_kernel( "{[i]: 0<=i {[i,j]: 0<=i,j { [j] : 2 * i - 2 < j <= 2 * i and 0 <= j <= 9 }"], @@ -2050,6 +2047,8 @@ def test_tight_loop_bounds(ctx_factory): knl = lp.split_iname(knl, "i", 5, inner_tag="l.0", outer_tag="g.0") + knl = lp.set_options(knl, write_cl=True) + evt, (out,) = knl(queue, out_host=True) assert (out == np.arange(10)).all() @@ -2169,11 +2168,11 @@ def test_complicated_argmin_reduction(ctx_factory): and qbx_forced_limit * center_side[ictr] > 0) ) - <> post_dist_sq = if(matches, dist_sq, HUGE) + <> post_dist_sq = dist_sq if matches else HUGE end <> min_dist_sq, <> min_ictr = argmin(ictr, ictr, post_dist_sq) - tgt_to_qbx_center[itgt] = if(min_dist_sq < HUGE, min_ictr, -1) + tgt_to_qbx_center[itgt] = min_ictr if min_dist_sq < HUGE else -1 end """) @@ -2686,7 +2685,7 @@ def test_no_barriers_for_nonoverlapping_access(second_index, expect_barrier): a[%s] = 13 {id=second,dep=first} """ % second_index, [ - lp.TemporaryVariable("a", lp.auto, shape=(256,), + lp.TemporaryVariable("a", dtype=None, shape=(256,), address_space=lp.AddressSpace.LOCAL), ]) @@ -2705,7 +2704,7 @@ def test_half_complex_conditional(ctx_factory): knl = lp.make_kernel( "{[i]: 0 <= i < 10}", """ - tmp[i] = if(i < 5, 0, 0j) + tmp[i] = 0 if i < 5 else 0j """) knl(queue) @@ -2769,6 +2768,15 @@ def test_backwards_dep_printing_and_error(): def test_dump_binary(ctx_factory): ctx = ctx_factory() + device = ctx.devices[0] + + if (device.platform.vendor == "Intel(R) Corporation" + and device.driver_version in [ + "2019.8.7.0", + "2019.8.8.0", + ]): + pytest.skip("Intel CL doesn't implement Kernel.program") + knl = lp.make_kernel( "{ [i]: 0<=i