diff --git a/loopy/preprocess.py b/loopy/preprocess.py index a84ac4359fe3876654838d29a9759d85d045d6c8..54311d7899d39e8a2965d3f1a70821df6fa269d2 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -43,7 +43,6 @@ from loopy.symbolic import RuleAwareIdentityMapper from loopy.kernel.instruction import (MultiAssignmentBase, CInstruction, CallInstruction, _DataObliviousInstruction) from loopy.kernel.function_interface import CallableKernel, ScalarCallable -from loopy.transform.data import allocate_temporaries_for_base_storage from loopy.kernel.array import ArrayDimImplementationTag from loopy.kernel.data import _ArraySeparationInfo, KernelArgument from loopy.translation_unit import TranslationUnit, for_each_kernel @@ -769,10 +768,6 @@ def _preprocess_single_kernel(kernel: LoopKernel, is_entrypoint: bool) -> LoopKe kernel = find_temporary_address_space(kernel) - # Ordering restriction: temporary address spaces need to be found before - # allocating base_storage - kernel = allocate_temporaries_for_base_storage(kernel, _implicitly_run=True) - # check for atomic loads, much easier to do here now that the dependencies # have been established kernel = check_atomic_loads(kernel) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 1b97087b859e3b3350980a0528781b6b746a0ec2..d96faf1be9ecfb1efc90abc393c297034f67c274 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -988,7 +988,6 @@ def allocate_temporaries_for_base_storage(kernel: LoopKernel, only_address_space: Optional[int] = None, aliased=True, max_nbytes: Optional[int] = None, - _implicitly_run=False, ) -> LoopKernel: from pytools import product @@ -1077,12 +1076,6 @@ def allocate_temporaries_for_base_storage(kernel: LoopKernel, new_tvs[bsi.name] = new_tvs[bsi.name].copy(shape=(new_bs_size,)) if made_changes: - if _implicitly_run: - warn("Base storage allocation was performed implicitly during " - "preprocessing. This is deprecated and will stop working " - "in 2023. Call loopy.allocate_temporaries_for_base_storage " - "explicitly to avoid this warning.", DeprecationWarning) - return kernel.copy(temporary_variables=new_tvs) else: return kernel diff --git a/test/test_loopy.py b/test/test_loopy.py index 2d1c7bc22e7df18e5fc5b7fbd2a837d5e37ef582..8e1ce26ba496c33581595632cb39ad12eba2e3c1 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1342,6 +1342,9 @@ def test_save_with_base_storage(ctx_factory, debug=False): knl = lp.alias_temporaries(knl, ["a", "b"], synchronize_for_exclusive_use=False) + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) + save_and_reload_temporaries_test(queue, knl, np.arange(10), debug) @@ -1416,6 +1419,8 @@ def test_missing_definition_check_respects_aliases(): target=lp.CTarget(), silenced_warnings=frozenset(["read_no_write(b)"])) + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) lp.generate_code_v2(knl) @@ -1911,6 +1916,8 @@ def test_scalars_with_base_storage(ctx_factory): shape=(), base_storage="base"), ]) + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) knl(queue, out_host=True) @@ -3199,6 +3206,9 @@ def test_global_tv_with_base_storage_across_gbarrier(ctx_factory): t_unit = lp.tag_inames(t_unit, {"i": "g.0", "j": "g.0"}) + t_unit = lp.preprocess_kernel(t_unit) + t_unit = lp.allocate_temporaries_for_base_storage(t_unit) + _, (out,) = t_unit(cq) np.testing.assert_allclose(out.get(), np.arange(9, -1, -1)) @@ -3559,6 +3569,9 @@ def test_no_barrier_err_for_global_temps_with_base_storage(ctx_factory): knl = lp.split_iname(knl, "i", 4, inner_tag="l.0", outer_tag="g.0") knl = lp.split_iname(knl, "j", 4, inner_tag="l.0", outer_tag="g.0") + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) + _, (out,) = knl(cq, out_host=True) np.testing.assert_allclose(2*np.arange(16) + 2, out) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 6d97104a9542340db8c2c227105ef7175e55547c..43f754a1b4d2a6c615fee7b27d117e352eabed62 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -236,6 +236,9 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa hsv = tap_hsv + hsv = lp.preprocess_kernel(hsv) + hsv = lp.allocate_temporaries_for_base_storage(hsv) + hsv = lp.set_options(hsv, build_options=[ "-cl-denorms-are-zero", diff --git a/test/test_target.py b/test/test_target.py index 3403ddf153848a1452b91fd3d7bce1e89cf4416a..c53cf3199bed83065188f2b85f0229adc4a7c391 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -595,6 +595,9 @@ def test_pyopencl_target_with_global_temps_with_base_storage(ctx_factory): knl = lp.tag_inames(knl, {"i": "g.0", "j": "g.0"}) knl = lp.set_options(knl, return_dict=True) + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) + my_allocator = RecordingAllocator(cq) _, out = knl(cq, allocator=my_allocator) diff --git a/test/test_transform.py b/test/test_transform.py index 5ca01dea044b9243fbbb7d8487c9babe929b9631..3f4ad06986f9b1d1ddb6ef9ef6e32ef7f3f91b59 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -270,6 +270,8 @@ def test_alias_temporaries(ctx_factory): knl = lp.alias_temporaries(knl, ["times2_0", "times3_0", "times4_0"]) + knl = lp.preprocess_kernel(knl) + knl = lp.allocate_temporaries_for_base_storage(knl) lp.auto_test_vs_ref( ref_knl, ctx, knl, parameters=dict(n=30))