From 7b4424f2a47465ba7fb402f5a974b632bdad7ca1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Mon, 26 Mar 2012 11:51:25 -0400 Subject: [PATCH] Fix precompute/prefetch bugs that kept the n-body test from working. --- loopy/__init__.py | 1 - loopy/cse.py | 12 +++++++++--- test/test_nbody.py | 4 ++-- test/test_sem.py | 2 ++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index a35060141..78342b9e9 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -502,7 +502,6 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None, kernel = extract_subst(kernel, rule_name, uni_template, parameters) - if footprint_subscripts is not None: if not isinstance(footprint_subscripts, (list, tuple)): footprint_subscripts = [footprint_subscripts] diff --git a/loopy/cse.py b/loopy/cse.py index 640a69bb4..54618ed71 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -671,12 +671,18 @@ def precompute(kernel, subst_use, dtype, sweep_inames=[], found = True break - if not invdesc.is_in_footprint: + if footprint_generators is None: + # We only have a right to find the expression if the + # invocation descriptors if they were generated by a scan + # of the code in the first place. If the user gave us + # the footprint generators, that isn't true. + + assert found, expr + + if not found or not invdesc.is_in_footprint: left_unused_subst_rule_invocations[0] = True return expr - assert found, expr - else: # The current subsitution *was* found inside another substitution # rule. We can't dig up the corresponding invocation descriptor, diff --git a/test/test_nbody.py b/test/test_nbody.py index 4f888ef09..f1641256f 100644 --- a/test/test_nbody.py +++ b/test/test_nbody.py @@ -1,4 +1,3 @@ - from __future__ import division import numpy as np @@ -38,6 +37,7 @@ def test_nbody(ctx_factory): return knl, [] def variant_cpu(knl): + knl = lp.expand_subst(knl) knl = lp.split_dimension(knl, "i", 1024, outer_tag="g.0", slabs=(0,1)) knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None) @@ -56,7 +56,7 @@ def test_nbody(ctx_factory): n = 3000 - for variant in [variant_gpu]: + for variant in [variant_1, variant_cpu, variant_gpu]: variant_knl, loop_prio = variant(knl) kernel_gen = lp.generate_loop_schedules(variant_knl, loop_priority=loop_prio) diff --git a/test/test_sem.py b/test/test_sem.py index a83a74c90..8a93d11c0 100644 --- a/test/test_sem.py +++ b/test/test_sem.py @@ -10,6 +10,8 @@ from pyopencl.tools import pytest_generate_tests_for_pyopencl \ +1/0 # not ready + def test_laplacian(ctx_factory): 1/0 # not adapted to new language -- GitLab