diff --git a/loopy/cse.py b/loopy/cse.py index f2086b2c2367bed56b82d7d8903c673c798e6930..124f462b9c891c5dfcdf68e50d9ee2c3812b6a07 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -443,7 +443,8 @@ def precompute(kernel, subst_name, dtype, sweep_inames=[], sub_map = SubstitutionCallbackMapper([subst_name], do_substs) for insn in kernel.instructions: - new_insns.append(insn.copy(expression=sub_map(insn.expression))) + new_insn = insn.copy(expression=sub_map(insn.expression)) + new_insns.append(new_insn) new_substs = dict( (s.name, s.copy(expression=sub_map(s.expression))) diff --git a/loopy/schedule.py b/loopy/schedule.py index c1a1fdea0bd84db05bb08168c78c73f28809d405..64204f20da7f76800a17ce68edef2a04626d1384 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -167,6 +167,8 @@ def dump_schedule(schedule): entries.append("" % sched_item.iname) elif isinstance(sched_item, RunInstruction): entries.append(sched_item.insn_id) + elif isinstance(sched_item, Barrier): + entries.append("|") else: assert False @@ -270,6 +272,7 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b and len(schedule) >= debug.debug_length): debug_mode = True + #print dump_schedule(schedule), len(schedule) if debug_mode: print kernel print "--------------------------------------------" @@ -293,8 +296,6 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b reachable_insn_ids = set() for insn_id in all_insn_ids - scheduled_insn_ids: - if debug_mode: - print insn_id insn = kernel.id_to_insn[insn_id] schedule_now = set(insn.insn_deps) <= scheduled_insn_ids diff --git a/test/test_fem_assembly.py b/test/test_fem_assembly.py index 4502fd13b95da505f71edc7bcf28f6a43beace8f..77a5757a0b716b97ada9e778fd6aeed31cdca3dc 100644 --- a/test/test_fem_assembly.py +++ b/test/test_fem_assembly.py @@ -1,9 +1,7 @@ from __future__ import division import numpy as np -import numpy.linalg as la import pyopencl as cl -import pyopencl.array as cl_array import loopy as lp from pyopencl.tools import pytest_generate_tests_for_pyopencl \ @@ -17,8 +15,6 @@ def test_laplacian_stiffness(ctx_factory): ctx = ctx_factory() order = "C" - # FIXME: make dim-independent - dim = 2 Nq = 40 # num. quadrature points @@ -32,14 +28,97 @@ def test_laplacian_stiffness(ctx_factory): knl = lp.make_kernel(ctx.devices[0], "[Nc] -> {[K,i,j,q, ax_a, ax_b, ax_c]: 0<=K {[K,i,j,q, ax_a, ax_b, ax_c]: 0<=K 1: