diff --git a/MEMO b/MEMO index 9aec2cce968276cf82e0f1742ec0944e4e0dcfee..8eb2fd5ee55a487ffcc08eb92a8687e57343c7cd 100644 --- a/MEMO +++ b/MEMO @@ -80,6 +80,9 @@ TODO - Flag, exploit idempotence +- Implement insert_parallel_dim_check_points + (but first: find a kernel that needs it) + Dealt with ^^^^^^^^^^ diff --git a/loopy/compiled.py b/loopy/compiled.py index ed80d23719416648f52f57d68632158729d51bf8..85daa16121cebd2a3e827dd643852cc02c3f82f0 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -98,7 +98,16 @@ def drive_timing_run(kernel_generator, queue, launch, flop_count=None, print "SOLUTION #%d" % soln_count print "-----------------------------------------------" if print_code: - print compiled.code + try: + from pygments import highlight + except ImportError: + print compiled.code + else: + from pygments.lexers import CLexer + from pygments.formatters import TerminalFormatter + + print highlight(compiled.code, CLexer(), TerminalFormatter()) + print "-----------------------------------------------" elapsed = time_run(compiled) diff --git a/loopy/schedule.py b/loopy/schedule.py index f761bc1c95fcf3e2271d5eb9021881ce13643513..5fef6bcfbd60d7ca82b7cbe0abe40d1c5510588e 100644 --- a/loopy/schedule.py +++ b/loopy/schedule.py @@ -668,8 +668,7 @@ def insert_barriers(kernel, schedule, level=0): def insert_parallel_dim_check_points(kernel, schedule): - from warnings import warn - warn("insert_parallel_dim_check_points is unimplemented") + # FIXME: Unimplemented return kernel diff --git a/test/test_matmul.py b/test/test_matmul.py index 418ab1201aecaa68b5624f607af5b003716b5595..6a54e58643ce28b5b4ae00fd098011ed0bdf25ec 100644 --- a/test/test_matmul.py +++ b/test/test_matmul.py @@ -217,7 +217,7 @@ def test_plain_matrix_mul_new_ui(ctx_factory): outer_tag="g.0", inner_tag="l.1", no_slabs=True) knl = lp.split_dimension(knl, "j", 16, outer_tag="g.1", inner_tag="l.0", no_slabs=True) - knl = lp.split_dimension(knl, "k", 16, no_slabs=True) + knl = lp.split_dimension(knl, "k", 16) knl = lp.realize_cse(knl, "lhsmat", dtype, ["k_inner", "i_inner"]) knl = lp.realize_cse(knl, "rhsmat", dtype, ["j_inner", "k_inner"])