diff --git a/MEMO b/MEMO index 09245716d3a6b914634c60a6cc9e16a5bb2fc3b6..2d88a1040902fc20d43903069df781a611ad7340 100644 --- a/MEMO +++ b/MEMO @@ -47,12 +47,10 @@ To-do - Make tests run on GPUs -- Streamline arg +- Streamline argument specification - Fix timer / call code -- variant_prefetch_fields in test_dg - - make sure simple side effects in global work - syntax for linear array access diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 38ebb87fb7b9cde97753106a22e8a4c5a7177a10..a1420d7bc92b6430f4c26378c57da6d825e48efa 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -151,6 +151,16 @@ def mark_local_temporaries(kernel): assert locparallel_assignee_inames <= locparallel_compute_inames + if (locparallel_assignee_inames != locparallel_compute_inames + and bool(locparallel_assignee_inames)): + raise RuntimeError("instruction '%s' looks invalid: " + "it assigns to indices based on local IDs, but " + "its temporary '%s' cannot be made local because " + "a write race across the iname(s) '%s' would emerge. " + "(Do you need to add an extra iname to your prefetch?)" + % (insn_id, temp_var.name, ", ".join( + locparallel_compute_inames - locparallel_assignee_inames))) + wants_to_be_local_per_insn.append( locparallel_assignee_inames == locparallel_compute_inames diff --git a/proto-tests/test_dg.py b/proto-tests/test_dg.py index 9f39b1a4b68f08dbc861924aedc4ce500985cb75..1295e60472a3ab2592cf05f9bad43b5281e0ab97 100644 --- a/proto-tests/test_dg.py +++ b/proto-tests/test_dg.py @@ -88,15 +88,13 @@ def test_dg_volume(ctx_factory): knl = lp.tag_inames(knl, dict(n="l.0")) knl = lp.split_iname(knl, "k", 3, outer_tag="g.0", inner_tag="l.1") for name in ["u", "v", "w", "p"]: - # FIXME - knl = lp.add_prefetch(knl, "%s[:,k]" % name) + knl = lp.add_prefetch(knl, "%s[:,k]" % name, ["k_inner"]) return knl def variant_k_ilp(knl): knl = lp.tag_inames(knl, dict(n="l.0")) - # FIXME knl = lp.split_iname(knl, "k", 3, outer_tag="g.0", inner_tag="ilp") knl = lp.tag_inames(knl, dict(m="unr")) return knl @@ -123,17 +121,13 @@ def test_dg_volume(ctx_factory): pad_mult = lp.find_padding_multiple(knl, "u", 1, 32) - knl = lp.split_iname(knl, "k", pad_mult, outer_tag="g.0", inner_tag="l.1") - arg_names = [ prefix+name for name in ["u", "v", "w", "p"] for prefix in ["", "rhs"]] - # FIXME knl = lp.split_arg_axis(knl, [(nm, 1) for nm in arg_names], pad_mult) - return knl parameters_dict = dict(K=K) @@ -141,9 +135,9 @@ def test_dg_volume(ctx_factory): for variant in [ #variant_basic, #variant_more_per_work_group, - variant_image_d, + #variant_image_d, #variant_prefetch_d, - #variant_prefetch_fields, + variant_prefetch_fields, #variant_k_ilp, #variant_simple_padding, #variant_fancy_padding