Patch notes (commentary placed ahead of the first "diff --git" header; it is
not part of any hunk):

  * MEMO: drops the "Make xfail test for strided access" to-do, moves
    "Divisibility, modulo, strides?" and "*_dimension -> *_iname" to the
    "Dealt with" section, and adds "Better code for strides." as a future idea.
  * test/test_loopy.py: adds two tests.
      - test_stride: builds a kernel over the domain
        {[i]: 0<=i<n and (exists l: i = 2*l)} that executes "a[i] = 5", and
        compares it against itself as reference with
        fills_entire_output=False.
      - test_domain_dependency_via_existentially_quantified_variable: uses a
        second domain {[k]: k=i and (exists l: k = 2*l)} that refers to the
        iname i of the first domain; "a[k] = 6" is declared to depend on the
        instruction tagged id=set.
  * NOTE(review): the first instruction string of the second test contains a
    "\n" escape between "a[i] = 5 " and "{id=set}". It must stay an escape on
    one line; a raw line break there would leave the string literal
    unterminated and would change the hunk's line count (64 added lines).
  * NOTE(review): indentation inside the added Python code and the wrap point
    of the "Nested slab decomposition" item were reconstructed; confirm
    against the repository before applying.

diff --git a/MEMO b/MEMO
index ac53d89d97e890590f84554cf90eae627b2d4e81..504788e49e013b757708607ec81e9431d364907b 100644
--- a/MEMO
+++ b/MEMO
@@ -45,12 +45,8 @@ To-do
 
 - Kernel splitting (via what variables get computed in a kernel)
 
-- Make xfail test for strided access.
-
 - Test join_inames
 
-- *_dimension -> *_iname
-
 - Debug axpy nondet fail
 
 - Make tests run on GPUs
@@ -103,16 +99,21 @@ Future ideas
 
 - DMA engine threads?
 
-- Divisibility, modulo, strides?
-
 - Try, fix indirect addressing
 
 - Nested slab decomposition (in conjunction with conditional hoisting) could
   generate nested conditional code.
 
+- Better code for strides.
+
 Dealt with
 ^^^^^^^^^^
 
+- Divisibility, modulo, strides?
+  -> Tested, gives correct (but suboptimal) code.
+
+- *_dimension -> *_iname
+
 - Use gists (why do disjoint sets arise?)
 
 - Automatically verify that all array access is within bounds.
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 0203bb1d8fbd84e297415a6e6754e12c98870187..c3029091538ed94ff2558e1080fab8c3b0467792 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -558,6 +558,70 @@ def test_equality_constraints(ctx_factory):
 
 
 
+def test_stride(ctx_factory):
+    dtype = np.float32
+    ctx = ctx_factory()
+
+    order = "C"
+
+    n = 10
+
+    knl = lp.make_kernel(ctx.devices[0], [
+            "{[i]: 0<=i<n and (exists l: i = 2*l)}",
+            ],
+            [
+                "a[i] = 5",
+                ],
+            [
+                lp.GlobalArg("a", dtype, shape="n", order=order),
+                lp.ValueArg("n", np.int32, approximately=1000),
+                ],
+            assumptions="n>=1")
+
+    seq_knl = knl
+
+    kernel_gen = lp.generate_loop_schedules(knl)
+    kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
+
+    lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
+            parameters=dict(n=n), fills_entire_output=False)
+
+
+
+
+def test_domain_dependency_via_existentially_quantified_variable(ctx_factory):
+    dtype = np.float32
+    ctx = ctx_factory()
+
+    order = "C"
+
+    n = 10
+
+    knl = lp.make_kernel(ctx.devices[0], [
+            "{[i]: 0<=i<n }",
+            "{[k]: k=i and (exists l: k = 2*l) }",
+            ],
+            [
+                "a[i] = 5 \n{id=set}",
+                "a[k] = 6 {dep=set}",
+                ],
+            [
+                lp.GlobalArg("a", dtype, shape="n", order=order),
+                lp.ValueArg("n", np.int32, approximately=1000),
+                ],
+            assumptions="n>=1")
+
+    seq_knl = knl
+
+    kernel_gen = lp.generate_loop_schedules(knl)
+    kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))
+
+    lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
+            parameters=dict(n=n), )
+
+
+
+
 # {{{ test race detection
 
 def test_ilp_write_race_detection_global(ctx_factory):