diff --git a/MEMO b/MEMO
index 4d1acf21bc21bc9189152406048a1b217f6f78fc..dee9d32f850bd5b22dfedf81fdf64d78e8f871c7 100644
--- a/MEMO
+++ b/MEMO
@@ -49,8 +49,6 @@ To-do
 
 - Fuse: store/fetch elimination?
 
-- divisibility constraints
-
 Fixes:
 
 - Group instructions by dependency/inames for scheduling, to
@@ -109,6 +107,8 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- test divisibility constraints
+
 - Test join_inames
 
 - Divisibility, modulo, strides?
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 3849318bae0234081646ed1fb3717d9a7ec12436..e748e7cc9f7187be73e11dcfe2b05e4e4ca0ca59 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -89,6 +89,38 @@ def test_join_inames(ctx_factory):
 
 
 
+def test_divisibility_assumption(ctx_factory):
+    ctx = ctx_factory()
+    # b[i] = 2*a[i] for 0<=i<n, assuming n is a positive multiple of 16.
+    knl = lp.make_kernel(ctx.devices[0],
+            "[n] -> {[i]: 0<=i<n}",
+            [
+                "b[i] = 2*a[i]"
+                ],
+            [
+                lp.GlobalArg("a", np.float32, shape=("n",)),
+                lp.GlobalArg("b", np.float32, shape=("n",)),
+                lp.ValueArg("n", np.int32),
+                ],
+            assumptions="n>=1 and (exists zz: n = 16*zz)")
+    # Keep the unsplit kernel; it is the reference for auto_test_vs_ref below.
+    ref_knl = knl
+    # Split "i" by the same factor (16) that the assumption says divides n.
+    knl = lp.split_iname(knl, "i", 16)
+    # No schedule's generated code may contain the substring "if".
+    for k in lp.generate_loop_schedules(knl):
+        code = lp.generate_code(k)
+        assert "if" not in code  # presumably: no bounds checks emitted -- confirm
+    # Fresh schedules for the run (presumably the loop used up the first) -- confirm.
+    kernel_gen = lp.generate_loop_schedules(knl)
+    kernel_gen = lp.check_kernels(kernel_gen)
+    # n = 16**3 = 4096 = 16*256 satisfies the divisibility assumption.
+    lp.auto_test_vs_ref(ref_knl, ctx, kernel_gen,
+            parameters={"n": 16**3})
+
+
+
+
 def test_multi_cse(ctx_factory):
     ctx = ctx_factory()
 