Handle instructions that are nested inside no inames when counting runs.

count_insn_runs now returns early for an instruction with no inames: it
yields the constant polynomial 1, guarded by the universe set over that
polynomial's domain space. test_op_outsite builds a kernel with an empty
loop domain containing a single addition and checks that the op map sums
to 1.

diff --git a/loopy/statistics.py b/loopy/statistics.py
index e27a0f482885658888c97081e4fc1d97fcd149fd..0ff82e5abb30eca279c66f8ce1cc0ce08dfc68a2 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -1197,6 +1197,9 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None):
 def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False):
 
     insn_inames = knl.insn_inames(insn)
+    if not insn_inames:
+        p = isl.PwQPolynomial('{ 1 }')
+        return GuardedPwQPolynomial(p, isl.Set.universe(p.domain().space))
 
     if disregard_local_axes:
         from loopy.kernel.data import LocalIndexTag
diff --git a/test/test_statistics.py b/test/test_statistics.py
index ea0bdb62bb75d8a5bcf7dd987c00c33b848091fd..14944407c9573fcbe69acf28a8e83524a76a1c31 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -71,6 +71,24 @@ def test_op_counter_basic():
     assert i32add == n*m*2
 
 
+def test_op_outsite():
+
+    data = [lp.GlobalArg("a", shape=(10,), dtype=float),
+            lp.GlobalArg("b", shape=(10,), dtype=float),
+            lp.GlobalArg("c", shape=(10,), dtype=float)]
+
+    knl = lp.make_kernel(
+            "[] -> {[]}",
+            "a[0] = b[0] + c[0]",
+            kernel_data=data
+            )
+
+    op = lp.get_op_map(knl)
+    flops = op.sum().eval_with_dict({})
+
+    assert flops == 1
+
+
 def test_op_counter_reduction():
 
     knl = lp.make_kernel(