diff --git a/loopy/statistics.py b/loopy/statistics.py index 1e61e07ed3ec68f308c6dfa7393c99cb681ba135..af9d6d47dc53e2e1ef6d585d368365f5ffa41c57 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1061,7 +1061,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): return mult_grid_factor(g_used, gsize) * mult_grid_factor(l_used, lsize) -def count_insn_runs(knl, insn, disregard_local_axes=False): +def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False): insn_inames = knl.insn_inames(insn) if disregard_local_axes: @@ -1077,17 +1077,21 @@ def count_insn_runs(knl, insn, disregard_local_axes=False): set=[], params=knl.outer_params()) c = count(knl, domain, space=space) - unused_fac = get_unused_hw_axes_factor(knl, insn, - disregard_local_axes=disregard_local_axes, - space=space) - return c * unused_fac + + if count_redundant_work: + unused_fac = get_unused_hw_axes_factor(knl, insn, + disregard_local_axes=disregard_local_axes, + space=space) + return c * unused_fac + else: + return c # }}} # {{{ get_op_map -def get_op_map(knl, numpy_types=True): +def get_op_map(knl, numpy_types=True, count_redundant_work=False): """Count the number of operations in a loopy kernel. @@ -1097,6 +1101,12 @@ def get_op_map(knl, numpy_types=True): in the returned mapping should be numpy types instead of :class:`loopy.LoopyType`. + :arg count_redundant_work: Based on usage of hardware axes or other + specifics, a kernel may perform work redundantly. This :class:`bool` + flag indicates whether this work should be included in the count. + (Likely desirable for performance modeling, but undesirable for + code optimization.) + :return: A :class:`ToCountMap` of **{** :class:`Op` **:** :class:`islpy.PwQPolynomial` **}**. @@ -1128,7 +1138,9 @@ def get_op_map(knl, numpy_types=True): op_counter = ExpressionOpCounter(knl) for insn in knl.instructions: ops = op_counter(insn.assignee) + op_counter(insn.expression) - op_map = op_map + ops*count_insn_runs(knl, insn) + op_map = op_map + ops*count_insn_runs( + knl, insn, + count_redundant_work=count_redundant_work) if numpy_types: op_map.count_map = dict((Op(dtype=op.dtype.numpy_dtype, name=op.name), @@ -1142,7 +1154,7 @@ def get_op_map(knl, numpy_types=True): # {{{ get_mem_access_map -def get_mem_access_map(knl, numpy_types=True): +def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): """Count the number of memory accesses in a loopy kernel. :arg knl: A :class:`loopy.LoopKernel` whose memory accesses are to be @@ -1152,6 +1164,11 @@ def get_mem_access_map(knl, numpy_types=True): in the returned mapping should be numpy types instead of :class:`loopy.LoopyType`. + :arg count_redundant_work: Based on usage of hardware axes or other + specifics, a kernel may perform work redundantly. This :class:`bool` + flag indicates whether this work should be included in the count. + (Likely desirable for performance modeling, but undesirable for + code optimization.) :return: A :class:`ToCountMap` of **{** :class:`MemAccess` **:** :class:`islpy.PwQPolynomial` **}**. @@ -1208,7 +1225,9 @@ def get_mem_access_map(knl, numpy_types=True): @memoize_in(cache_holder, "insn_count") def get_insn_count(knl, insn_id, uniform=False): insn = knl.id_to_insn[insn_id] - return count_insn_runs(knl, insn, disregard_local_axes=uniform) + return count_insn_runs( + knl, insn, disregard_local_axes=uniform, + count_redundant_work=count_redundant_work) knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl)