diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 36fbb49f4bb77c959877fb0bd21e1de6fb49c74b..2986e3625cf54bb38de2a597752291a9ef0564a4 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -583,6 +583,14 @@ def dim_max_with_elimination(obj, idx): # {{{ get_simple_strides +def get_dim_idx_and_coeff(dts, aff_like): + return [ + (dt, dim_idx, aff_like.get_coefficient_val(dt, dim_idx)) + for dt in dts + for dim_idx in range(aff_like.dim(dt)) + if not aff_like.get_coefficient_val(dt, dim_idx).is_zero()] + + def get_simple_strides(bset, key_by="name"): """Return a dictionary from inames to strides in bset. Each stride is returned as a :class:`islpy.Val`. If no stride can be determined, the @@ -598,50 +606,84 @@ def get_simple_strides(bset, key_by="name"): assert len(comp_div_set_pieces) == 1 bset, = comp_div_set_pieces - lspace = bset.get_local_space() - for idiv in range(lspace.dim(dim_type.div)): - div = lspace.get_div(idiv) - - # check for sub-divs - supported = True - for dim_idx in range(div.dim(dim_type.div)): - coeff_val = div.get_coefficient_val(dim_type.div, dim_idx) - if not coeff_val.is_zero(): - # sub-divs not supported - supported = False - break + for constr in bset.get_constraints(): + if not constr.is_equality(): + continue + + # Pick apart constraints of the form + # Constraint("[n] -> { [i] : 2*floor((i)/2) + -1*i = 0 }") + # (and only those) + + aff = constr.get_aff() + + relevant_div_indices = get_dim_idx_and_coeff([dim_type.div], aff) + + if aff.get_denominator_val().to_python() != 1: + # not supported + continue + + if len(relevant_div_indices) == 0: + # won't cause striding + continue + + if len(relevant_div_indices) > 1: + # not supported + continue - if not supported: + (_, div_idx, div_coeff), = relevant_div_indices + div_coeff = div_coeff.to_python() + div = aff.get_div(div_idx) + + if get_dim_idx_and_coeff([dim_type.div], div): + # sub-divs not supported continue - denom = div.get_denominator_val().to_python() + 
in_div_denom = div.get_denominator_val().to_python() + + in_div_inames_and_coeffs = get_dim_idx_and_coeff( + [dim_type.param, dim_type.in_], div) + + if len(in_div_inames_and_coeffs) != 1: + continue - inames_and_coeffs = [] - for dt in [dim_type.param, dim_type.in_]: - for dim_idx in range(div.dim(dt)): - coeff_val = div.get_coefficient_val(dt, dim_idx) * denom - if not coeff_val.is_zero(): - inames_and_coeffs.append((dt, dim_idx, coeff_val)) + (in_div_dt, in_div_dim_idx, in_div_coeff), = in_div_inames_and_coeffs + in_div_coeff = (in_div_coeff * in_div_denom).to_python() - if len(inames_and_coeffs) != 1: + if in_div_coeff != 1: + # not supported + continue + if div_coeff != in_div_denom: + # not supported + continue + if in_div_dt == dim_type.param: + # not a stride, don't care continue + assert in_div_dt == dim_type.in_ - (dt, dim_idx, coeff), = inames_and_coeffs + aff_inames_and_coeffs = get_dim_idx_and_coeff( + [dim_type.param, dim_type.in_], aff) - if coeff != 1: + if len(aff_inames_and_coeffs) != 1: + # not supported + continue + + (aff_dt, aff_dim_idx, aff_coeff), = aff_inames_and_coeffs + + if not (aff_coeff + 1).is_zero(): # must be -1 + # not supported + continue + if (aff_dt, aff_dim_idx) != (dim_type.in_, in_div_dim_idx): # not supported continue if key_by == "name": - key = bset.get_dim_name(dt, dim_idx) + key = bset.get_dim_name(in_div_dt, in_div_dim_idx) elif key_by == "index": - key_dt = dt if dt != dim_type.in_ else dim_type.set - - key = (key_dt, dim_idx) + key = (dim_type.set, in_div_dim_idx) else: raise ValueError("invalid value of 'key_by") - result[key] = denom + result[key] = in_div_denom return result diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 14b18150f5b84218f39ba23662eb6106ffb596a0..61106e12095e382fd9987f7c91e6e5caedcc7fb5 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -942,7 +942,10 @@ def parse_instructions(instructions, defines): # {{{ domain parsing -EMPTY_SET_DIMS_RE = 
re.compile(r"^\s*\{\s*\:") +EMPTY_SET_DIMS_RE = re.compile( + r"^\s*\{\s*" + r"(?:\[\])?" + r"\s*\:") SET_DIMS_RE = re.compile(r"^\s*\{\s*\[([a-zA-Z0-9_, ]+)\]\s*\:") diff --git a/loopy/statistics.py b/loopy/statistics.py index cb15eb55498bcafe4ae537747e387e47ddbd8254..97e9651838dff958cc00a08c9597e26534930b1e 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -940,7 +940,11 @@ class AccessFootprintGatherer(CombineMapper): # {{{ count -def count(kernel, set): +def count(set, kernel=None, sloppy=False): + """ + :arg kernel: only used for error reporting, may be *None* + """ + try: return set.card() except AttributeError: @@ -1003,6 +1007,8 @@ def count(kernel, set): # }}} + # end for (i) + if bset_count is not None: count += bset_count @@ -1011,23 +1017,37 @@ def count(kernel, set): if not (is_subset and is_superset): if is_subset: - warn_with_kernel(kernel, "count_overestimate", - "Barvinok wrappers are not installed. " - "Counting routines have overestimated the " + warn_tp = "count_overestimate" + msg = ("Fallback counting routines have overestimated the " "number of integer points in your loop " "domain.") elif is_superset: - warn_with_kernel(kernel, "count_underestimate", - "Barvinok wrappers are not installed. " - "Counting routines have underestimated the " + warn_tp = "count_underestimate" + msg = ("Fallback counting routines have underestimated the " "number of integer points in your loop " "domain.") else: - warn_with_kernel(kernel, "count_misestimate", - "Barvinok wrappers are not installed. " - "Counting routines have misestimated the " + warn_tp = "count_misestimate" + msg = ("Fallback counting routines have misestimated the " "number of integer points in your loop " "domain.") + msg = (msg + + " Correct estimates in all cases can be obtained " + "by compiling islpy with Barvinok wrappers. 
This " + "will automatically stop using the (simple-minded) " + "fallback counting routines.") + + if sloppy: + if kernel is not None: + warn_with_kernel(kernel, warn_tp, msg) + else: + from warnings import warn + warn(msg) + else: + raise LoopyError(msg + + " You may ignore this error " + "(at your own peril--you'll get incorrect results) " + "by passing sloppy=True.") return count @@ -1097,7 +1117,7 @@ def get_op_map(knl, numpy_types=True): domain = (inames_domain.project_out_except( insn_inames, [dim_type.set])) ops = op_counter(insn.assignee) + op_counter(insn.expression) - op_map = op_map + ops*count(knl, domain) + op_map = op_map + ops*count(domain, kernel=knl) if numpy_types: op_map.count_map = dict((Op(dtype=op.dtype.numpy_dtype, name=op.name), @@ -1225,7 +1245,7 @@ def get_mem_access_map(knl, numpy_types=True): inames_domain = knl.get_inames_domain(insn_inames) domain = (inames_domain.project_out_except( insn_inames, [dim_type.set])) - return count(knl, domain) + return count(domain, kernel=knl) knl = infer_unknown_types(knl, expect_completion=True) knl = preprocess_kernel(knl) @@ -1348,10 +1368,10 @@ def get_synchronization_map(knl): def get_count_poly(iname_list): if iname_list: # (if iname_list is not empty) - ct = (count(knl, ( - knl.get_inames_domain(iname_list). 
- project_out_except(iname_list, [dim_type.set]) - )), ) + ct = (count( + knl.get_inames_domain(iname_list) + .project_out_except(iname_list, [dim_type.set]), + kernel=knl), ) return reduce(mul, ct) else: return one @@ -1467,7 +1487,7 @@ def gather_access_footprint_bytes(kernel, ignore_uncountable=False): var_descr = kernel.get_var_descriptor(vname) bytes_transferred = ( int(var_descr.dtype.numpy_dtype.itemsize) - * count(kernel, var_fp)) + * count(var_fp, kernel=kernel)) if key in result: result[key] += bytes_transferred else: diff --git a/test/test_statistics.py b/test/test_statistics.py index 5e363f13594ee8e4cf170faa232b0783cca9d018..7fd4ff37a85c7439e93c1780e09685d7bb6823e7 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -29,9 +29,11 @@ from pyopencl.tools import ( # noqa as pytest_generate_tests) import loopy as lp from loopy.types import to_loopy_type +from loopy.diagnostic import LoopyError import numpy as np from pymbolic.primitives import Variable +import pytest def test_op_counter_basic(): @@ -704,7 +706,7 @@ def test_gather_access_footprint(): fp = gather_access_footprints(knl) for key, footprint in six.iteritems(fp): - print(key, count(knl, footprint)) + print(key, count(footprint, kernel=knl)) def test_gather_access_footprint_2(): @@ -719,8 +721,8 @@ def test_gather_access_footprint_2(): params = {"n": 200} for key, footprint in six.iteritems(fp): - assert count(knl, footprint).eval_with_dict(params) == 200 - print(key, count(knl, footprint)) + assert count(footprint, kernel=knl).eval_with_dict(params) == 200 + print(key, count(footprint, kernel=knl)) def test_summations_and_filters(): @@ -802,6 +804,46 @@ def test_summations_and_filters(): assert s1f64l == 2*n*m +def test_count_with_div_bounds(): + from loopy.statistics import count + import islpy as isl + + knl = lp.make_kernel("{[]: }", [""" """]) + + s = isl.Set("[n] -> { [i0, i1] : 96*floor((n)/96) = n and n > 0 " + "and i0 >= 0 and i1 >= 0 and 16*floor((i0)/16) <= -16 + n " + 
"and 16*floor((i1)/16) <= -16 + n }") + ct = count(s) + + print(ct) + expected = isl.PwQPolynomial( + "[n] -> { n^2 : 96*floor((n)/96) = n and n >= 16 }" + ) + + assert ct.plain_is_equal(expected) + + +def test_count_with_strides(): + from loopy.statistics import count + import islpy as isl + + s = isl.Set("[n] -> { [i] : 0 <= i < n and i mod 2 = 0 }") + ct = count(s) + + expected = isl.PwQPolynomial("[n] -> { (n - floor((n)/2)) : n > 0 }") + + assert ct.plain_is_equal(expected) + + +def test_count_diagonal_strides(): + from loopy.statistics import count + import islpy as isl + + s = isl.Set("[n] -> { [i,j] : 0 <= i,j < n and (i +j) mod 2 = 0 }") + with pytest.raises(LoopyError): + count(s) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])