diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index 36fbb49f4bb77c959877fb0bd21e1de6fb49c74b..2986e3625cf54bb38de2a597752291a9ef0564a4 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -583,6 +583,14 @@ def dim_max_with_elimination(obj, idx):
 
 # {{{ get_simple_strides
 
+def get_dim_idx_and_coeff(dts, aff_like):
+    """Return ``(dt, idx, coeff)`` for nonzero coeffs of *aff_like* in *dts*."""
+    return [(dt, dim_idx, coeff)
+            for dt in dts for dim_idx in range(aff_like.dim(dt))
+            for coeff in [aff_like.get_coefficient_val(dt, dim_idx)]  # bind once
+            if not coeff.is_zero()]
+
+
 def get_simple_strides(bset, key_by="name"):
     """Return a dictionary from inames to strides in bset. Each stride is
     returned as a :class:`islpy.Val`. If no stride can be determined, the
@@ -598,50 +606,84 @@ def get_simple_strides(bset, key_by="name"):
     assert len(comp_div_set_pieces) == 1
     bset, = comp_div_set_pieces
 
-    lspace = bset.get_local_space()
-    for idiv in range(lspace.dim(dim_type.div)):
-        div = lspace.get_div(idiv)
-
-        # check for sub-divs
-        supported = True
-        for dim_idx in range(div.dim(dim_type.div)):
-            coeff_val = div.get_coefficient_val(dim_type.div, dim_idx)
-            if not coeff_val.is_zero():
-                # sub-divs not supported
-                supported = False
-                break
+    for constr in bset.get_constraints():
+        if not constr.is_equality():
+            continue
+
+        # Pick apart constraints of the form
+        # Constraint("[n] -> { [i] : 2*floor((i)/2) + -1*i = 0 }")
+        # (and only those)
+
+        aff = constr.get_aff()
+
+        relevant_div_indices = get_dim_idx_and_coeff([dim_type.div], aff)
+
+        if aff.get_denominator_val().to_python() != 1:
+            # not supported
+            continue
+
+        if len(relevant_div_indices) == 0:
+            # won't cause striding
+            continue
+
+        if len(relevant_div_indices) > 1:
+            # not supported
+            continue
 
-        if not supported:
+        (_, div_idx, div_coeff), = relevant_div_indices
+        div_coeff = div_coeff.to_python()
+        div = aff.get_div(div_idx)
+
+        if get_dim_idx_and_coeff([dim_type.div], div):
+            # sub-divs not supported
             continue
 
-        denom = div.get_denominator_val().to_python()
+        in_div_denom = div.get_denominator_val().to_python()
+
+        in_div_inames_and_coeffs = get_dim_idx_and_coeff(
+                [dim_type.param, dim_type.in_], div)
+
+        if len(in_div_inames_and_coeffs) != 1:
+            continue
 
-        inames_and_coeffs = []
-        for dt in [dim_type.param, dim_type.in_]:
-            for dim_idx in range(div.dim(dt)):
-                coeff_val = div.get_coefficient_val(dt, dim_idx) * denom
-                if not coeff_val.is_zero():
-                    inames_and_coeffs.append((dt, dim_idx, coeff_val))
+        (in_div_dt, in_div_dim_idx, in_div_coeff), = in_div_inames_and_coeffs
+        in_div_coeff = (in_div_coeff * in_div_denom).to_python()
 
-        if len(inames_and_coeffs) != 1:
+        if in_div_coeff != 1:
+            # not supported
+            continue
+        if div_coeff != in_div_denom:
+            # not supported
+            continue
+        if in_div_dt == dim_type.param:
+            # not a stride, don't care
             continue
+        assert in_div_dt == dim_type.in_
 
-        (dt, dim_idx, coeff), = inames_and_coeffs
+        aff_inames_and_coeffs = get_dim_idx_and_coeff(
+                [dim_type.param, dim_type.in_], aff)
 
-        if coeff != 1:
+        if len(aff_inames_and_coeffs) != 1:
+            # not supported
+            continue
+
+        (aff_dt, aff_dim_idx, aff_coeff), = aff_inames_and_coeffs
+
+        if not (aff_coeff + 1).is_zero():  # must be -1
+            # not supported
+            continue
+        if (aff_dt, aff_dim_idx) != (dim_type.in_, in_div_dim_idx):
             # not supported
             continue
 
         if key_by == "name":
-            key = bset.get_dim_name(dt, dim_idx)
+            key = bset.get_dim_name(in_div_dt, in_div_dim_idx)
         elif key_by == "index":
-            key_dt = dt if dt != dim_type.in_ else dim_type.set
-
-            key = (key_dt, dim_idx)
+            key = (dim_type.set, in_div_dim_idx)
         else:
             raise ValueError("invalid value of 'key_by")
 
-        result[key] = denom
+        result[key] = in_div_denom
 
     return result
 
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 14b18150f5b84218f39ba23662eb6106ffb596a0..61106e12095e382fd9987f7c91e6e5caedcc7fb5 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -942,7 +942,10 @@ def parse_instructions(instructions, defines):
 
 # {{{ domain parsing
 
-EMPTY_SET_DIMS_RE = re.compile(r"^\s*\{\s*\:")
+EMPTY_SET_DIMS_RE = re.compile(
+        r"^\s*\{\s*"  # opening brace, optionally surrounded by whitespace
+        r"(?:\[\])?"  # optional literal "[]", i.e. an explicitly empty dim list
+        r"\s*\:")  # colon, optionally preceded by whitespace
 SET_DIMS_RE = re.compile(r"^\s*\{\s*\[([a-zA-Z0-9_, ]+)\]\s*\:")
 
 
diff --git a/loopy/statistics.py b/loopy/statistics.py
index cb15eb55498bcafe4ae537747e387e47ddbd8254..97e9651838dff958cc00a08c9597e26534930b1e 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -940,7 +940,11 @@ class AccessFootprintGatherer(CombineMapper):
 
 # {{{ count
 
-def count(kernel, set):
+def count(set, kernel=None, sloppy=False):
+    """
+    :arg kernel: only used for error reporting, may be *None*
+    :arg sloppy: if *True*, an inexact fallback count warns instead of raising
+    """
     try:
         return set.card()
     except AttributeError:
@@ -1003,6 +1007,8 @@ def count(kernel, set):
 
             # }}}
 
+        # end for (i)
+
         if bset_count is not None:
             count += bset_count
 
@@ -1011,23 +1017,37 @@ def count(kernel, set):
 
         if not (is_subset and is_superset):
             if is_subset:
-                warn_with_kernel(kernel, "count_overestimate",
-                        "Barvinok wrappers are not installed. "
-                        "Counting routines have overestimated the "
+                warn_tp = "count_overestimate"
+                msg = ("Fallback counting routines have overestimated the "
                         "number of integer points in your loop "
                         "domain.")
             elif is_superset:
-                warn_with_kernel(kernel, "count_underestimate",
-                        "Barvinok wrappers are not installed. "
-                        "Counting routines have underestimated the "
+                warn_tp = "count_underestimate"
+                msg = ("Fallback counting routines have underestimated the "
                         "number of integer points in your loop "
                         "domain.")
             else:
-                warn_with_kernel(kernel, "count_misestimate",
-                        "Barvinok wrappers are not installed. "
-                        "Counting routines have misestimated the "
+                warn_tp = "count_misestimate"
+                msg = ("Fallback counting routines have misestimated the "
                         "number of integer points in your loop "
                         "domain.")
+            msg = (msg +
+                    " Correct estimates in all cases can be obtained "
+                    "by compiling islpy with Barvinok wrappers. This "
+                    "will automatically stop using the (simple-minded) "
+                    "fallback counting routines.")
+
+            if sloppy:  # inexact counts were explicitly accepted: only warn
+                if kernel is not None:
+                    warn_with_kernel(kernel, warn_tp, msg)
+                else:
+                    from warnings import warn
+                    warn(msg)
+            else:  # strict by default: a wrong count is reported as an error
+                raise LoopyError(msg
+                    + " You may ignore this error "
+                    "(at your own peril--you'll get incorrect results) "
+                    "by passing sloppy=True.")
 
     return count
 
@@ -1097,7 +1117,7 @@ def get_op_map(knl, numpy_types=True):
         domain = (inames_domain.project_out_except(
                                         insn_inames, [dim_type.set]))
         ops = op_counter(insn.assignee) + op_counter(insn.expression)
-        op_map = op_map + ops*count(knl, domain)
+        op_map = op_map + ops*count(domain, kernel=knl)
 
     if numpy_types:
         op_map.count_map = dict((Op(dtype=op.dtype.numpy_dtype, name=op.name),
@@ -1225,7 +1245,7 @@ def get_mem_access_map(knl, numpy_types=True):
         inames_domain = knl.get_inames_domain(insn_inames)
         domain = (inames_domain.project_out_except(
                                 insn_inames, [dim_type.set]))
-        return count(knl, domain)
+        return count(domain, kernel=knl)
 
     knl = infer_unknown_types(knl, expect_completion=True)
     knl = preprocess_kernel(knl)
@@ -1348,10 +1368,10 @@ def get_synchronization_map(knl):
 
     def get_count_poly(iname_list):
         if iname_list:  # (if iname_list is not empty)
-            ct = (count(knl, (
-                            knl.get_inames_domain(iname_list).
-                            project_out_except(iname_list, [dim_type.set])
-                            )), )
+            ct = (count(
+                            knl.get_inames_domain(iname_list)
+                            .project_out_except(iname_list, [dim_type.set]),
+                            kernel=knl), )
             return reduce(mul, ct)
         else:
             return one
@@ -1467,7 +1487,7 @@ def gather_access_footprint_bytes(kernel, ignore_uncountable=False):
         var_descr = kernel.get_var_descriptor(vname)
         bytes_transferred = (
                 int(var_descr.dtype.numpy_dtype.itemsize)
-                * count(kernel, var_fp))
+                * count(var_fp, kernel=kernel))
         if key in result:
             result[key] += bytes_transferred
         else:
diff --git a/test/test_statistics.py b/test/test_statistics.py
index 5e363f13594ee8e4cf170faa232b0783cca9d018..7fd4ff37a85c7439e93c1780e09685d7bb6823e7 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -29,9 +29,11 @@ from pyopencl.tools import (  # noqa
         as pytest_generate_tests)
 import loopy as lp
 from loopy.types import to_loopy_type
+from loopy.diagnostic import LoopyError
 import numpy as np
 
 from pymbolic.primitives import Variable
+import pytest
 
 
 def test_op_counter_basic():
@@ -704,7 +706,7 @@ def test_gather_access_footprint():
     fp = gather_access_footprints(knl)
 
     for key, footprint in six.iteritems(fp):
-        print(key, count(knl, footprint))
+        print(key, count(footprint, kernel=knl))
 
 
 def test_gather_access_footprint_2():
@@ -719,8 +721,8 @@ def test_gather_access_footprint_2():
 
     params = {"n": 200}
     for key, footprint in six.iteritems(fp):
-        assert count(knl, footprint).eval_with_dict(params) == 200
-        print(key, count(knl, footprint))
+        assert count(footprint, kernel=knl).eval_with_dict(params) == 200
+        print(key, count(footprint, kernel=knl))
 
 
 def test_summations_and_filters():
@@ -802,6 +804,46 @@ def test_summations_and_filters():
     assert s1f64l == 2*n*m
 
 
+def test_count_with_div_bounds():
+    """Check count() on a set whose upper bounds involve floor divisions."""
+    from loopy.statistics import count
+    import islpy as isl
+
+    # Result unused; presumably only checks that a "{[]: }" domain parses.
+    lp.make_kernel("{[]: }", [""" """])
+
+    s = isl.Set("[n] -> { [i0, i1] : 96*floor((n)/96) = n and n > 0 "
+                "and i0 >= 0 and i1 >= 0 and 16*floor((i0)/16) <= -16 + n "
+                "and 16*floor((i1)/16) <= -16 + n }")
+    ct = count(s)
+    print(ct)
+
+    expected = isl.PwQPolynomial(
+            "[n] -> { n^2 : 96*floor((n)/96) = n and n >= 16 }")
+    assert ct.plain_is_equal(expected)
+
+
+def test_count_with_strides():
+    """Count the even integers in ``[0, n)``, i.e. a stride-2 domain."""
+    import islpy as isl
+    from loopy.statistics import count
+
+    strided = isl.Set("[n] -> { [i] : 0 <= i < n and i mod 2 = 0 }")
+    reference = isl.PwQPolynomial("[n] -> { (n - floor((n)/2)) : n > 0 }")
+    ct = count(strided)
+
+    assert ct.plain_is_equal(reference)
+
+
+def test_count_diagonal_strides():
+    """Expect count() to raise LoopyError for ``(i + j) mod 2 = 0``."""
+    import islpy as isl
+    from loopy.statistics import count
+
+    diag = isl.Set("[n] -> { [i,j] : 0 <= i,j < n and (i +j) mod 2 = 0 }")
+    pytest.raises(LoopyError, count, diag)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])