diff --git a/loopy/statistics.py b/loopy/statistics.py
index d04b84e3cfda464825334b48b489290d5c025356..91d15e7e079d250ec879fb84be45e7bb40fe4320 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -529,7 +529,7 @@ class Op(Record):
                             count_granularity=count_granularity)
 
     def __hash__(self):
-        return hash(str(self))
+        return hash(repr(self))
 
     def __repr__(self):
         # Record.__repr__ overridden for consistent ordering and conciseness
@@ -612,14 +612,16 @@ class MemAccess(Record):
                             variable=variable, count_granularity=count_granularity)
 
     def __hash__(self):
-        return hash(str(self))
+        # Hash is taken from repr(), so __repr__() must sort lid_strides
+        return hash(repr(self))
 
     def __repr__(self):
         # Record.__repr__ overridden for consistent ordering and conciseness
         return "MemAccess(%s, %s, %s, %s, %s, %s)" % (
             self.mtype,
             self.dtype,
-            self.lid_strides,
+            None if self.lid_strides is None else dict(
+                sorted(six.iteritems(self.lid_strides))),
             self.direction,
             self.variable,
             self.count_granularity)
@@ -870,7 +872,7 @@ class GlobalMemAccessCounter(MemAccessCounter):
             return ToCountMap()
 
         return ToCountMap({MemAccess(mtype='global',
-                                     dtype=self.type_inf(expr), lid_strides=[],
+                                     dtype=self.type_inf(expr), lid_strides={},
                                      variable=name,
                                      count_granularity=CountGranularity.WORKITEM): 1}
                           ) + self.rec(expr.index)
@@ -906,18 +908,18 @@ class GlobalMemAccessCounter(MemAccessCounter):
         if not lid_to_iname:
 
             # no local id found, count as uniform access
-            # Note:
-            # lid_strides=[] when no local ids were found,
-            # lid_strides=[0, ...] if any local id is found and the lid0 stride is 0,
-            # either because no lid0 is found or because the stride of lid0 is 0
+            # Note: several lid_strides values may be considered uniform:
+            # lid_strides={} if no local ids were found (this branch),
+            # lid_strides={1: 1, 2: 32} (e.g.) if local id 0 was not found,
+            # lid_strides={0: 0, ...} if local id 0 was found with stride 0
             warn_with_kernel(self.knl, "no_lid_found",
                              "GlobalSubscriptCounter: No local id found, "
-                             "setting lid_strides to []. Expression: %s"
+                             "setting lid_strides to {}. Expression: %s"
                              % (expr))
 
             return ToCountMap({MemAccess(
                                 mtype='global',
-                                dtype=self.type_inf(expr), lid_strides=[],
+                                dtype=self.type_inf(expr), lid_strides={},
                                 variable=name,
                                 count_granularity=CountGranularity.SUBGROUP): 1}
                               ) + self.rec(expr.index)
@@ -965,18 +967,18 @@ class GlobalMemAccessCounter(MemAccessCounter):
             lid_strides[ltag] = ltag_stride
 
         # insert 0s for coeffs of missing *lesser* lids
-        for i in range(max(lid_strides.keys())+1):
-            if i not in lid_strides.keys():
-                lid_strides[i] = 0
+        # (disabled: lid_strides is a dict keyed by local id, so a missing
+        # lesser lid is simply an absent key; the granularity check below
+        # handles a missing lid 0 explicitly)
 
-        count_granularity = CountGranularity.WORKITEM if lid_strides[0] != 0 \
-                                else CountGranularity.SUBGROUP
+        count_granularity = CountGranularity.WORKITEM if (
+                                0 in lid_strides and lid_strides[0] != 0
+                                ) else CountGranularity.SUBGROUP
 
         return ToCountMap({MemAccess(
                             mtype='global',
                             dtype=self.type_inf(expr),
-                            lid_strides=[lid_strides[i]
-                                         for i in sorted(lid_strides)],
+                            lid_strides=dict(sorted(six.iteritems(lid_strides))),
                             variable=name,
                             count_granularity=count_granularity
                             ): 1}
diff --git a/test/test_statistics.py b/test/test_statistics.py
index cc81e1592d9cd57fc26357dce6bf3b8d55129174..e42c43f60179321114fb695978cc1c91f182e8ee 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -269,19 +269,19 @@ def test_mem_access_counter_basic():
     subgroups_per_group = div_ceil(group_size, subgroup_size)
 
     f32l = mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='a',
+                         lid_strides={}, direction='load', variable='a',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     f32l += mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='b',
+                         lid_strides={}, direction='load', variable='b',
                          count_granularity=CG.SUBGROUP)
                     ].eval_with_dict(params)
     f64l = mem_map[lp.MemAccess('global', np.float64,
-                         lid_strides=[], direction='load', variable='g',
+                         lid_strides={}, direction='load', variable='g',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     f64l += mem_map[lp.MemAccess('global', np.float64,
-                         lid_strides=[], direction='load', variable='h',
+                         lid_strides={}, direction='load', variable='h',
                          count_granularity=CG.SUBGROUP)
                     ].eval_with_dict(params)
 
@@ -290,11 +290,11 @@ def test_mem_access_counter_basic():
     assert f64l == (2*n*m)*n_workgroups*subgroups_per_group
 
     f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                         lid_strides=[], direction='store', variable='c',
+                         lid_strides={}, direction='store', variable='c',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     f64s = mem_map[lp.MemAccess('global', np.dtype(np.float64),
-                         lid_strides=[], direction='store', variable='e',
+                         lid_strides={}, direction='store', variable='e',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
 
@@ -328,11 +328,11 @@ def test_mem_access_counter_reduction():
     subgroups_per_group = div_ceil(group_size, subgroup_size)
 
     f32l = mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='a',
+                         lid_strides={}, direction='load', variable='a',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     f32l += mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='b',
+                         lid_strides={}, direction='load', variable='b',
                          count_granularity=CG.SUBGROUP)
                     ].eval_with_dict(params)
 
@@ -340,7 +340,7 @@ def test_mem_access_counter_reduction():
     assert f32l == (2*n*m*ell)*n_workgroups*subgroups_per_group
 
     f32s = mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                         lid_strides=[], direction='store', variable='c',
+                         lid_strides={}, direction='store', variable='c',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
 
@@ -430,19 +430,19 @@ def test_mem_access_counter_specialops():
     subgroups_per_group = div_ceil(group_size, subgroup_size)
 
     f32 = mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='a',
+                         lid_strides={}, direction='load', variable='a',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
     f32 += mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='load', variable='b',
+                         lid_strides={}, direction='load', variable='b',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     f64 = mem_map[lp.MemAccess('global', np.dtype(np.float64),
-                         lid_strides=[], direction='load', variable='g',
+                         lid_strides={}, direction='load', variable='g',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
     f64 += mem_map[lp.MemAccess('global', np.dtype(np.float64),
-                         lid_strides=[], direction='load', variable='h',
+                         lid_strides={}, direction='load', variable='h',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
 
@@ -451,11 +451,11 @@ def test_mem_access_counter_specialops():
     assert f64 == (2*n*m)*n_workgroups*subgroups_per_group
 
     f32 = mem_map[lp.MemAccess('global', np.float32,
-                         lid_strides=[], direction='store', variable='c',
+                         lid_strides={}, direction='store', variable='c',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
     f64 = mem_map[lp.MemAccess('global', np.float64,
-                         lid_strides=[], direction='store', variable='e',
+                         lid_strides={}, direction='store', variable='e',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
 
@@ -502,19 +502,19 @@ def test_mem_access_counter_bitwise():
     subgroups_per_group = div_ceil(group_size, subgroup_size)
 
     i32 = mem_map[lp.MemAccess('global', np.int32,
-                         lid_strides=[], direction='load', variable='a',
+                         lid_strides={}, direction='load', variable='a',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
     i32 += mem_map[lp.MemAccess('global', np.int32,
-                         lid_strides=[], direction='load', variable='b',
+                         lid_strides={}, direction='load', variable='b',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     i32 += mem_map[lp.MemAccess('global', np.int32,
-                         lid_strides=[], direction='load', variable='g',
+                         lid_strides={}, direction='load', variable='g',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
     i32 += mem_map[lp.MemAccess('global', np.dtype(np.int32),
-                         lid_strides=[], direction='load', variable='h',
+                         lid_strides={}, direction='load', variable='h',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
 
@@ -522,11 +522,11 @@ def test_mem_access_counter_bitwise():
     assert i32 == (4*n*m+2*n*m*ell)*n_workgroups*subgroups_per_group
 
     i32 = mem_map[lp.MemAccess('global', np.int32,
-                         lid_strides=[], direction='store', variable='c',
+                         lid_strides={}, direction='store', variable='c',
                          count_granularity=CG.SUBGROUP)
                   ].eval_with_dict(params)
     i32 += mem_map[lp.MemAccess('global', np.int32,
-                         lid_strides=[], direction='store', variable='e',
+                         lid_strides={}, direction='store', variable='e',
                          count_granularity=CG.SUBGROUP)
                    ].eval_with_dict(params)
 
@@ -567,24 +567,24 @@ def test_mem_access_counter_mixed():
     mem_map = lp.get_mem_access_map(knl, count_redundant_work=True,
                                     subgroup_size=subgroup_size)
     f64uniform = mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[], direction='load', variable='g',
+                                lid_strides={}, direction='load', variable='g',
                                 count_granularity=CG.SUBGROUP)
                          ].eval_with_dict(params)
     f64uniform += mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[], direction='load', variable='h',
+                                lid_strides={}, direction='load', variable='h',
                                 count_granularity=CG.SUBGROUP)
                           ].eval_with_dict(params)
     f32uniform = mem_map[lp.MemAccess('global', np.float32,
-                                lid_strides=[], direction='load', variable='x',
+                                lid_strides={}, direction='load', variable='x',
                                 count_granularity=CG.SUBGROUP)
                          ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                lid_strides=[Variable('m')], direction='load',
+                                lid_strides={0: Variable('m')}, direction='load',
                                 variable='a',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
     f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                lid_strides=[Variable('m')], direction='load',
+                                lid_strides={0: Variable('m')}, direction='load',
                                 variable='b',
                                 count_granularity=CG.WORKITEM)
                             ].eval_with_dict(params)
@@ -611,11 +611,11 @@ def test_mem_access_counter_mixed():
         assert f32nonconsec == 3*n*m*ell
 
     f64uniform = mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[], direction='store', variable='e',
+                                lid_strides={}, direction='store', variable='e',
                                 count_granularity=CG.SUBGROUP)
                          ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.float32,
-                                lid_strides=[Variable('m')], direction='store',
+                                lid_strides={0: Variable('m')}, direction='store',
                                 variable='c',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
@@ -655,22 +655,22 @@ def test_mem_access_counter_nonconsec():
     ell = 128
     params = {'n': n, 'm': m, 'ell': ell}
     f64nonconsec = mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[Variable('m')], direction='load',
+                                lid_strides={0: Variable('m')}, direction='load',
                                 variable='g',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
     f64nonconsec += mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[Variable('m')], direction='load',
+                                lid_strides={0: Variable('m')}, direction='load',
                                 variable='h',
                                 count_granularity=CG.WORKITEM)
                             ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                lid_strides=[Variable('m')*Variable('ell')],
+                                lid_strides={0: Variable('m')*Variable('ell')},
                                 direction='load', variable='a',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
     f32nonconsec += mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                                lid_strides=[Variable('m')*Variable('ell')],
+                                lid_strides={0: Variable('m')*Variable('ell')},
                                 direction='load', variable='b',
                                 count_granularity=CG.WORKITEM)
                             ].eval_with_dict(params)
@@ -678,12 +678,12 @@ def test_mem_access_counter_nonconsec():
     assert f32nonconsec == 3*n*m*ell
 
     f64nonconsec = mem_map[lp.MemAccess('global', np.float64,
-                                lid_strides=[Variable('m')], direction='store',
+                                lid_strides={0: Variable('m')}, direction='store',
                                 variable='e',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
     f32nonconsec = mem_map[lp.MemAccess('global', np.float32,
-                                lid_strides=[Variable('m')*Variable('ell')],
+                                lid_strides={0: Variable('m')*Variable('ell')},
                                 direction='store', variable='c',
                                 count_granularity=CG.WORKITEM)
                            ].eval_with_dict(params)
@@ -694,20 +694,20 @@ def test_mem_access_counter_nonconsec():
                                       subgroup_size=64)
     f64nonconsec = mem_map64[lp.MemAccess(
                     'global',
-                    np.float64, lid_strides=[Variable('m')],
+                    np.float64, lid_strides={0: Variable('m')},
                     direction='load', variable='g',
                     count_granularity=CG.WORKITEM)
                     ].eval_with_dict(params)
     f64nonconsec += mem_map64[lp.MemAccess(
                     'global',
-                    np.float64, lid_strides=[Variable('m')],
+                    np.float64, lid_strides={0: Variable('m')},
                     direction='load', variable='h',
                     count_granularity=CG.WORKITEM)
                     ].eval_with_dict(params)
     f32nonconsec = mem_map64[lp.MemAccess(
                     'global',
                     np.dtype(np.float32),
-                    lid_strides=[Variable('m')*Variable('ell')],
+                    lid_strides={0: Variable('m')*Variable('ell')},
                     direction='load',
                     variable='a',
                     count_granularity=CG.WORKITEM)
@@ -715,7 +715,7 @@ def test_mem_access_counter_nonconsec():
     f32nonconsec += mem_map64[lp.MemAccess(
                     'global',
                     np.dtype(np.float32),
-                    lid_strides=[Variable('m')*Variable('ell')],
+                    lid_strides={0: Variable('m')*Variable('ell')},
                     direction='load',
                     variable='b',
                     count_granularity=CG.WORKITEM)
@@ -747,30 +747,30 @@ def test_mem_access_counter_consec():
     params = {'n': n, 'm': m, 'ell': ell}
 
     f64consec = mem_map[lp.MemAccess('global', np.float64,
-                        lid_strides=[1], direction='load', variable='g',
+                        lid_strides={0: 1}, direction='load', variable='g',
                         count_granularity=CG.WORKITEM)
                         ].eval_with_dict(params)
     f64consec += mem_map[lp.MemAccess('global', np.float64,
-                        lid_strides=[1], direction='load', variable='h',
+                        lid_strides={0: 1}, direction='load', variable='h',
                         count_granularity=CG.WORKITEM)
                          ].eval_with_dict(params)
     f32consec = mem_map[lp.MemAccess('global', np.float32,
-                        lid_strides=[1], direction='load', variable='a',
+                        lid_strides={0: 1}, direction='load', variable='a',
                         count_granularity=CG.WORKITEM)
                         ].eval_with_dict(params)
     f32consec += mem_map[lp.MemAccess('global', np.dtype(np.float32),
-                        lid_strides=[1], direction='load', variable='b',
+                        lid_strides={0: 1}, direction='load', variable='b',
                         count_granularity=CG.WORKITEM)
                          ].eval_with_dict(params)
     assert f64consec == 2*n*m*ell
     assert f32consec == 3*n*m*ell
 
     f64consec = mem_map[lp.MemAccess('global', np.float64,
-                        lid_strides=[1], direction='store', variable='e',
+                        lid_strides={0: 1}, direction='store', variable='e',
                         count_granularity=CG.WORKITEM)
                         ].eval_with_dict(params)
     f32consec = mem_map[lp.MemAccess('global', np.float32,
-                        lid_strides=[1], direction='store', variable='c',
+                        lid_strides={0: 1}, direction='store', variable='c',
                         count_granularity=CG.WORKITEM)
                         ].eval_with_dict(params)
     assert f64consec == n*m*ell
@@ -897,11 +897,12 @@ def test_all_counters_parallel_matmul():
                                            subgroup_size=32)
 
     f32s1lb = mem_access_map[lp.MemAccess('global', np.float32,
-                             lid_strides=[1, Variable('ell')], direction='load',
-                             variable='b', count_granularity=CG.WORKITEM)
+                             lid_strides={0: 1, 1: Variable('ell')},
+                             direction='load', variable='b',
+                             count_granularity=CG.WORKITEM)
                              ].eval_with_dict(params)
     f32s1la = mem_access_map[lp.MemAccess('global', np.float32,
-                             lid_strides=[1, Variable('m')], direction='load',
+                             lid_strides={0: 1, 1: Variable('m')}, direction='load',
                              variable='a', count_granularity=CG.WORKITEM)
                              ].eval_with_dict(params)
 
@@ -909,8 +910,9 @@ def test_all_counters_parallel_matmul():
     assert f32s1la == n*m*ell/bsize
 
     f32coal = mem_access_map[lp.MemAccess('global', np.float32,
-                             lid_strides=[1, Variable('ell')], direction='store',
-                             variable='c', count_granularity=CG.WORKITEM)
+                             lid_strides={0: 1, 1: Variable('ell')},
+                             direction='store', variable='c',
+                             count_granularity=CG.WORKITEM)
                              ].eval_with_dict(params)
 
     assert f32coal == n*ell
@@ -1056,7 +1058,7 @@ def test_summations_and_filters():
     assert f64ops_all == n*m
 
     def func_filter(key):
-        return key.lid_strides == [] and key.dtype == to_loopy_type(np.float64) and \
+        return key.lid_strides == {} and key.dtype == to_loopy_type(np.float64) and \
                key.direction == 'load'
     f64l = mem_map.filter_by_func(func_filter).eval_and_sum(params)