diff --git a/doc/reference.rst b/doc/reference.rst
index 1c9dda7b5db4831534809ff59ef76216cd21b5cc..e79f17554119c5efe9351f46b8c501c2c27d2387 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -535,7 +535,7 @@ Obtaining Kernel Statistics
 
 .. autofunction:: get_op_poly
 
-.. autofunction:: get_DRAM_access_poly
+.. autofunction:: get_gmem_access_poly
 
 .. autofunction:: get_barrier_poly
 
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 40f171356cda9ec8106f39c1c786efbd8757fcea..ca22c2a64beb69d990797537cae272b587b182f6 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -1256,14 +1256,14 @@ We can evaluate these polynomials using :func:`islpy.eval_with_dict`:
 Counting array accesses
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-:func:`loopy.get_DRAM_access_poly` provides information on the number and type of
+:func:`loopy.get_gmem_access_poly` provides information on the number and type of
 array loads and stores being performed in a kernel. To demonstrate this, we'll
 continue using the kernel from the previous example:
 
 .. doctest::
 
-    >>> from loopy.statistics import get_DRAM_access_poly
-    >>> load_store_map = get_DRAM_access_poly(knl)
+    >>> from loopy.statistics import get_gmem_access_poly
+    >>> load_store_map = get_gmem_access_poly(knl)
     >>> print(load_store_map)
     (dtype('float32'), 'uniform', 'load') : [n, m, l] -> { 3 * n * m * l : n >= 1 and m >= 1 and l >= 1 }
     (dtype('float32'), 'uniform', 'store') : [n, m, l] -> { n * m * l : n >= 1 and m >= 1 and l >= 1 }
@@ -1271,7 +1271,7 @@ continue using the kernel from the previous example:
     (dtype('float64'), 'uniform', 'store') : [n, m, l] -> { n * m : n >= 1 and m >= 1 and l >= 1 }
     <BLANKLINE>
 
-:func:`loopy.get_DRAM_access_poly` returns a mapping of **{(**
+:func:`loopy.get_gmem_access_poly` returns a mapping of **{(**
 :class:`numpy.dtype` **,** :class:`string` **,** :class:`string` **)**
 **:** :class:`islpy.PwQPolynomial` **}**.
 
@@ -1313,7 +1313,7 @@ We can evaluate these polynomials using :func:`islpy.eval_with_dict`:
 ~~~~~~~~~~~
 
 Since we have not tagged any of the inames or parallelized the kernel across threads
-(which would have produced iname tags), :func:`loopy.get_DRAM_access_poly` considers
+(which would have produced iname tags), :func:`loopy.get_gmem_access_poly` considers
 the array accesses *uniform*. Now we'll parallelize the kernel and count the array
 accesses again. The resulting :class:`islpy.PwQPolynomial` will be more complicated
 this time, so we'll print the mapping manually to make it more legible:
@@ -1321,7 +1321,7 @@ this time, so we'll print the mapping manually to make it more legible:
 .. doctest::
 
     >>> knl_consec = lp.split_iname(knl, "k", 128, outer_tag="l.1", inner_tag="l.0")
-    >>> load_store_map = get_DRAM_access_poly(knl_consec)
+    >>> load_store_map = get_gmem_access_poly(knl_consec)
     >>> for key in sorted(load_store_map.dict.keys(), key=lambda k: str(k)):
     ...     print("%s :\n%s\n" % (key, load_store_map.dict[key]))
     (dtype('float32'), 'consecutive', 'load') :
@@ -1368,7 +1368,7 @@ our parallelization of the kernel:
 .. doctest::
 
     >>> knl_nonconsec = lp.split_iname(knl, "k", 128, outer_tag="l.0", inner_tag="l.1")
-    >>> load_store_map = get_DRAM_access_poly(knl_nonconsec)
+    >>> load_store_map = get_gmem_access_poly(knl_nonconsec)
     >>> for key in sorted(load_store_map.dict.keys(), key=lambda k: str(k)):
     ...     print("%s :\n%s\n" % (key, load_store_map.dict[key]))
     (dtype('float32'), 'nonconsecutive', 'load') :
diff --git a/loopy/__init__.py b/loopy/__init__.py
index d24f507c2fb1e87cca3e9cd7b8dd67778bbf6253..8956856d4735a9554ed8c34741790ef1286d9e54 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -63,7 +63,8 @@ from loopy.padding import (split_arg_axis, find_padding_multiple,
 from loopy.preprocess import (preprocess_kernel, realize_reduction,
         infer_unknown_types)
 from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel
-from loopy.statistics import get_op_poly, get_DRAM_access_poly, get_barrier_poly
+from loopy.statistics import (get_op_poly, get_gmem_access_poly,
+        get_DRAM_access_poly, get_barrier_poly)
 from loopy.codegen import generate_code, generate_body
 from loopy.compiled import CompiledKernel
 from loopy.options import Options
@@ -103,7 +104,8 @@ __all__ = [
         "generate_loop_schedules", "get_one_scheduled_kernel",
         "generate_code", "generate_body",
 
-        "get_op_poly", "get_DRAM_access_poly", "get_barrier_poly",
+        "get_op_poly", "get_gmem_access_poly", "get_DRAM_access_poly",
+        "get_barrier_poly",
 
         "CompiledKernel",
 
diff --git a/loopy/statistics.py b/loopy/statistics.py
index 76345f097b9c3100a9a74ece5b65af94ec276454..d25ea3eaca44187e8ad7020b6ddfd9e5bedc95bc 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -55,7 +55,7 @@ class ToCountMap:
                                 "to {} {}. ToCountMap may only be added to "
                                 "0 and other ToCountMap objects."
                                 .format(type(other), other))
-            return
+
         return self
 
     def __mul__(self, other):
@@ -207,7 +207,7 @@ class ExpressionOpCounter(CombineMapper):
                                   "map_slice not implemented.")
 
 
-class ExpressionSubscriptCounter(CombineMapper):
+class GlobalSubscriptCounter(CombineMapper):
 
     def __init__(self, knl):
         self.knl = knl
@@ -345,12 +345,12 @@ class ExpressionSubscriptCounter(CombineMapper):
     map_logical_and = map_logical_or
 
     def map_if(self, expr):
-        warnings.warn("ExpressionSubscriptCounter counting DRAM accesses as "
+        warnings.warn("GlobalSubscriptCounter counting global memory accesses as "
                       "sum of if-statement branches.")
         return self.rec(expr.condition) + self.rec(expr.then) + self.rec(expr.else_)
 
     def map_if_positive(self, expr):
-        warnings.warn("ExpressionSubscriptCounter counting DRAM accesses as "
+        warnings.warn("GlobalSubscriptCounter counting global memory accesses as "
                       "sum of if_pos-statement branches.")
         return self.rec(expr.criterion) + self.rec(expr.then) + self.rec(expr.else_)
 
@@ -358,22 +358,22 @@ class ExpressionSubscriptCounter(CombineMapper):
     map_max = map_min
 
     def map_common_subexpression(self, expr):
-        raise NotImplementedError("ExpressionSubscriptCounter encountered "
+        raise NotImplementedError("GlobalSubscriptCounter encountered "
                                   "common_subexpression, "
                                   "map_common_subexpression not implemented.")
 
     def map_substitution(self, expr):
-        raise NotImplementedError("ExpressionSubscriptCounter encountered "
+        raise NotImplementedError("GlobalSubscriptCounter encountered "
                                   "substitution, "
                                   "map_substitution not implemented.")
 
     def map_derivative(self, expr):
-        raise NotImplementedError("ExpressionSubscriptCounter encountered "
+        raise NotImplementedError("GlobalSubscriptCounter encountered "
                                   "derivative, "
                                   "map_derivative not implemented.")
 
     def map_slice(self, expr):
-        raise NotImplementedError("ExpressionSubscriptCounter encountered slice, "
+        raise NotImplementedError("GlobalSubscriptCounter encountered slice, "
                                   "map_slice not implemented.")
 
 
@@ -450,9 +450,8 @@ def get_op_poly(knl):
     return op_poly
 
 
-def get_DRAM_access_poly(knl):  # for now just counting subscripts
-
-    """Count the number of DRAM accesses in a loopy kernel.
+def get_gmem_access_poly(knl):  # for now just counting subscripts
+    """Count the number of global memory accesses in a loopy kernel.
 
     :parameter knl: A :class:`loopy.LoopKernel` whose DRAM accesses are to be
                     counted.
@@ -477,7 +476,7 @@ def get_DRAM_access_poly(knl):  # for now just counting subscripts
 
         # (first create loopy kernel and specify array data types)
 
-        subscript_map = get_DRAM_access_poly(knl)
+        subscript_map = get_gmem_access_poly(knl)
         params = {'n': 512, 'm': 256, 'l': 128}
 
         f32_uncoalesced_load = subscript_map.dict[
@@ -499,7 +498,7 @@ def get_DRAM_access_poly(knl):  # for now just counting subscripts
     knl = preprocess_kernel(knl)
 
     subs_poly = 0
-    subscript_counter = ExpressionSubscriptCounter(knl)
+    subscript_counter = GlobalSubscriptCounter(knl)
     for insn in knl.instructions:
         insn_inames = knl.insn_inames(insn)
         inames_domain = knl.get_inames_domain(insn_inames)
@@ -518,6 +517,13 @@ def get_DRAM_access_poly(knl):  # for now just counting subscripts
     return subs_poly
 
 
+def get_DRAM_access_poly(knl):
+    """Deprecated alias for :func:`get_gmem_access_poly`."""
+    warnings.warn("get_DRAM_access_poly is deprecated. "
+            "Use get_gmem_access_poly instead", DeprecationWarning, stacklevel=2)
+    return get_gmem_access_poly(knl)
+
+
 def get_barrier_poly(knl):
 
     """Count the number of barriers each thread encounters in a loopy kernel.
diff --git a/test/test_statistics.py b/test/test_statistics.py
index 87ed797e74fd709c29ad9d763e195ff46985ed96..a58ce6d582a8d03d622028156adff35c61009bc0 100644
--- a/test/test_statistics.py
+++ b/test/test_statistics.py
@@ -27,7 +27,7 @@ from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl
         as pytest_generate_tests)
 import loopy as lp
-from loopy.statistics import get_op_poly, get_DRAM_access_poly, get_barrier_poly
+from loopy.statistics import get_op_poly, get_gmem_access_poly, get_barrier_poly
 import numpy as np
 
 
@@ -185,7 +185,7 @@ def test_op_counter_triangular_domain():
         assert flops == 78
 
 
-def test_DRAM_access_counter_basic():
+def test_gmem_access_counter_basic():
 
     knl = lp.make_kernel(
             "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -199,7 +199,7 @@ def test_DRAM_access_counter_basic():
 
     knl = lp.add_and_infer_dtypes(knl,
                         dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64))
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -222,7 +222,7 @@ def test_DRAM_access_counter_basic():
     assert f64 == n*m
 
 
-def test_DRAM_access_counter_reduction():
+def test_gmem_access_counter_reduction():
 
     knl = lp.make_kernel(
             "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -232,7 +232,7 @@ def test_DRAM_access_counter_reduction():
             name="matmul", assumptions="n,m,l >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(a=np.float32, b=np.float32))
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -247,7 +247,7 @@ def test_DRAM_access_counter_reduction():
     assert f32 == n*l
 
 
-def test_DRAM_access_counter_logic():
+def test_gmem_access_counter_logic():
 
     knl = lp.make_kernel(
             "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -259,7 +259,7 @@ def test_DRAM_access_counter_logic():
             name="logic", assumptions="n,m,l >= 1")
 
     knl = lp.add_and_infer_dtypes(knl, dict(g=np.float32, h=np.float64))
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -278,7 +278,7 @@ def test_DRAM_access_counter_logic():
     assert f64 == n*m
 
 
-def test_DRAM_access_counter_specialops():
+def test_gmem_access_counter_specialops():
 
     knl = lp.make_kernel(
             "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -292,7 +292,7 @@ def test_DRAM_access_counter_specialops():
 
     knl = lp.add_and_infer_dtypes(knl,
                         dict(a=np.float32, b=np.float32, g=np.float64, h=np.float64))
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -315,7 +315,7 @@ def test_DRAM_access_counter_specialops():
     assert f64 == n*m
 
 
-def test_DRAM_access_counter_bitwise():
+def test_gmem_access_counter_bitwise():
 
     knl = lp.make_kernel(
             "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -332,7 +332,7 @@ def test_DRAM_access_counter_bitwise():
                 a=np.int32, b=np.int32,
                 g=np.int32, h=np.int32))
 
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -347,7 +347,7 @@ def test_DRAM_access_counter_bitwise():
     assert i32 == n*m+n*m*l
 
 
-def test_DRAM_access_counter_mixed():
+def test_gmem_access_counter_mixed():
 
     knl = lp.make_kernel(
             "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -363,7 +363,7 @@ def test_DRAM_access_counter_mixed():
     knl = lp.split_iname(knl, "j", 16)
     knl = lp.tag_inames(knl, {"j_inner": "l.0", "j_outer": "g.0"})
 
-    poly = get_DRAM_access_poly(knl)  # noqa
+    poly = get_gmem_access_poly(knl)  # noqa
     n = 512
     m = 256
     l = 128
@@ -386,7 +386,7 @@ def test_DRAM_access_counter_mixed():
     assert f32nonconsec == n*m*l
 
 
-def test_DRAM_access_counter_nonconsec():
+def test_gmem_access_counter_nonconsec():
 
     knl = lp.make_kernel(
             "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -402,7 +402,7 @@ def test_DRAM_access_counter_nonconsec():
     knl = lp.split_iname(knl, "i", 16)
     knl = lp.tag_inames(knl, {"i_inner": "l.0", "i_outer": "g.0"})
 
-    poly = get_DRAM_access_poly(knl)  # noqa
+    poly = get_gmem_access_poly(knl)  # noqa
     n = 512
     m = 256
     l = 128
@@ -425,7 +425,7 @@ def test_DRAM_access_counter_nonconsec():
     assert f32nonconsec == n*m*l
 
 
-def test_DRAM_access_counter_consec():
+def test_gmem_access_counter_consec():
 
     knl = lp.make_kernel(
             "[n,m,l] -> {[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<l}",
@@ -440,7 +440,7 @@ def test_DRAM_access_counter_consec():
                 a=np.float32, b=np.float32, g=np.float64, h=np.float64))
     knl = lp.tag_inames(knl, {"k": "l.0", "i": "g.0", "j": "g.1"})
 
-    poly = get_DRAM_access_poly(knl)
+    poly = get_gmem_access_poly(knl)
     n = 512
     m = 256
     l = 128
@@ -541,7 +541,7 @@ def test_all_counters_parallel_matmul():
     assert f32ops == n*m*l*2
     assert i32ops == n*m*l*4 + l*n*4
 
-    subscript_map = get_DRAM_access_poly(knl)
+    subscript_map = get_gmem_access_poly(knl)
     f32uncoal = subscript_map.dict[
                         (np.dtype(np.float32), 'nonconsecutive', 'load')
                         ].eval_with_dict({'n': n, 'm': m, 'l': l})