diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index d1903a282aa0e8c6cd958af8a20819d3eed1cd75..9503a2a1899969ebd19f72f312498e7c4a195922 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -307,15 +307,15 @@ class P2PMatrixBlockGenerator(P2PBase):
         arguments = (
             self.get_default_src_tgt_arguments() +
             [
-                lp.GlobalArg("srcindices", None, shape="nresults"),
-                lp.GlobalArg("tgtindices", None, shape="nresults"),
-                lp.ValueArg("nresults", None)
+                lp.GlobalArg("srcindices", None, shape="nresult"),
+                lp.GlobalArg("tgtindices", None, shape="nresult"),
+                lp.ValueArg("nresult", None)
             ] +
-            [lp.GlobalArg("result_%d" % i, dtype, shape="nresults")
+            [lp.GlobalArg("result_%d" % i, dtype, shape="nresult")
              for i, dtype in enumerate(self.value_dtypes)])
 
         loopy_knl = lp.make_kernel(
-            "{[imat, idim]: 0 <= imat < nresults and 0 <= idim < dim}",
+            "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}",
             self.get_kernel_scaling_assignments()
             + ["""
                 for imat
@@ -335,7 +335,7 @@ class P2PMatrixBlockGenerator(P2PBase):
                 for iknl in range(len(self.kernels))]
             + ["end"],
             arguments,
-            assumptions="nresults>=1",
+            assumptions="nresult>=1",
             silenced_warnings="write_race(write_p2p*)",
             name=self.name,
             fixed_parameters=dict(dim=self.dim),
@@ -392,9 +392,9 @@ class P2PMatrixBlockGenerator(P2PBase):
                 sources_is_obj_array=(
                     is_obj_array(sources) or isinstance(sources, (tuple, list))))
 
-        rowindices, colindices = index_set.linear_indices()
+        tgtindices, srcindices = index_set.linear_indices()
         return knl(queue, targets=targets, sources=sources,
-            tgtindices=rowindices, srcindices=colindices, **kwargs)
+                tgtindices=tgtindices, srcindices=srcindices, **kwargs)
 
 # }}}
 
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index f3b2d6f78964d3d4f5a88248f7b9a08d32b722eb..73f7ec3d12e818cd2b111332d8285a930f306d3e 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -335,68 +335,44 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
         arguments = (
             self.get_default_src_tgt_arguments() +
             [
-                lp.GlobalArg("srcindices", None, shape="nsrcindices"),
-                lp.GlobalArg("tgtindices", None, shape="ntgtindices"),
-                lp.GlobalArg("srcranges", None, shape="nranges + 1"),
-                lp.GlobalArg("tgtranges", None, shape="nranges + 1"),
-                lp.ValueArg("nsrcindices", None),
-                lp.ValueArg("ntgtindices", None),
-                lp.ValueArg("nranges", None)
+                lp.GlobalArg("srcindices", None, shape="nresult"),
+                lp.GlobalArg("tgtindices", None, shape="nresult"),
+                lp.ValueArg("nresult", None)
             ] +
-            [lp.GlobalArg("result_%d" % i,
-                dtype, shape="ntgtindices, nsrcindices")
-            for i, dtype in enumerate(self.value_dtypes)])
+            [lp.GlobalArg("result_%d" % i, dtype, shape="nresult")
+             for i, dtype in enumerate(self.value_dtypes)])
 
         loopy_knl = lp.make_kernel([
-            "{[irange]: 0 <= irange < nranges}",
-            "{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}",
-            "{[idim]: 0 <= idim < dim}"
+            "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}"
             ],
             self.get_kernel_scaling_assignments()
             + ["""
-                for irange
-                <> tgtstart = tgtranges[irange]
-                <> tgtend = tgtranges[irange + 1]
-                <> ntgtblock = tgtend - tgtstart
-                <> srcstart = srcranges[irange]
-                <> srcend = srcranges[irange + 1]
-                <> nsrcblock = srcend - srcstart
-
-                for itgt, isrc
-                    <> a[idim] = center[idim, tgtindices[tgtstart + itgt]] - \
-                                 src[idim, srcindices[srcstart + isrc]] \
-                                 {dup=idim}
-                    <> b[idim] = tgt[idim, tgtindices[tgtstart + itgt]] - \
-                                 center[idim, tgtindices[tgtstart + itgt]] \
-                                 {dup=idim}
-                    <> rscale = expansion_radii[tgtindices[tgtstart + itgt]]
+                for imat
+                    <> a[idim] = center[idim, tgtindices[imat]] - \
+                                 src[idim, srcindices[imat]] {dup=idim}
+                    <> b[idim] = tgt[idim, tgtindices[imat]] - \
+                                 center[idim, tgtindices[imat]] {dup=idim}
+                    <> rscale = expansion_radii[tgtindices[imat]]
             """]
             + loopy_insns + kernel_exprs
             + ["""
-                    result_{i}[tgtstart + itgt, srcstart + isrc] = \
-                        knl_{i}_scaling * pair_result_{i} \
-                            {{id_prefix=write_lpot,inames=irange}}
+                    result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \
+                            {{id_prefix=write_lpot}}
                 """.format(i=iknl)
                 for iknl in range(len(self.expansions))]
-            + ["""
-                    end
-                end
-            """],
+            + ["end"],
             arguments,
             name=self.name,
-            assumptions="nranges>=1",
+            assumptions="nresult>=1",
             default_offset=lp.auto,
             silenced_warnings="write_race(write_lpot*)",
             fixed_parameters=dict(dim=self.dim),
             lang_version=MOST_RECENT_LANGUAGE_VERSION)
 
-        loopy_knl = lp.add_dtypes(loopy_knl, dict(
-            nsources=np.int32,
-            ntargets=np.int32,
-            ntgtindices=np.int32,
-            nsrcindices=np.int32))
-
         loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
+        loopy_knl = lp.add_dtypes(loopy_knl,
+            dict(nsources=np.int32, ntargets=np.int32))
+
         for expn in self.expansions:
             loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
 
@@ -405,17 +381,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
     def get_optimized_kernel(self):
         loopy_knl = self.get_kernel()
 
-        loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0")
+        loopy_knl = lp.split_iname(loopy_knl, "imat", 1024, outer_tag="g.0")
         return loopy_knl
 
     def __call__(self, queue, targets, sources, centers, expansion_radii,
-            tgtindices, srcindices, tgtranges, srcranges, **kwargs):
+                 index_set, **kwargs):
         knl = self.get_cached_optimized_kernel()
 
+        tgtindices, srcindices = index_set.linear_indices()
         return knl(queue, src=sources, tgt=targets, center=centers,
                 expansion_radii=expansion_radii,
-                tgtindices=tgtindices, srcindices=srcindices,
-                tgtranges=tgtranges, srcranges=srcranges, **kwargs)
+                tgtindices=tgtindices, srcindices=srcindices, **kwargs)
 
 # }}}
 
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index d6e85c23dc835034f99b8c16d9f422730da7d650..a687ee16b04ad2af406ceff02aa54e2279404346 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -31,6 +31,8 @@ import pyopencl as cl
 from pyopencl.tools import (  # noqa
         pytest_generate_tests_for_pyopencl as pytest_generate_tests)
 
+from sumpy.tools import MatrixBlockIndex
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -82,8 +84,8 @@ def create_index_subset(nnodes, nblks, factor):
 
     return indices_, ranges_
 
-
-def test_qbx_direct(ctx_getter):
+@pytest.mark.parametrize('factor', [1.0, 0.6])
+def test_qbx_direct(ctx_getter, factor):
     # This evaluates a single layer potential on a circle.
     logging.basicConfig(level=logging.INFO)
 
@@ -114,14 +116,8 @@ def test_qbx_direct(ctx_getter):
         h = 2 * np.pi / n
         strengths = (sigma * h,)
 
-        tgtindices = np.arange(0, n)
-        tgtindices = np.random.choice(tgtindices, size=int(0.8 * n))
-        tgtranges = np.arange(0, tgtindices.shape[0] + 1,
-                              tgtindices.shape[0] // nblks)
-        srcindices = np.arange(0, n)
-        srcindices = np.random.choice(srcindices, size=int(0.8 * n))
-        srcranges = np.arange(0, srcindices.shape[0] + 1,
-                              srcindices.shape[0] // nblks)
+        tgtindices, tgtranges = create_index_subset(n, nblks, factor)
+        srcindices, srcranges = create_index_subset(n, nblks, factor)
         assert tgtranges.shape == srcranges.shape
 
         _, (mat,) = mat_gen(queue, targets, sources, centers,
@@ -134,16 +130,14 @@ def test_qbx_direct(ctx_getter):
         eps = 1.0e-10 * la.norm(result_lpot)
         assert la.norm(result_mat - result_lpot) < eps
 
+        index_set = MatrixBlockIndex(queue,
+            tgtindices, srcindices, tgtranges, srcranges)
         _, (blk,) = blk_gen(queue, targets, sources, centers, expansion_radii,
-                            tgtindices, srcindices, tgtranges, srcranges)
+                            index_set)
 
-        for i in range(srcranges.shape[0] - 1):
-            itgt = np.s_[tgtranges[i]:tgtranges[i + 1]]
-            isrc = np.s_[srcranges[i]:srcranges[i + 1]]
-            block = np.ix_(tgtindices[itgt], srcindices[isrc])
-
-            eps = 1.0e-10 * la.norm(mat[block])
-            assert la.norm(blk[itgt, isrc] - mat[block]) < eps
+        rowindices, colindices = index_set.linear_indices()
+        eps = 1.0e-10 * la.norm(mat)
+        assert la.norm(blk - mat[rowindices, colindices].reshape(-1)) < eps
 
 
 @pytest.mark.parametrize(("exclude_self", "factor"),
@@ -161,7 +155,6 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
     mode_nr = 25
 
     from sumpy.p2p import P2P
-    from sumpy.tools import MatrixBlockIndex
     from sumpy.p2p import P2PMatrixGenerator, P2PMatrixBlockGenerator
     lpot = P2P(ctx, [lknl], exclude_self=exclude_self)
     mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self)