From 01872e98236fd24119cefb6558c29a4158fefe0f Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 2 Jul 2020 22:05:03 -0500 Subject: [PATCH 01/12] port test_matrix to array-context --- pytential/symbolic/execution.py | 8 +- pytential/symbolic/matrix.py | 208 +++++++++++++++++++------------- test/test_matrix.py | 93 +++++++------- 3 files changed, 175 insertions(+), 134 deletions(-) diff --git a/pytential/symbolic/execution.py b/pytential/symbolic/execution.py index e46f7509..4b36a56b 100644 --- a/pytential/symbolic/execution.py +++ b/pytential/symbolic/execution.py @@ -1031,10 +1031,10 @@ def _bmat(blocks, dtypes): return result -def build_matrix(queue, places, exprs, input_exprs, domains=None, +def build_matrix(actx, places, exprs, input_exprs, domains=None, auto_where=None, context=None): """ - :arg queue: a :class:`pyopencl.CommandQueue`. + :arg actx: a :class:`~meshmode.array_context.ArrayContext`. :arg places: a :class:`~pytential.symbolic.execution.GeometryCollection`. Alternatively, any list or mapping that is a valid argument for its constructor can also be used. @@ -1085,7 +1085,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None, domains[ibcol].geometry, domains[ibcol].discr_stage) mbuilder = MatrixBuilder( - queue, + actx, dep_expr=input_exprs[ibcol], other_dep_exprs=(input_exprs[:ibcol] + input_exprs[ibcol + 1:]), @@ -1102,7 +1102,7 @@ def build_matrix(queue, places, exprs, input_exprs, domains=None, if isinstance(block, np.ndarray): dtypes.append(block.dtype) - return cl.array.to_device(queue, _bmat(blocks, dtypes)) + return actx.from_numpy(_bmat(blocks, dtypes)) # }}} diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index d63f3054..12ec489a 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -26,14 +26,13 @@ THE SOFTWARE. """ import numpy as np -import pyopencl as cl # noqa -import pyopencl.array # noqa import six from six.moves import intern from pytools import memoize_method from pytential.symbolic.mappers import EvaluationMapperBase +from pytential.utils import flatten_if_needed # {{{ helpers @@ -56,17 +55,44 @@ def _get_layer_potential_args(mapper, expr, include_args=None): and arg_name not in include_args): continue - kernel_args[arg_name] = mapper.rec(arg_expr) + kernel_args[arg_name] = flatten_if_needed(mapper.array_context, + mapper.rec(arg_expr) + ) return kernel_args + +def _unflatten_from_numpy(actx, ary, discr=None): + from pytools.obj_array import obj_array_vectorize + from meshmode.dof_array import unflatten + + if isinstance(ary, np.ndarray) and ary.dtype.char == "O": + ary = obj_array_vectorize(actx.from_numpy, ary) + else: + ary = actx.from_numpy(ary) + + if discr is None: + return ary + else: + return unflatten(actx, discr, ary) + + +def _flatten_to_numpy(actx, ary): + result = flatten_if_needed(actx, ary) + + from pytools.obj_array import obj_array_vectorize + if isinstance(result, np.ndarray) and ary.dtype.char == "O": + return obj_array_vectorize(actx.to_numpy, result) + else: + return actx.to_numpy(result) + # }}} # {{{ base classes for matrix builders class MatrixBuilderBase(EvaluationMapperBase): - def __init__(self, queue, dep_expr, other_dep_exprs, + def __init__(self, actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context): """ :arg queue: a :class:`pyopencl.CommandQueue`. 
@@ -84,7 +110,7 @@ class MatrixBuilderBase(EvaluationMapperBase): """ super(MatrixBuilderBase, self).__init__(context=context) - self.queue = queue + self.array_context = actx self.dep_expr = dep_expr self.other_dep_exprs = other_dep_exprs self.dep_source = dep_source @@ -94,7 +120,7 @@ class MatrixBuilderBase(EvaluationMapperBase): # {{{ def get_dep_variable(self): - return np.eye(self.dep_discr.nnodes, dtype=np.float64) + return np.eye(self.dep_discr.ndofs, dtype=np.float64) def is_kind_vector(self, x): return len(x.shape) == 1 @@ -196,17 +222,25 @@ class MatrixBuilderBase(EvaluationMapperBase): if self.is_kind_matrix(rec_operand): raise NotImplementedError("derivatives") - rec_operand = cl.array.to_device(self.queue, rec_operand) + dofdesc = expr.dofdesc op = sym.NumReferenceDerivative( ref_axes=expr.ref_axes, operand=sym.var("u"), - dofdesc=expr.dofdesc) - return bind(self.places, op)(self.queue, u=rec_operand).get() + dofdesc=dofdesc) + + discr = self.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + rec_operand = _unflatten_from_numpy(self.array_context, rec_operand, discr) + + return _flatten_to_numpy(self.array_context, + bind(self.places, op)(self.array_context, u=rec_operand) + ) def map_node_coordinate_component(self, expr): from pytential import bind, sym op = sym.NodeCoordinateComponent(expr.ambient_axis, dofdesc=expr.dofdesc) - return bind(self.places, op)(self.queue).get() + return _flatten_to_numpy(self.array_context, + bind(self.places, op)(self.array_context) + ) def map_call(self, expr): arg, = expr.parameters @@ -215,17 +249,13 @@ class MatrixBuilderBase(EvaluationMapperBase): if isinstance(rec_arg, np.ndarray) and self.is_kind_matrix(rec_arg): raise RuntimeError("expression is nonlinear in variable") - if isinstance(rec_arg, np.ndarray): - rec_arg = cl.array.to_device(self.queue, rec_arg) - - from pytential import bind, sym - op = expr.function(sym.var("u")) - result = bind(self.places, op)(self.queue, u=rec_arg) - - if isinstance(result, cl.array.Array): - result = result.get() - - return result + from numbers import Number + if isinstance(rec_arg, Number): + return getattr(np, expr.function.name)(rec_arg) + else: + rec_arg = _unflatten_from_numpy(self.array_context, rec_arg) + result = getattr(self.array_context.np, expr.function.name)(rec_arg) + return _flatten_to_numpy(self.array_context, result) # }}} @@ -240,14 +270,14 @@ class MatrixBlockBuilderBase(MatrixBuilderBase): assume that each operator acts directly on the density. """ - def __init__(self, queue, dep_expr, other_dep_exprs, + def __init__(self, actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, index_set, context): """ :arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` class describing which blocks are going to be evaluated. 
""" - super(MatrixBlockBuilderBase, self).__init__(queue, + super(MatrixBlockBuilderBase, self).__init__(actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context) self.index_set = index_set @@ -259,7 +289,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase): # be computed on the full discretization, ignoring our index_set, # e.g the normal in a double layer potential - return MatrixBuilderBase(self.queue, + return MatrixBuilderBase(self.array_context, self.dep_expr, self.other_dep_exprs, self.dep_source, @@ -272,7 +302,7 @@ class MatrixBlockBuilderBase(MatrixBuilderBase): # blk_mapper is used to recursively compute the density to # a layer potential operator to ensure there is no composition - return MatrixBlockBuilderBase(self.queue, + return MatrixBlockBuilderBase(self.array_context, self.dep_expr, self.other_dep_exprs, self.dep_source, @@ -302,9 +332,10 @@ class MatrixBlockBuilderBase(MatrixBuilderBase): # We'll cheat and build the matrix on the host. class MatrixBuilder(MatrixBuilderBase): - def __init__(self, queue, dep_expr, other_dep_exprs, + def __init__(self, actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context): - super(MatrixBuilder, self).__init__(queue, dep_expr, other_dep_exprs, + super(MatrixBuilder, self).__init__( + actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context) def map_interpolation(self, expr): @@ -313,13 +344,17 @@ class MatrixBuilder(MatrixBuilderBase): if expr.to_dd.discr_stage != sym.QBX_SOURCE_QUAD_STAGE2: raise RuntimeError("can only interpolate to QBX_SOURCE_QUAD_STAGE2") operand = self.rec(expr.operand) + actx = self.array_context if isinstance(operand, (int, float, complex, np.number)): return operand elif isinstance(operand, np.ndarray) and operand.ndim == 1: conn = self.places.get_connection(expr.from_dd, expr.to_dd) - return conn(self.queue, - cl.array.to_device(self.queue, operand)).get(self.queue) + discr = self.places.get_discretization( + expr.from_dd.geometry, expr.from_dd.discr_stage) + + operand = _unflatten_from_numpy(actx, operand, discr) + return _flatten_to_numpy(actx, conn(operand)) elif isinstance(operand, np.ndarray) and operand.ndim == 2: cache = self.places._get_cache("direct_resampler") key = (expr.from_dd.geometry, @@ -333,8 +368,8 @@ class MatrixBuilder(MatrixBuilderBase): flatten_chained_connection conn = self.places.get_connection(expr.from_dd, expr.to_dd) - conn = flatten_chained_connection(self.queue, conn) - mat = conn.full_resample_matrix(self.queue).get(self.queue) + conn = flatten_chained_connection(actx, conn) + mat = actx.to_numpy(conn.full_resample_matrix(actx)) # FIXME: the resample matrix is slow to compute and very big # to store, so caching it may not be the best idea @@ -359,6 +394,7 @@ class MatrixBuilder(MatrixBuilderBase): if not self.is_kind_matrix(rec_density): raise NotImplementedError("layer potentials on non-variables") + actx = self.array_context kernel = expr.kernel kernel_args = _get_layer_potential_args(self, expr) @@ -366,31 +402,31 @@ class MatrixBuilder(MatrixBuilderBase): local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order) from sumpy.qbx import LayerPotentialMatrixGenerator - mat_gen = LayerPotentialMatrixGenerator( - self.queue.context, (local_expn,)) + mat_gen = LayerPotentialMatrixGenerator(actx.context, (local_expn,)) assert abs(expr.qbx_forced_limit) > 0 from pytential import bind, sym radii = bind(self.places, sym.expansion_radii( source_discr.ambient_dim, - dofdesc=expr.target))(self.queue) + dofdesc=expr.target))(actx) 
centers = bind(self.places, sym.expansion_centers( source_discr.ambient_dim, expr.qbx_forced_limit, - dofdesc=expr.target))(self.queue) - - _, (mat,) = mat_gen(self.queue, - targets=target_discr.nodes(), - sources=source_discr.nodes(), - centers=centers, - expansion_radii=radii, + dofdesc=expr.target))(actx) + + from meshmode.dof_array import flatten, thaw + _, (mat,) = mat_gen(actx.queue, + targets=flatten(thaw(actx, target_discr.nodes())), + sources=flatten(thaw(actx, source_discr.nodes())), + centers=flatten(centers), + expansion_radii=flatten(radii), **kernel_args) - mat = mat.get() + mat = actx.to_numpy(mat) waa = bind(self.places, sym.weights_and_area_elements( source_discr.ambient_dim, - dofdesc=expr.source))(self.queue) - mat[:, :] *= waa.get(self.queue) + dofdesc=expr.source))(actx) + mat[:, :] *= actx.to_numpy(flatten(waa)) mat = mat.dot(rec_density) return mat @@ -401,9 +437,9 @@ class MatrixBuilder(MatrixBuilderBase): # {{{ p2p matrix builder class P2PMatrixBuilder(MatrixBuilderBase): - def __init__(self, queue, dep_expr, other_dep_exprs, + def __init__(self, actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context, exclude_self=True): - super(P2PMatrixBuilder, self).__init__(queue, + super(P2PMatrixBuilder, self).__init__(actx, dep_expr, other_dep_exprs, dep_source, dep_discr, places, context) @@ -430,25 +466,26 @@ class P2PMatrixBuilder(MatrixBuilderBase): kernel_args = kernel.get_args() + kernel.get_source_args() kernel_args = set(arg.loopy_arg.name for arg in kernel_args) + actx = self.array_context kernel_args = _get_layer_potential_args(self, expr, include_args=kernel_args) if self.exclude_self: - kernel_args["target_to_source"] = \ - cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int) + kernel_args["target_to_source"] = actx.from_numpy( + np.arange(0, target_discr.ndofs, dtype=np.int) + ) from sumpy.p2p import P2PMatrixGenerator - mat_gen = P2PMatrixGenerator( - self.queue.context, (kernel,), exclude_self=self.exclude_self) + mat_gen = P2PMatrixGenerator(actx.context, (kernel,), + exclude_self=self.exclude_self) - _, (mat,) = mat_gen(self.queue, - targets=target_discr.nodes(), - sources=source_discr.nodes(), + from meshmode.dof_array import flatten, thaw + _, (mat,) = mat_gen(actx.queue, + targets=flatten(thaw(actx, target_discr.nodes())), + sources=flatten(thaw(actx, source_discr.nodes())), **kernel_args) - mat = mat.get() - mat = mat.dot(rec_density) + return actx.to_numpy(mat).dot(rec_density) - return mat # }}} @@ -462,8 +499,9 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase): places, index_set, context) def get_dep_variable(self): - tgtindices = self.index_set.linear_row_indices.get(self.queue) - srcindices = self.index_set.linear_col_indices.get(self.queue) + queue = self.array_context.queue + tgtindices = self.index_set.linear_row_indices.get(queue) + srcindices = self.index_set.linear_col_indices.get(queue) return np.equal(tgtindices, srcindices).astype(np.float64) @@ -484,6 +522,7 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase): if not np.isscalar(rec_density): raise NotImplementedError + actx = self.array_context kernel = expr.kernel kernel_args = _get_layer_potential_args(self._mat_mapper, expr) @@ -491,34 +530,34 @@ class NearFieldBlockBuilder(MatrixBlockBuilderBase): local_expn = LineTaylorLocalExpansion(kernel, lpot_source.qbx_order) from sumpy.qbx import LayerPotentialMatrixBlockGenerator - mat_gen = LayerPotentialMatrixBlockGenerator( - self.queue.context, (local_expn,)) + mat_gen = 
LayerPotentialMatrixBlockGenerator(actx.context, (local_expn,)) assert abs(expr.qbx_forced_limit) > 0 from pytential import bind, sym radii = bind(self.places, sym.expansion_radii( source_discr.ambient_dim, - dofdesc=expr.target))(self.queue) + dofdesc=expr.target))(actx) centers = bind(self.places, sym.expansion_centers( source_discr.ambient_dim, expr.qbx_forced_limit, - dofdesc=expr.target))(self.queue) - - _, (mat,) = mat_gen(self.queue, - targets=target_discr.nodes(), - sources=source_discr.nodes(), - centers=centers, - expansion_radii=radii, + dofdesc=expr.target))(actx) + + from meshmode.dof_array import flatten, thaw + _, (mat,) = mat_gen(actx.queue, + targets=flatten(thaw(actx, target_discr.nodes())), + sources=flatten(thaw(actx, source_discr.nodes())), + centers=flatten(centers), + expansion_radii=flatten(radii), index_set=self.index_set, **kernel_args) waa = bind(self.places, sym.weights_and_area_elements( source_discr.ambient_dim, - dofdesc=expr.source))(self.queue) - mat *= waa[self.index_set.linear_col_indices] - mat = rec_density * mat.get(self.queue) + dofdesc=expr.source))(actx) + waa = flatten(waa) - return mat + mat *= waa[self.index_set.linear_col_indices] + return rec_density * actx.to_numpy(mat) class FarFieldBlockBuilder(MatrixBlockBuilderBase): @@ -530,8 +569,9 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase): self.exclude_self = exclude_self def get_dep_variable(self): - tgtindices = self.index_set.linear_row_indices.get(self.queue) - srcindices = self.index_set.linear_col_indices.get(self.queue) + queue = self.array_context.queue + tgtindices = self.index_set.linear_row_indices.get(queue) + srcindices = self.index_set.linear_col_indices.get(queue) return np.equal(tgtindices, srcindices).astype(np.float64) @@ -558,24 +598,26 @@ class FarFieldBlockBuilder(MatrixBlockBuilderBase): kernel_args = kernel.get_args() + kernel.get_source_args() kernel_args = set(arg.loopy_arg.name for arg in kernel_args) + actx = self.array_context kernel_args = _get_layer_potential_args(self._mat_mapper, expr, include_args=kernel_args) if self.exclude_self: - kernel_args["target_to_source"] = \ - cl.array.arange(self.queue, 0, target_discr.nnodes, dtype=np.int) + kernel_args["target_to_source"] = actx.from_numpy( + np.arange(0, target_discr.ndofs, dtype=np.int) + ) from sumpy.p2p import P2PMatrixBlockGenerator - mat_gen = P2PMatrixBlockGenerator( - self.queue.context, (kernel,), exclude_self=self.exclude_self) + mat_gen = P2PMatrixBlockGenerator(actx.context, (kernel,), + exclude_self=self.exclude_self) - _, (mat,) = mat_gen(self.queue, - targets=target_discr.nodes(), - sources=source_discr.nodes(), + from meshmode.dof_array import flatten, thaw + _, (mat,) = mat_gen(actx.queue, + targets=flatten(thaw(actx, target_discr.nodes())), + sources=flatten(thaw(actx, source_discr.nodes())), index_set=self.index_set, **kernel_args) - mat = rec_density * mat.get(self.queue) - return mat + return rec_density * actx.to_numpy(mat) # }}} diff --git a/test/test_matrix.py b/test/test_matrix.py index 12be496c..5de6780a 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -38,9 +38,10 @@ from pytools.obj_array import make_obj_array, is_obj_array from sumpy.tools import BlockIndexRanges, MatrixBlockIndexRanges from sumpy.symbolic import USE_SYMENGINE -from pytential import sym +from pytential import bind, sym from pytential import GeometryCollection +from meshmode.array_context import PyOpenCLArrayContext from meshmode.mesh.generation import ( # noqa ellipse, NArmedStarfish, make_curve_mesh, 
generate_torus) @@ -55,7 +56,7 @@ except ImportError: pass -def _build_geometry(queue, +def _build_geometry(actx, ambient_dim=2, nelements=30, target_order=7, @@ -79,8 +80,7 @@ def _build_geometry(queue, from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory from pytential.qbx import QBXLayerPotentialSource - density_discr = Discretization( - queue.context, mesh, + density_discr = Discretization(actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) qbx = QBXLayerPotentialSource(density_discr, @@ -92,24 +92,24 @@ def _build_geometry(queue, return places, places.auto_source -def _build_block_index(queue, - discr, +def _build_block_index(actx, discr, nblks=10, factor=1.0, use_tree=True): - nnodes = discr.nnodes - max_particles_in_box = nnodes // nblks + max_particles_in_box = discr.ndofs // nblks # create index ranges from pytential.linalg.proxy import partition_by_nodes indices = partition_by_nodes(discr, - use_tree=use_tree, max_nodes_in_box=max_particles_in_box) + use_tree=use_tree, + max_nodes_in_box=max_particles_in_box) if abs(factor - 1.0) < 1.0e-14: return indices # randomly pick a subset of points - indices = indices.get(queue) + # FIXME: this needs porting in sumpy.tools.BlockIndexRanges + indices = indices.get(actx.queue) indices_ = np.empty(indices.nblocks, dtype=np.object) for i in range(indices.nblocks): @@ -120,13 +120,11 @@ def _build_block_index(queue, indices_[i] = np.sort( np.random.choice(iidx, size=isize, replace=False)) - ranges_ = cl.array.to_device(queue, - np.cumsum([0] + [r.shape[0] for r in indices_])) - indices_ = cl.array.to_device(queue, np.hstack(indices_)) + ranges_ = actx.from_numpy(np.cumsum([0] + [r.shape[0] for r in indices_])) + indices_ = actx.from_numpy(np.hstack(indices_)) - indices = BlockIndexRanges(discr.cl_context, - indices_.with_queue(None), - ranges_.with_queue(None)) + indices = BlockIndexRanges(actx.context, + actx.freeze(indices_), actx.freeze(ranges_)) return indices @@ -137,8 +135,8 @@ def _build_op(lpot_id, source=sym.DEFAULT_SOURCE, target=sym.DEFAULT_TARGET, qbx_forced_limit="avg"): - from sumpy.kernel import LaplaceKernel, HelmholtzKernel + if k: knl = HelmholtzKernel(ambient_dim) knl_kwargs = {"k": k} @@ -200,6 +198,7 @@ def _max_block_error(mat, blk, index_set): def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): cl_ctx = ctx_factory() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) # prevent cache 'splosion from sympy.core.cache import clear_cache @@ -215,8 +214,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - pre_density_discr = Discretization( - cl_ctx, mesh, + pre_density_discr = Discretization(actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) from pytential.qbx import QBXLayerPotentialSource @@ -228,7 +226,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): from pytential.qbx.refinement import refine_geometry_collection places = GeometryCollection(qbx) - places = refine_geometry_collection(queue, places, + places = refine_geometry_collection(places, kernel_length_scale=(5 / k if k else None)) source = places.auto_source.to_stage1() @@ -237,15 +235,14 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): op, u_sym, knl_kwargs = _build_op(lpot_id, k=k, source=places.auto_source, 
target=places.auto_target) - from pytential import bind bound_op = bind(places, op) from pytential.symbolic.execution import build_matrix - mat = build_matrix(queue, places, op, u_sym).get() + mat = build_matrix(actx, places, op, u_sym).get() if visualize: from sumpy.tools import build_matrix as build_matrix_via_matvec - mat2 = bound_op.scipy_op(queue, "u", dtype=mat.dtype, **knl_kwargs) + mat2 = bound_op.scipy_op(actx, "u", dtype=mat.dtype, **knl_kwargs) mat2 = build_matrix_via_matvec(mat2) print(la.norm((mat - mat2).real, "fro") / la.norm(mat2.real, "fro"), la.norm((mat - mat2).imag, "fro") / la.norm(mat2.imag, "fro")) @@ -267,23 +264,22 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): pt.colorbar() pt.show() - from sumpy.tools import vector_to_device, vector_from_device + from pytential.symbolic.matrix import _unflatten_from_numpy, _flatten_to_numpy np.random.seed(12) for i in range(5): if is_obj_array(u_sym): u = make_obj_array([ - np.random.randn(density_discr.nnodes) + np.random.randn(density_discr.ndofs) for _ in range(len(u_sym)) ]) else: - u = np.random.randn(density_discr.nnodes) + u = np.random.randn(density_discr.ndofs) + u_dev = _unflatten_from_numpy(actx, u, density_discr) - u_dev = vector_to_device(queue, u) res_matvec = np.hstack( - list(vector_from_device( - queue, bound_op(queue, u=u_dev)))) - - res_mat = mat.dot(np.hstack(list(u))) + _flatten_to_numpy(actx, bound_op(actx, u=u_dev)) + ) + res_mat = mat.dot(np.hstack(u)) abs_err = la.norm(res_mat - res_matvec, np.inf) rel_err = abs_err / la.norm(res_matvec, np.inf) @@ -299,6 +295,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) # prevent cache explosion from sympy.core.cache import clear_cache @@ -312,7 +309,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, ) target_order = 2 if ambient_dim == 3 else 7 - places, dofdesc = _build_geometry(queue, + places, dofdesc = _build_geometry(actx, target_order=target_order, ambient_dim=ambient_dim, auto_where=place_ids) @@ -323,14 +320,14 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, dd = places.auto_source density_discr = places.get_discretization(dd.geometry, dd.discr_stage) - index_set = _build_block_index(queue, density_discr, factor=factor) + index_set = _build_block_index(actx, density_discr, factor=factor) index_set = MatrixBlockIndexRanges(ctx, index_set, index_set) from pytential.symbolic.execution import _prepare_expr expr = _prepare_expr(places, op) from pytential.symbolic.matrix import P2PMatrixBuilder - mbuilder = P2PMatrixBuilder(queue, + mbuilder = P2PMatrixBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -341,7 +338,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, mat = mbuilder(expr) from pytential.symbolic.matrix import FarFieldBlockBuilder - mbuilder = FarFieldBlockBuilder(queue, + mbuilder = FarFieldBlockBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -352,7 +349,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, exclude_self=True) blk = mbuilder(expr) - index_set = index_set.get(queue) + index_set = index_set.get(actx.queue) if visualize and ambient_dim == 2: blk_full = np.zeros_like(mat) mat_full = np.zeros_like(mat) @@ -381,6 +378,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, 
visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) # prevent cache explosion from sympy.core.cache import clear_cache @@ -394,7 +392,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, ) target_order = 2 if ambient_dim == 3 else 7 - places, _ = _build_geometry(queue, + places, _ = _build_geometry(actx, target_order=target_order, ambient_dim=ambient_dim, auto_where=place_ids) @@ -409,11 +407,11 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, dd = places.auto_source density_discr = places.get_discretization(dd.geometry, dd.discr_stage) - index_set = _build_block_index(queue, density_discr, factor=factor) + index_set = _build_block_index(actx, density_discr, factor=factor) index_set = MatrixBlockIndexRanges(ctx, index_set, index_set) from pytential.symbolic.matrix import NearFieldBlockBuilder - mbuilder = NearFieldBlockBuilder(queue, + mbuilder = NearFieldBlockBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -424,7 +422,7 @@ def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, blk = mbuilder(expr) from pytential.symbolic.matrix import MatrixBuilder - mbuilder = MatrixBuilder(queue, + mbuilder = MatrixBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -462,6 +460,7 @@ def test_build_matrix_places(ctx_factory, source_discr_stage, target_discr_stage, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) # prevent cache explosion from sympy.core.cache import clear_cache @@ -476,7 +475,7 @@ def test_build_matrix_places(ctx_factory, ) # build test operators - places, _ = _build_geometry(queue, + places, _ = _build_geometry(actx, nelements=8, target_order=2, ambient_dim=2, @@ -493,7 +492,7 @@ def test_build_matrix_places(ctx_factory, dd = places.auto_source source_discr = places.get_discretization(dd.geometry, dd.discr_stage) - index_set = _build_block_index(queue, source_discr, factor=0.6) + index_set = _build_block_index(actx, source_discr, factor=0.6) index_set = MatrixBlockIndexRanges(ctx, index_set, index_set) from pytential.symbolic.execution import _prepare_expr @@ -501,7 +500,7 @@ def test_build_matrix_places(ctx_factory, # build full QBX matrix from pytential.symbolic.matrix import MatrixBuilder - mbuilder = MatrixBuilder(queue, + mbuilder = MatrixBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -512,7 +511,7 @@ def test_build_matrix_places(ctx_factory, # build full p2p matrix from pytential.symbolic.matrix import P2PMatrixBuilder - mbuilder = P2PMatrixBuilder(queue, + mbuilder = P2PMatrixBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -521,11 +520,11 @@ def test_build_matrix_places(ctx_factory, context={}) p2p_mat = mbuilder(op) - assert p2p_mat.shape == (target_discr.nnodes, source_discr.nnodes) + assert p2p_mat.shape == (target_discr.ndofs, source_discr.ndofs) # build block qbx and p2p matrices from pytential.symbolic.matrix import NearFieldBlockBuilder - mbuilder = NearFieldBlockBuilder(queue, + mbuilder = NearFieldBlockBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), @@ -538,7 +537,7 @@ def test_build_matrix_places(ctx_factory, assert _max_block_error(qbx_mat, mat, index_set.get(queue)) < 1.0e-14 from pytential.symbolic.matrix import FarFieldBlockBuilder - mbuilder = 
FarFieldBlockBuilder(queue, + mbuilder = FarFieldBlockBuilder(actx, dep_expr=u_sym, other_dep_exprs=[], dep_source=places.get_geometry(dd.geometry), -- GitLab From 80ab289c30b1e854673cb6394bef421ed3304a58 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 2 Jul 2020 23:01:59 -0500 Subject: [PATCH 02/12] port test_linalg_proxy to array-context --- pytential/linalg/proxy.py | 403 +++++++++++++++++------------------ pytential/symbolic/matrix.py | 42 +--- pytential/utils.py | 25 +++ test/test_linalg_proxy.py | 107 +++++----- test/test_matrix.py | 10 +- 5 files changed, 297 insertions(+), 290 deletions(-) diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index ba2e2ea9..a87e0785 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -26,10 +26,6 @@ THE SOFTWARE. import numpy as np import numpy.linalg as la -import pyopencl as cl -import pyopencl.array # noqa -from pyopencl.array import to_device - from pytools.obj_array import make_obj_array from pytools import memoize_method, memoize_in from sumpy.tools import BlockIndexRanges @@ -61,9 +57,7 @@ def _element_node_range(group, ielement): return np.arange(istart, iend) -def partition_by_nodes(discr, - use_tree=True, - max_nodes_in_box=None): +def partition_by_nodes(actx, discr, use_tree=True, max_nodes_in_box=None): """Generate equally sized ranges of nodes. The partition is created at the lowest level of granularity, i.e. nodes. This results in balanced ranges of points, but will split elements across different ranges. @@ -82,43 +76,45 @@ def partition_by_nodes(discr, # FIXME: this is just an arbitrary value max_nodes_in_box = 32 - with cl.CommandQueue(discr.cl_context) as queue: - if use_tree: - from boxtree import box_flags_enum - from boxtree import TreeBuilder + if use_tree: + from boxtree import box_flags_enum + from boxtree import TreeBuilder - builder = TreeBuilder(discr.cl_context) + builder = TreeBuilder(actx.context) - tree, _ = builder(queue, discr.nodes(), + from meshmode.dof_array import flatten, thaw + tree, _ = builder(actx.queue, + flatten(thaw(actx, discr.nodes())), max_particles_in_box=max_nodes_in_box) - tree = tree.get(queue) - leaf_boxes, = (tree.box_flags - & box_flags_enum.HAS_CHILDREN == 0).nonzero() + tree = tree.get(actx.queue) + leaf_boxes, = (tree.box_flags + & box_flags_enum.HAS_CHILDREN == 0).nonzero() - indices = np.empty(len(leaf_boxes), dtype=np.object) - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] - indices[i] = tree.user_source_ids[box_start:box_end] + indices = np.empty(len(leaf_boxes), dtype=np.object) + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] - ranges = to_device(queue, - np.cumsum([0] + [box.shape[0] for box in indices])) - indices = to_device(queue, np.hstack(indices)) - else: - indices = cl.array.arange(queue, 0, discr.nnodes, - dtype=np.int) - ranges = cl.array.arange(queue, 0, discr.nnodes + 1, - discr.nnodes // max_nodes_in_box, - dtype=np.int) - assert ranges[-1] == discr.nnodes + ranges = actx.from_numpy( + np.cumsum([0] + [box.shape[0] for box in indices]) + ) + indices = actx.from_numpy(np.hstack(indices)) + else: + indices = actx.from_numpy(np.arange(0, discr.ndofs, dtype=np.int)) + ranges = actx.from_numpy(np.arange( + 0, + discr.ndofs + 1, + discr.ndofs // max_nodes_in_box, dtype=np.int)) - return 
BlockIndexRanges(discr.cl_context, - indices.with_queue(None), - ranges.with_queue(None)) + assert ranges[-1] == discr.ndofs + return BlockIndexRanges(actx.context, + actx.freeze(indices), actx.freeze(ranges)) -def partition_from_coarse(resampler, from_indices): + +def partition_from_coarse(actx, resampler, from_indices): """Generate a partition of nodes from an existing partition on a coarser discretization. The new partition is generated based on element refinement relationships in *resampler*, so the existing partition @@ -140,54 +136,51 @@ def partition_from_coarse(resampler, from_indices): if not hasattr(resampler, "groups"): raise ValueError("resampler must be a DirectDiscretizationConnection.") - with cl.CommandQueue(resampler.cl_context) as queue: - from_indices = from_indices.get(queue) - - # construct ranges - from_discr = resampler.from_discr - from_grp_ranges = np.cumsum( - [0] + [grp.nelements for grp in from_discr.mesh.groups]) - from_el_ranges = np.hstack([ - np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) - for grp in from_discr.groups]) - - # construct coarse element arrays in each from_range - el_indices = np.empty(from_indices.nblocks, dtype=np.object) - el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) - for i in range(from_indices.nblocks): - ifrom = from_indices.block_indices(i) - el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1 - el_ranges[el_indices[i]] = i - el_indices = np.hstack(el_indices) - - # construct lookup table - to_el_table = [np.full(g.nelements, -1, dtype=np.int) - for g in resampler.to_discr.groups] - - for igrp, grp in enumerate(resampler.groups): - for batch in grp.batches: - to_el_table[igrp][batch.to_element_indices.get(queue)] = \ - from_grp_ranges[igrp] + batch.from_element_indices.get(queue) - - # construct fine node index list - indices = [np.empty(0, dtype=np.int) - for _ in range(from_indices.nblocks)] - for igrp in range(len(resampler.groups)): - to_element_indices = \ - np.where(np.isin(to_el_table[igrp], el_indices))[0] - - for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], - to_element_indices): - indices[i] = np.hstack([indices[i], - _element_node_range(resampler.to_discr.groups[igrp], j)]) - - ranges = to_device(queue, - np.cumsum([0] + [b.shape[0] for b in indices])) - indices = to_device(queue, np.hstack(indices)) - - return BlockIndexRanges(resampler.cl_context, - indices.with_queue(None), - ranges.with_queue(None)) + from_indices = from_indices.get(actx.queue) + + # construct ranges + from_discr = resampler.from_discr + from_grp_ranges = np.cumsum( + [0] + [grp.nelements for grp in from_discr.mesh.groups]) + from_el_ranges = np.hstack([ + np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes) + for grp in from_discr.groups]) + + # construct coarse element arrays in each from_range + el_indices = np.empty(from_indices.nblocks, dtype=np.object) + el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) + for i in range(from_indices.nblocks): + ifrom = from_indices.block_indices(i) + el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1 + el_ranges[el_indices[i]] = i + el_indices = np.hstack(el_indices) + + # construct lookup table + to_el_table = [np.full(g.nelements, -1, dtype=np.int) + for g in resampler.to_discr.groups] + + for igrp, grp in enumerate(resampler.groups): + for batch in grp.batches: + to_el_table[igrp][actx.to_numpy(batch.to_element_indices)] = \ + from_grp_ranges[igrp] + actx.to_numpy(batch.from_element_indices) + + # construct fine node index 
list + indices = [np.empty(0, dtype=np.int) + for _ in range(from_indices.nblocks)] + for igrp in range(len(resampler.groups)): + to_element_indices = \ + np.where(np.isin(to_el_table[igrp], el_indices))[0] + + for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], + to_element_indices): + indices[i] = np.hstack([indices[i], + _element_node_range(resampler.to_discr.groups[igrp], j)]) + + ranges = actx.from_numpy(np.cumsum([0] + [b.shape[0] for b in indices])) + indices = actx.from_numpy(np.hstack(indices)) + + return BlockIndexRanges(resampler.cl_context, + actx.freeze(indices), actx.freeze(ranges)) # }}} @@ -340,7 +333,7 @@ class ProxyGenerator(object): """.format(radius_expr=radius_expr)], [ lp.GlobalArg("sources", None, - shape=(self.ambient_dim, "nsources")), + shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"), lp.GlobalArg("center_int", None, shape=(self.ambient_dim, "nsources"), dim_tags="sep,C"), lp.GlobalArg("center_ext", None, @@ -367,11 +360,11 @@ class ProxyGenerator(object): return knl - def __call__(self, queue, source_dd, indices, **kwargs): + def __call__(self, actx, source_dd, indices, **kwargs): """Generate proxy points for each given range of source points in the discretization in *source_dd*. - :arg queue: a :class:`pyopencl.CommandQueue`. + :arg actx: a :class:`~meshmode.array_context.ArrayContext`. :arg source_dd: a :class:`~pytential.symbolic.primitives.DOFDescriptor` for the discretization on which the proxy points are to be generated. @@ -397,47 +390,51 @@ class ProxyGenerator(object): source_dd.geometry, source_dd.discr_stage) radii = bind(self.places, sym.expansion_radii( - self.ambient_dim, dofdesc=source_dd))(queue) + self.ambient_dim, dofdesc=source_dd))(actx) center_int = bind(self.places, sym.expansion_centers( - self.ambient_dim, -1, dofdesc=source_dd))(queue) + self.ambient_dim, -1, dofdesc=source_dd))(actx) center_ext = bind(self.places, sym.expansion_centers( - self.ambient_dim, +1, dofdesc=source_dd))(queue) + self.ambient_dim, +1, dofdesc=source_dd))(actx) + from meshmode.dof_array import flatten, thaw knl = self.get_kernel() - _, (centers_dev, radii_dev,) = knl(queue, - sources=discr.nodes(), - center_int=center_int, - center_ext=center_ext, - expansion_radii=radii, + _, (centers_dev, radii_dev,) = knl(actx.queue, + sources=flatten(thaw(actx, discr.nodes())), + center_int=flatten(center_int), + center_ext=flatten(center_ext), + expansion_radii=flatten(radii), srcindices=indices.indices, srcranges=indices.ranges, **kwargs) - centers = centers_dev.get() - radii = radii_dev.get() + from pytential.utils import flatten_to_numpy + centers = flatten_to_numpy(actx, centers_dev) + radii = flatten_to_numpy(actx, radii_dev) proxies = np.empty(indices.nblocks, dtype=np.object) for i in range(indices.nblocks): proxies[i] = _affine_map(self.ref_points, A=(radii[i] * np.eye(self.ambient_dim)), b=centers[:, i].reshape(-1, 1)) - pxyranges = cl.array.arange(queue, - 0, - proxies.shape[0] * proxies[0].shape[1] + 1, - proxies[0].shape[1], - dtype=indices.ranges.dtype) + pxyranges = actx.from_numpy(np.arange( + 0, + proxies.shape[0] * proxies[0].shape[1] + 1, + proxies[0].shape[1], + dtype=indices.ranges.dtype)) proxies = make_obj_array([ - cl.array.to_device(queue, np.hstack([p[idim] for p in proxies])) - for idim in range(self.ambient_dim)]) + actx.freeze(actx.from_numpy(np.hstack([p[idim] for p in proxies]))) + for idim in range(self.ambient_dim) + ]) centers = make_obj_array([ - centers_dev[idim].with_queue(queue).copy() - for idim in 
range(self.ambient_dim)]) + actx.freeze(centers_dev[idim]) + for idim in range(self.ambient_dim) + ]) assert pxyranges[-1] == proxies[0].shape[0] - return proxies, pxyranges, centers, radii_dev + return proxies, actx.freeze(pxyranges), centers, actx.freeze(radii_dev) -def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, - max_nodes_in_box=None): +def gather_block_neighbor_points(actx, discr, indices, pxycenters, pxyradii, + max_nodes_in_box=None): """Generate a set of neighboring points for each range of points in *discr*. Neighboring points of a range :math:`i` are defined as all the points inside the proxy ball :math:`i` that do not also @@ -455,79 +452,77 @@ def gather_block_neighbor_points(discr, indices, pxycenters, pxyradii, # FIXME: this is a fairly arbitrary value max_nodes_in_box = 32 - with cl.CommandQueue(discr.cl_context) as queue: - indices = indices.get(queue) - - # NOTE: this is constructed for multiple reasons: - # * TreeBuilder takes object arrays - # * `srcindices` can be a small subset of nodes, so this will save - # some work - # * `srcindices` may reorder the array returned by nodes(), so this - # makes sure that we have the same order in tree.user_source_ids - # and friends - sources = discr.nodes().get(queue) - sources = make_obj_array([ - cl.array.to_device(queue, sources[idim, indices.indices]) - for idim in range(discr.ambient_dim)]) - - # construct tree - from boxtree import TreeBuilder - builder = TreeBuilder(discr.cl_context) - tree, _ = builder(queue, sources, - max_particles_in_box=max_nodes_in_box) - - from boxtree.area_query import AreaQueryBuilder - builder = AreaQueryBuilder(discr.cl_context) - query, _ = builder(queue, tree, pxycenters, pxyradii) - - # find nodes inside each proxy ball - tree = tree.get(queue) - query = query.get(queue) - - if isinstance(pxycenters[0], cl.array.Array): - pxycenters = np.vstack([pxycenters[idim].get(queue) - for idim in range(discr.ambient_dim)]) - if isinstance(pxyradii, cl.array.Array): - pxyradii = pxyradii.get(queue) - - nbrindices = np.empty(indices.nblocks, dtype=np.object) - for iproxy in range(indices.nblocks): - # get list of boxes intersecting the current ball - istart = query.leaves_near_ball_starts[iproxy] - iend = query.leaves_near_ball_starts[iproxy + 1] - iboxes = query.leaves_near_ball_lists[istart:iend] - - # get nodes inside the boxes - istart = tree.box_source_starts[iboxes] - iend = istart + tree.box_source_counts_cumul[iboxes] - isources = np.hstack([np.arange(s, e) - for s, e in zip(istart, iend)]) - nodes = np.vstack([tree.sources[idim][isources] - for idim in range(discr.ambient_dim)]) - isources = tree.user_source_ids[isources] - - # get nodes inside the ball but outside the current range - center = pxycenters[:, iproxy].reshape(-1, 1) - radius = pxyradii[iproxy] - mask = ((la.norm(nodes - center, axis=0) < radius) - & ((isources < indices.ranges[iproxy]) - | (indices.ranges[iproxy + 1] <= isources))) - - nbrindices[iproxy] = indices.indices[isources[mask]] - - nbrranges = to_device(queue, - np.cumsum([0] + [n.shape[0] for n in nbrindices])) - nbrindices = to_device(queue, np.hstack(nbrindices)) - - return BlockIndexRanges(discr.cl_context, - nbrindices.with_queue(None), - nbrranges.with_queue(None)) - - -def gather_block_interaction_points(places, source_dd, indices, - radius_factor=None, - approx_nproxy=None, - max_nodes_in_box=None): + indices = indices.get(actx.queue) + + # NOTE: this is constructed for multiple reasons: + # * TreeBuilder takes object arrays + # * 
`srcindices` can be a small subset of nodes, so this will save + # some work + # * `srcindices` may reorder the array returned by nodes(), so this + # makes sure that we have the same order in tree.user_source_ids + # and friends + from pytential.utils import flatten_to_numpy + sources = flatten_to_numpy(actx, discr.nodes()) + sources = make_obj_array([ + actx.from_numpy(sources[idim][indices.indices]) + for idim in range(discr.ambient_dim)]) + + # construct tree + from boxtree import TreeBuilder + builder = TreeBuilder(actx.context) + tree, _ = builder(actx.queue, sources, + max_particles_in_box=max_nodes_in_box) + + from boxtree.area_query import AreaQueryBuilder + builder = AreaQueryBuilder(actx.context) + query, _ = builder(actx.queue, tree, pxycenters, pxyradii) + + # find nodes inside each proxy ball + tree = tree.get(actx.queue) + query = query.get(actx.queue) + + pxycenters = np.vstack([ + actx.to_numpy(pxycenters[idim]) + for idim in range(discr.ambient_dim) + ]) + pxyradii = actx.to_numpy(pxyradii) + + nbrindices = np.empty(indices.nblocks, dtype=np.object) + for iproxy in range(indices.nblocks): + # get list of boxes intersecting the current ball + istart = query.leaves_near_ball_starts[iproxy] + iend = query.leaves_near_ball_starts[iproxy + 1] + iboxes = query.leaves_near_ball_lists[istart:iend] + + # get nodes inside the boxes + istart = tree.box_source_starts[iboxes] + iend = istart + tree.box_source_counts_cumul[iboxes] + isources = np.hstack([np.arange(s, e) + for s, e in zip(istart, iend)]) + nodes = np.vstack([tree.sources[idim][isources] + for idim in range(discr.ambient_dim)]) + isources = tree.user_source_ids[isources] + + # get nodes inside the ball but outside the current range + center = pxycenters[:, iproxy].reshape(-1, 1) + radius = pxyradii[iproxy] + mask = ((la.norm(nodes - center, axis=0) < radius) + & ((isources < indices.ranges[iproxy]) + | (indices.ranges[iproxy + 1] <= isources))) + + nbrindices[iproxy] = indices.indices[isources[mask]] + + nbrranges = actx.from_numpy(np.cumsum([0] + [n.shape[0] for n in nbrindices])) + nbrindices = actx.from_numpy(np.hstack(nbrindices)) + + return BlockIndexRanges(actx.context, + actx.freeze(nbrindices), actx.freeze(nbrranges)) + + +def gather_block_interaction_points(actx, places, source_dd, indices, + radius_factor=None, + approx_nproxy=None, + max_nodes_in_box=None): """Generate sets of interaction points for each given range of indices in the *source* discretization. 
For each input range of indices, the corresponding output range of points is consists of: @@ -583,7 +578,7 @@ def gather_block_interaction_points(places, source_dd, indices, """, [ lp.GlobalArg("sources", None, - shape=(lpot_source.ambient_dim, "nsources")), + shape=(lpot_source.ambient_dim, "nsources"), dim_tags="sep,C"), lp.GlobalArg("proxies", None, shape=(lpot_source.ambient_dim, "nproxies"), dim_tags="sep,C"), lp.GlobalArg("nbrindices", None, @@ -607,28 +602,28 @@ def gather_block_interaction_points(places, source_dd, indices, return loopy_knl lpot_source = places.get_geometry(source_dd.geometry) - with cl.CommandQueue(lpot_source.cl_context) as queue: - generator = ProxyGenerator(places, - radius_factor=radius_factor, - approx_nproxy=approx_nproxy) - proxies, pxyranges, pxycenters, pxyradii = \ - generator(queue, source_dd, indices) - - discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage) - neighbors = gather_block_neighbor_points(discr, - indices, pxycenters, pxyradii, - max_nodes_in_box=max_nodes_in_box) - - ranges = cl.array.zeros(queue, indices.nblocks + 1, dtype=np.int) - _, (nodes, ranges) = knl()(queue, - sources=discr.nodes(), - proxies=proxies, - pxyranges=pxyranges, - nbrindices=neighbors.indices, - nbrranges=neighbors.ranges, - ranges=ranges) - - return nodes.with_queue(None), ranges.with_queue(None) + generator = ProxyGenerator(places, + radius_factor=radius_factor, + approx_nproxy=approx_nproxy) + proxies, pxyranges, pxycenters, pxyradii = \ + generator(actx, source_dd, indices) + + discr = places.get_discretization(source_dd.geometry, source_dd.discr_stage) + neighbors = gather_block_neighbor_points(actx, discr, + indices, pxycenters, pxyradii, + max_nodes_in_box=max_nodes_in_box) + + from meshmode.dof_array import flatten, thaw + ranges = actx.zeros(indices.nblocks + 1, dtype=np.int) + _, (nodes, ranges) = knl()(actx.queue, + sources=flatten(thaw(actx, discr.nodes())), + proxies=proxies, + pxyranges=pxyranges, + nbrindices=neighbors.indices, + nbrranges=neighbors.ranges, + ranges=ranges) + + return actx.freeze(nodes), actx.freeze(ranges) # }}} diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index 12ec489a..3ac09c00 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -32,7 +32,8 @@ from six.moves import intern from pytools import memoize_method from pytential.symbolic.mappers import EvaluationMapperBase -from pytential.utils import flatten_if_needed +from pytential.utils import ( + flatten_if_needed, flatten_to_numpy, unflatten_from_numpy) # {{{ helpers @@ -61,31 +62,6 @@ def _get_layer_potential_args(mapper, expr, include_args=None): return kernel_args - -def _unflatten_from_numpy(actx, ary, discr=None): - from pytools.obj_array import obj_array_vectorize - from meshmode.dof_array import unflatten - - if isinstance(ary, np.ndarray) and ary.dtype.char == "O": - ary = obj_array_vectorize(actx.from_numpy, ary) - else: - ary = actx.from_numpy(ary) - - if discr is None: - return ary - else: - return unflatten(actx, discr, ary) - - -def _flatten_to_numpy(actx, ary): - result = flatten_if_needed(actx, ary) - - from pytools.obj_array import obj_array_vectorize - if isinstance(result, np.ndarray) and ary.dtype.char == "O": - return obj_array_vectorize(actx.to_numpy, result) - else: - return actx.to_numpy(result) - # }}} @@ -229,16 +205,16 @@ class MatrixBuilderBase(EvaluationMapperBase): dofdesc=dofdesc) discr = self.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) - rec_operand = 
_unflatten_from_numpy(self.array_context, rec_operand, discr) + rec_operand = unflatten_from_numpy(self.array_context, rec_operand, discr) - return _flatten_to_numpy(self.array_context, + return flatten_to_numpy(self.array_context, bind(self.places, op)(self.array_context, u=rec_operand) ) def map_node_coordinate_component(self, expr): from pytential import bind, sym op = sym.NodeCoordinateComponent(expr.ambient_axis, dofdesc=expr.dofdesc) - return _flatten_to_numpy(self.array_context, + return flatten_to_numpy(self.array_context, bind(self.places, op)(self.array_context) ) @@ -253,9 +229,9 @@ class MatrixBuilderBase(EvaluationMapperBase): if isinstance(rec_arg, Number): return getattr(np, expr.function.name)(rec_arg) else: - rec_arg = _unflatten_from_numpy(self.array_context, rec_arg) + rec_arg = unflatten_from_numpy(self.array_context, rec_arg) result = getattr(self.array_context.np, expr.function.name)(rec_arg) - return _flatten_to_numpy(self.array_context, result) + return flatten_to_numpy(self.array_context, result) # }}} @@ -353,8 +329,8 @@ class MatrixBuilder(MatrixBuilderBase): discr = self.places.get_discretization( expr.from_dd.geometry, expr.from_dd.discr_stage) - operand = _unflatten_from_numpy(actx, operand, discr) - return _flatten_to_numpy(actx, conn(operand)) + operand = unflatten_from_numpy(actx, operand, discr) + return flatten_to_numpy(actx, conn(operand)) elif isinstance(operand, np.ndarray) and operand.ndim == 2: cache = self.places._get_cache("direct_resampler") key = (expr.from_dd.geometry, diff --git a/pytential/utils.py b/pytential/utils.py index fb772c0f..be84f416 100644 --- a/pytential/utils.py +++ b/pytential/utils.py @@ -43,4 +43,29 @@ def flatten_if_needed(actx: PyOpenCLArrayContext, ary: np.ndarray): return flatten(ary) + +def unflatten_from_numpy(actx, ary, discr=None): + from pytools.obj_array import obj_array_vectorize + from meshmode.dof_array import unflatten + + if isinstance(ary, np.ndarray) and ary.dtype.char == "O": + ary = obj_array_vectorize(actx.from_numpy, ary) + else: + ary = actx.from_numpy(ary) + + if discr is None: + return ary + else: + return unflatten(actx, discr, ary) + + +def flatten_to_numpy(actx, ary): + result = flatten_if_needed(actx, ary) + + from pytools.obj_array import obj_array_vectorize + if isinstance(result, np.ndarray) and ary.dtype.char == "O": + return obj_array_vectorize(actx.to_numpy, result) + else: + return actx.to_numpy(result) + # vim: foldmethod=marker diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index a4487290..8e485d6e 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -29,6 +29,8 @@ import pyopencl as cl import pyopencl.array # noqa from pytential import bind, sym + +from meshmode.array_context import PyOpenCLArrayContext from meshmode.mesh.generation import ( # noqa ellipse, NArmedStarfish, generate_torus, make_curve_mesh) @@ -41,9 +43,9 @@ from pyopencl.tools import ( # noqa from test_matrix import _build_geometry, _build_block_index -def _plot_partition_indices(queue, discr, indices, **kwargs): +def _plot_partition_indices(actx, discr, indices, **kwargs): import matplotlib.pyplot as pt - indices = indices.get(queue) + indices = indices.get(actx.queue) args = [ kwargs.get("method", "unknown"), @@ -57,12 +59,13 @@ def _plot_partition_indices(queue, discr, indices, **kwargs): pt.savefig("test_partition_{0}_{1}_{3}d_ranges_{2}.png".format(*args)) pt.clf() + from pytential.utils import flatten_to_numpy if discr.ambient_dim == 2: - sources = discr.nodes().get(queue) + 
sources = flatten_to_numpy(actx, discr.nodes()) pt.figure(figsize=(10, 8), dpi=300) - if indices.indices.shape[0] != discr.nnodes: + if indices.indices.shape[0] != discr.ndofs: pt.plot(sources[0], sources[1], 'ko', alpha=0.5) for i in range(indices.nblocks): isrc = indices.block_indices(i) @@ -80,17 +83,20 @@ def _plot_partition_indices(queue, discr, indices, **kwargs): return from meshmode.discretization.visualization import make_visualizer - marker = -42.0 * np.ones(discr.nnodes) + marker = -42.0 * np.ones(discr.ndofs) for i in range(indices.nblocks): isrc = indices.block_indices(i) marker[isrc] = 10.0 * (i + 1.0) - vis = make_visualizer(queue, discr, 10) + from meshmode.dof_array import unflatten + marker = unflatten(actx, discr, actx.from_numpy(marker)) + + vis = make_visualizer(actx, discr, 10) - filename = "test_partition_{0}_{1}_{3}d_{2}.png".format(*args) + filename = "test_partition_{0}_{1}_{3}d_{2}.vtu".format(*args) vis.write_vtk_file(filename, [ - ("marker", cl.array.to_device(queue, marker)) + ("marker", marker) ]) @@ -99,12 +105,14 @@ def _plot_partition_indices(queue, discr, indices, **kwargs): def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) - places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim) - _build_block_index(queue, - places.get_discretization(dofdesc.geometry, dofdesc.discr_stage), - use_tree=use_tree, - factor=0.6) + places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim) + discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + indices = _build_block_index(actx, discr, use_tree=use_tree, factor=0.6) + + if visualize: + _plot_partition_indices(actx, discr, indices, use_tree=use_tree) @pytest.mark.parametrize("ambient_dim", [2, 3]) @@ -112,24 +120,23 @@ def test_partition_points(ctx_factory, use_tree, ambient_dim, visualize=False): def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) - places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim) + places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim) dofdesc = dofdesc.to_stage1() density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) - srcindices = _build_block_index(queue, - density_discr, - factor=factor) + srcindices = _build_block_index(actx, density_discr, factor=factor) from pytential.linalg.proxy import ProxyGenerator generator = ProxyGenerator(places) proxies, pxyranges, pxycenters, pxyradii = \ - generator(queue, dofdesc, srcindices) + generator(actx, dofdesc, srcindices) - proxies = np.vstack([p.get() for p in proxies]) - pxyranges = pxyranges.get() - pxycenters = np.vstack([c.get() for c in pxycenters]) - pxyradii = pxyradii.get() + proxies = np.vstack([actx.to_numpy(p) for p in proxies]) + pxyranges = actx.to_numpy(pxyranges) + pxycenters = np.vstack([actx.to_numpy(c) for c in pxycenters]) + pxyradii = actx.to_numpy(pxyradii) for i in range(srcindices.nblocks): ipxy = np.s_[pxyranges[i]:pxyranges[i + 1]] @@ -142,12 +149,14 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False): if ambient_dim == 2: import matplotlib.pyplot as pt - density_nodes = density_discr.nodes().get(queue) - ci = bind(places, sym.expansion_centers(ambient_dim, -1))(queue) - ci = np.vstack([c.get(queue) for c in ci]) - ce = bind(places, sym.expansion_centers(ambient_dim, +1))(queue) - ce = 
np.vstack([c.get(queue) for c in ce]) - r = bind(places, sym.expansion_radii(ambient_dim))(queue).get() + from pytential.utils import flatten_to_numpy + density_nodes = np.vstack(flatten_to_numpy(actx, density_discr.nodes())) + ci = bind(places, sym.expansion_centers(ambient_dim, -1))(actx) + ci = np.vstack(flatten_to_numpy(actx, ci)) + ce = bind(places, sym.expansion_centers(ambient_dim, +1))(actx) + ce = np.vstack(flatten_to_numpy(actx, ce)) + r = bind(places, sym.expansion_radii(ambient_dim))(actx) + r = flatten_to_numpy(actx, r) for i in range(srcindices.nblocks): isrc = srcindices.block_indices(i) @@ -195,10 +204,10 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False): b=pxycenters[:, i].reshape(-1)) mesh = merge_disjoint_meshes([mesh, density_discr.mesh]) - discr = Discretization(ctx, mesh, + discr = Discretization(actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(10)) - vis = make_visualizer(queue, discr, 10) + vis = make_visualizer(actx, discr, 10) filename = "test_proxy_generator_{}d_{:04}.vtu".format( ambient_dim, i) vis.write_vtk_file(filename, []) @@ -209,26 +218,25 @@ def test_proxy_generator(ctx_factory, ambient_dim, factor, visualize=False): def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue) - places, dofdesc = _build_geometry(queue, ambient_dim=ambient_dim) + places, dofdesc = _build_geometry(actx, ambient_dim=ambient_dim) dofdesc = dofdesc.to_stage1() density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) - srcindices = _build_block_index(queue, - density_discr, - factor=factor) + srcindices = _build_block_index(actx, density_discr, factor=factor) # generate proxy points from pytential.linalg.proxy import ProxyGenerator generator = ProxyGenerator(places) - _, _, pxycenters, pxyradii = generator(queue, dofdesc, srcindices) + _, _, pxycenters, pxyradii = generator(actx, dofdesc, srcindices) from pytential.linalg.proxy import ( # noqa gather_block_neighbor_points, gather_block_interaction_points) - nbrindices = gather_block_neighbor_points(density_discr, + nbrindices = gather_block_neighbor_points(actx, density_discr, srcindices, pxycenters, pxyradii) - nodes, ranges = gather_block_interaction_points( + nodes, ranges = gather_block_interaction_points(actx, places, dofdesc, srcindices) srcindices = srcindices.get(queue) @@ -240,12 +248,13 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False): assert not np.any(np.isin(inbr, isrc)) + from pytential.utils import flatten_to_numpy if visualize: if ambient_dim == 2: import matplotlib.pyplot as pt - density_nodes = density_discr.nodes().get(queue) - nodes = nodes.get(queue) - ranges = ranges.get(queue) + density_nodes = flatten_to_numpy(actx, density_discr.nodes()) + nodes = flatten_to_numpy(actx, nodes) + ranges = actx.to_numpy(ranges) for i in range(srcindices.nblocks): isrc = srcindices.block_indices(i) @@ -255,14 +264,14 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False): pt.figure(figsize=(10, 8)) pt.plot(density_nodes[0], density_nodes[1], 'ko', ms=2.0, alpha=0.5) - pt.plot(density_nodes[0, srcindices.indices], - density_nodes[1, srcindices.indices], + pt.plot(density_nodes[0][srcindices.indices], + density_nodes[1][srcindices.indices], 'o', ms=2.0) - pt.plot(density_nodes[0, isrc], density_nodes[1, isrc], + pt.plot(density_nodes[0][isrc], density_nodes[1][isrc], 'o', ms=2.0) - 
pt.plot(density_nodes[0, inbr], density_nodes[1, inbr], + pt.plot(density_nodes[0][inbr], density_nodes[1][inbr], 'o', ms=2.0) - pt.plot(nodes[0, iall], nodes[1, iall], + pt.plot(nodes[0][iall], nodes[1][iall], 'x', ms=2.0) pt.xlim([-1.5, 1.5]) pt.ylim([-1.5, 1.5]) @@ -272,7 +281,7 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False): pt.clf() elif ambient_dim == 3: from meshmode.discretization.visualization import make_visualizer - marker = np.empty(density_discr.nnodes) + marker = np.empty(density_discr.ndofs) for i in range(srcindices.nblocks): isrc = srcindices.block_indices(i) @@ -282,9 +291,11 @@ def test_interaction_points(ctx_factory, ambient_dim, factor, visualize=False): marker[srcindices.indices] = 0.0 marker[isrc] = -42.0 marker[inbr] = +42.0 - marker_dev = cl.array.to_device(queue, marker) - vis = make_visualizer(queue, density_discr, 10) + from meshmode.dof_array import unflatten + marker_dev = unflatten(actx, density_discr, actx.from_numpy(marker)) + + vis = make_visualizer(actx, density_discr, 10) filename = "test_area_query_{}d_{:04}.vtu".format(ambient_dim, i) vis.write_vtk_file(filename, [ ("marker", marker_dev), diff --git a/test/test_matrix.py b/test/test_matrix.py index 5de6780a..70a79d15 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -100,7 +100,7 @@ def _build_block_index(actx, discr, # create index ranges from pytential.linalg.proxy import partition_by_nodes - indices = partition_by_nodes(discr, + indices = partition_by_nodes(actx, discr, use_tree=use_tree, max_nodes_in_box=max_particles_in_box) @@ -264,7 +264,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): pt.colorbar() pt.show() - from pytential.symbolic.matrix import _unflatten_from_numpy, _flatten_to_numpy + from pytential.utils import unflatten_from_numpy, flatten_to_numpy np.random.seed(12) for i in range(5): if is_obj_array(u_sym): @@ -274,10 +274,10 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False): ]) else: u = np.random.randn(density_discr.ndofs) - u_dev = _unflatten_from_numpy(actx, u, density_discr) + u_dev = unflatten_from_numpy(actx, u, density_discr) res_matvec = np.hstack( - _flatten_to_numpy(actx, bound_op(actx, u=u_dev)) + flatten_to_numpy(actx, bound_op(actx, u=u_dev)) ) res_mat = mat.dot(np.hstack(u)) @@ -375,7 +375,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, @pytest.mark.parametrize("ambient_dim", [2, 3]) @pytest.mark.parametrize("lpot_id", [1, 2]) def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, - visualize=False): + visualize=True): ctx = ctx_factory() queue = cl.CommandQueue(ctx) actx = PyOpenCLArrayContext(queue) -- GitLab From d4a85856f412ee52a4e817aedfeb4e559856dbbe Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 2 Jul 2020 23:36:15 -0500 Subject: [PATCH 03/12] port test_stokes to array-context --- test/test_matrix.py | 2 +- test/test_stokes.py | 59 ++++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/test/test_matrix.py b/test/test_matrix.py index 70a79d15..2b24b29e 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -31,7 +31,7 @@ import numpy as np import numpy.linalg as la import pyopencl as cl -import pyopencl.array # noqa +import pyopencl.array from pytools.obj_array import make_obj_array, is_obj_array diff --git a/test/test_stokes.py b/test/test_stokes.py index 19167efe..090770e5 100644 --- a/test/test_stokes.py +++ b/test/test_stokes.py @@ -25,9 +25,10 @@ 
THE SOFTWARE. import numpy as np import pyopencl as cl -import pyopencl.clmath # noqa +import pyopencl.clmath import pytest +from meshmode.array_context import PyOpenCLArrayContext from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory @@ -46,7 +47,7 @@ import logging def run_exterior_stokes_2d(ctx_factory, nelements, mesh_order=4, target_order=4, qbx_order=4, - fmm_order=10, mu=1, circle_rad=1.5, visualize=False): + fmm_order=False, mu=1, circle_rad=1.5, visualize=False): # This program tests an exterior Stokes flow in 2D using the # compound representation given in Hsiao & Kress, @@ -57,6 +58,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, cl_ctx = cl.create_some_context() queue = cl.CommandQueue(cl_ctx) + actx = PyOpenCLArrayContext(queue) ovsmp_target_order = 4*target_order @@ -68,8 +70,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, lambda t: circle_rad * ellipse(1, t), np.linspace(0, 1, nelements+1), target_order) - coarse_density_discr = Discretization( - cl_ctx, mesh, + coarse_density_discr = Discretization(actx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) from pytential.qbx import QBXLayerPotentialSource @@ -111,8 +112,8 @@ def run_exterior_stokes_2d(ctx_factory, nelements, density_discr = places.get_discretization(sym.DEFAULT_SOURCE) - normal = bind(places, sym.normal(2).as_vector())(queue) - path_length = bind(places, sym.integral(2, 1, 1))(queue) + normal = bind(places, sym.normal(2).as_vector())(actx) + path_length = bind(places, sym.integral(2, 1, 1))(actx) # }}} @@ -150,47 +151,52 @@ def run_exterior_stokes_2d(ctx_factory, nelements, def fund_soln(x, y, loc, strength): #with direction (1,0) for point source - r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2) + r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2) scaling = strength/(4*np.pi*mu) - xcomp = (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling + xcomp = (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling ycomp = ((x - loc[0])*(y - loc[1])/r**2) * scaling return [xcomp, ycomp] def rotlet_soln(x, y, loc): - r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2) + r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2) xcomp = -(y - loc[1])/r**2 ycomp = (x - loc[0])/r**2 return [xcomp, ycomp] def fund_and_rot_soln(x, y, loc, strength): #with direction (1,0) for point source - r = cl.clmath.sqrt((x - loc[0])**2 + (y - loc[1])**2) + r = actx.np.sqrt((x - loc[0])**2 + (y - loc[1])**2) scaling = strength/(4*np.pi*mu) xcomp = ( - (-cl.clmath.log(r) + (x - loc[0])**2/r**2) * scaling + (-actx.np.log(r) + (x - loc[0])**2/r**2) * scaling - (y - loc[1])*strength*0.125/r**2 + 3.3) ycomp = ( ((x - loc[0])*(y - loc[1])/r**2) * scaling + (x - loc[0])*strength*0.125/r**2 + 1.5) - return [xcomp, ycomp] + return make_obj_array([xcomp, ycomp]) - nodes = density_discr.nodes().with_queue(queue) + from meshmode.dof_array import unflatten, flatten, thaw + nodes = flatten(thaw(actx, density_discr.nodes())) fund_soln_loc = np.array([0.5, -0.2]) strength = 100. 
- bc = fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength) + bc = unflatten(actx, density_discr, + fund_and_rot_soln(nodes[0], nodes[1], fund_soln_loc, strength)) omega_sym = sym.make_sym_vector("omega", dim) u_A_sym_bdry = stokeslet_obj.apply( # noqa: N806 omega_sym, mu_sym, qbx_forced_limit=1) - omega = [ - cl.array.to_device(queue, (strength/path_length)*np.ones(len(nodes[0]))), - cl.array.to_device(queue, np.zeros(len(nodes[0])))] + from pytential.utils import unflatten_from_numpy + omega = unflatten_from_numpy(actx, make_obj_array([ + (strength/path_length)*np.ones(len(nodes[0])), + np.zeros(len(nodes[0])) + ]), density_discr) + bvp_rhs = bind(places, - sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(queue, + sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(actx, bc=bc, mu=mu, omega=omega) gmres_result = gmres( - bound_op.scipy_op(queue, "sigma", np.float64, mu=mu, normal=normal), + bound_op.scipy_op(actx, "sigma", np.float64, mu=mu, normal=normal), bvp_rhs, x0=bvp_rhs, tol=1e-9, progress=True, @@ -203,7 +209,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, sigma = gmres_result.solution sigma_int_val_sym = sym.make_sym_vector("sigma_int_val", 2) - int_val = bind(places, sym.integral(2, 1, sigma_sym))(queue, sigma=sigma) + int_val = bind(places, sym.integral(2, 1, sigma_sym))(actx, sigma=sigma) int_val = -int_val/(2 * np.pi) print("int_val = ", int_val) @@ -217,7 +223,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, - u_A_sym_vol + sigma_int_val_sym) where = (sym.DEFAULT_SOURCE, "point_target") - vel = bind(places, representation_sym, auto_where=where)(queue, + vel = bind(places, representation_sym, auto_where=where)(actx, sigma=sigma, mu=mu, normal=normal, @@ -226,7 +232,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, print("@@@@@@@@") plot_vel = bind(places, representation_sym, - auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(queue, + auto_where=(sym.DEFAULT_SOURCE, "plot_target"))(actx, sigma=sigma, mu=mu, normal=normal, @@ -240,8 +246,10 @@ def run_exterior_stokes_2d(ctx_factory, nelements, ]) exact_soln = fund_and_rot_soln( - cl.array.to_device(queue, eval_points[0]), cl.array.to_device( - queue, eval_points[1]), fund_soln_loc, strength) + actx.from_numpy(eval_points[0]), + actx.from_numpy(eval_points[1]), + fund_soln_loc, + strength) vel = get_obj_array(vel) err = vel-get_obj_array(exact_soln) @@ -289,7 +297,7 @@ def run_exterior_stokes_2d(ctx_factory, nelements, # }}} - h_max = bind(places, sym.h_max(qbx.ambient_dim))(queue) + h_max = bind(places, sym.h_max(qbx.ambient_dim))(actx) return h_max, l2_err @@ -301,6 +309,7 @@ def test_exterior_stokes_2d(ctx_factory, qbx_order=3): for nelements in [20, 50]: h_max, l2_err = run_exterior_stokes_2d(ctx_factory, nelements) eoc_rec.add_data_point(h_max, l2_err) + break print(eoc_rec) assert eoc_rec.order_estimate() >= qbx_order - 1 -- GitLab From 16b103d41db48423355e4d085341736a95d5c7c2 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 2 Jul 2020 23:45:18 -0500 Subject: [PATCH 04/12] enable tests in setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 34699863..335a4a03 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,4 +9,4 @@ exclude= [tool:pytest] markers= slowtest: mark a test as slow -python_files = test_cost_model.py test_tools.py test_target_specific_qbx.py test_symbolic.py test_muller.py test_global_qbx.py test_layer_pot_eigenvalues.py test_layer_pot_identity.py test_layer_pot.py +python_files = test_cost_model.py test_tools.py 
test_target_specific_qbx.py test_symbolic.py test_muller.py test_global_qbx.py test_layer_pot_eigenvalues.py test_layer_pot_identity.py test_layer_pot.py test_linalg_proxy.py test_matrix.py test_stokes.py -- GitLab From 37e9404ae7c6cdf9fda299b16bbff5bda0cd1322 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 3 Jul 2020 07:56:05 -0500 Subject: [PATCH 05/12] remove stray debugging remnants --- test/test_matrix.py | 2 +- test/test_stokes.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_matrix.py b/test/test_matrix.py index 2b24b29e..3c721760 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -375,7 +375,7 @@ def test_p2p_block_builder(ctx_factory, factor, ambient_dim, lpot_id, @pytest.mark.parametrize("ambient_dim", [2, 3]) @pytest.mark.parametrize("lpot_id", [1, 2]) def test_qbx_block_builder(ctx_factory, factor, ambient_dim, lpot_id, - visualize=True): + visualize=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) actx = PyOpenCLArrayContext(queue) diff --git a/test/test_stokes.py b/test/test_stokes.py index 090770e5..be2d2014 100644 --- a/test/test_stokes.py +++ b/test/test_stokes.py @@ -309,7 +309,6 @@ def test_exterior_stokes_2d(ctx_factory, qbx_order=3): for nelements in [20, 50]: h_max, l2_err = run_exterior_stokes_2d(ctx_factory, nelements) eoc_rec.add_data_point(h_max, l2_err) - break print(eoc_rec) assert eoc_rec.order_estimate() >= qbx_order - 1 -- GitLab From 740cd158e666c254721363fb08c9f42540878a6a Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 3 Jul 2020 23:53:11 +0200 Subject: [PATCH 06/12] Apply 1 suggestion(s) to 1 file(s) --- pytential/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pytential/utils.py b/pytential/utils.py index be84f416..599fec5c 100644 --- a/pytential/utils.py +++ b/pytential/utils.py @@ -48,10 +48,7 @@ def unflatten_from_numpy(actx, ary, discr=None): from pytools.obj_array import obj_array_vectorize from meshmode.dof_array import unflatten - if isinstance(ary, np.ndarray) and ary.dtype.char == "O": - ary = obj_array_vectorize(actx.from_numpy, ary) - else: - ary = actx.from_numpy(ary) + ary = obj_array_vectorize(actx.from_numpy, ary) if discr is None: return ary -- GitLab From 67b122838db19f35137c319a9253dfc16719a959 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 3 Jul 2020 23:53:17 +0200 Subject: [PATCH 07/12] Apply 1 suggestion(s) to 1 file(s) --- pytential/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pytential/utils.py b/pytential/utils.py index 599fec5c..393e41d9 100644 --- a/pytential/utils.py +++ b/pytential/utils.py @@ -60,9 +60,6 @@ def flatten_to_numpy(actx, ary): result = flatten_if_needed(actx, ary) from pytools.obj_array import obj_array_vectorize - if isinstance(result, np.ndarray) and ary.dtype.char == "O": - return obj_array_vectorize(actx.to_numpy, result) - else: - return actx.to_numpy(result) + return obj_array_vectorize(actx.to_numpy, result) # vim: foldmethod=marker -- GitLab From c09cfb9c5ad286250d884daf08f409160db75fd6 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 3 Jul 2020 23:53:25 +0200 Subject: [PATCH 08/12] Apply 1 suggestion(s) to 1 file(s) --- test/test_stokes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_stokes.py b/test/test_stokes.py index be2d2014..27563ef9 100644 --- a/test/test_stokes.py +++ b/test/test_stokes.py @@ -25,7 +25,6 @@ THE SOFTWARE. 
 import numpy as np
 import pyopencl as cl
-import pyopencl.clmath
 import pytest
 
 from meshmode.array_context import PyOpenCLArrayContext
--
GitLab


From 94a220d490053f7f089db46417e2817fbd04bf65 Mon Sep 17 00:00:00 2001
From: Alexandru Fikl
Date: Fri, 3 Jul 2020 18:29:16 -0500
Subject: [PATCH 09/12] switch argument order in unflatten_from_numpy

---
 pytential/symbolic/matrix.py | 6 +++---
 pytential/utils.py           | 3 +--
 test/test_matrix.py          | 2 +-
 test/test_stokes.py          | 4 ++--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py
index 3ac09c00..dc0eb373 100644
--- a/pytential/symbolic/matrix.py
+++ b/pytential/symbolic/matrix.py
@@ -205,7 +205,7 @@ class MatrixBuilderBase(EvaluationMapperBase):
                 dofdesc=dofdesc)
 
         discr = self.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage)
-        rec_operand = unflatten_from_numpy(self.array_context, rec_operand, discr)
+        rec_operand = unflatten_from_numpy(self.array_context, discr, rec_operand)
 
         return flatten_to_numpy(self.array_context,
                 bind(self.places, op)(self.array_context, u=rec_operand)
@@ -229,7 +229,7 @@ class MatrixBuilderBase(EvaluationMapperBase):
         if isinstance(rec_arg, Number):
            return getattr(np, expr.function.name)(rec_arg)
        else:
-            rec_arg = unflatten_from_numpy(self.array_context, rec_arg)
+            rec_arg = unflatten_from_numpy(self.array_context, None, rec_arg)
             result = getattr(self.array_context.np, expr.function.name)(rec_arg)
             return flatten_to_numpy(self.array_context, result)
 
@@ -329,7 +329,7 @@ class MatrixBuilder(MatrixBuilderBase):
             discr = self.places.get_discretization(
                     expr.from_dd.geometry, expr.from_dd.discr_stage)
 
-            operand = unflatten_from_numpy(actx, operand, discr)
+            operand = unflatten_from_numpy(actx, discr, operand)
             return flatten_to_numpy(actx, conn(operand))
         elif isinstance(operand, np.ndarray) and operand.ndim == 2:
             cache = self.places._get_cache("direct_resampler")
diff --git a/pytential/utils.py b/pytential/utils.py
index 393e41d9..f1e9f0d1 100644
--- a/pytential/utils.py
+++ b/pytential/utils.py
@@ -44,12 +44,11 @@ def flatten_if_needed(actx: PyOpenCLArrayContext, ary: np.ndarray):
     return flatten(ary)
 
 
-def unflatten_from_numpy(actx, ary, discr=None):
+def unflatten_from_numpy(actx, discr, ary):
     from pytools.obj_array import obj_array_vectorize
     from meshmode.dof_array import unflatten
 
     ary = obj_array_vectorize(actx.from_numpy, ary)
-
     if discr is None:
         return ary
     else:
diff --git a/test/test_matrix.py b/test/test_matrix.py
index 3c721760..ce429642 100644
--- a/test/test_matrix.py
+++ b/test/test_matrix.py
@@ -274,7 +274,7 @@ def test_matrix_build(ctx_factory, k, curve_f, lpot_id, visualize=False):
             ])
         else:
             u = np.random.randn(density_discr.ndofs)
-        u_dev = unflatten_from_numpy(actx, u, density_discr)
+        u_dev = unflatten_from_numpy(actx, density_discr, u)
 
         res_matvec = np.hstack(
                 flatten_to_numpy(actx, bound_op(actx, u=u_dev))
diff --git a/test/test_stokes.py b/test/test_stokes.py
index 27563ef9..ec7282a0 100644
--- a/test/test_stokes.py
+++ b/test/test_stokes.py
@@ -186,10 +186,10 @@ def run_exterior_stokes_2d(ctx_factory, nelements,
             omega_sym, mu_sym, qbx_forced_limit=1)
 
     from pytential.utils import unflatten_from_numpy
-    omega = unflatten_from_numpy(actx, make_obj_array([
+    omega = unflatten_from_numpy(actx, density_discr, make_obj_array([
         (strength/path_length)*np.ones(len(nodes[0])),
         np.zeros(len(nodes[0]))
-    ]), density_discr)
+    ]))
 
     bvp_rhs = bind(places,
             sym.make_sym_vector("bc", dim) + u_A_sym_bdry)(actx,
--
GitLab


From 56373b87d5ad7326356cb8798888c9d767e1bbd8 Mon Sep 17 00:00:00 2001
From: Alexandru Fikl
Date: Sat, 4 Jul 2020 02:32:58 +0200
Subject: [PATCH 10/12] Apply 1 suggestion(s) to 1 file(s)

---
 test/test_stokes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_stokes.py b/test/test_stokes.py
index ec7282a0..5ac2ab47 100644
--- a/test/test_stokes.py
+++ b/test/test_stokes.py
@@ -46,7 +46,8 @@ import logging
 
 def run_exterior_stokes_2d(ctx_factory, nelements,
         mesh_order=4, target_order=4, qbx_order=4,
-        fmm_order=False, mu=1, circle_rad=1.5, visualize=False):
+        fmm_order=False,  # FIXME: FMM is slower than direct eval
+        mu=1, circle_rad=1.5, visualize=False):
 
     # This program tests an exterior Stokes flow in 2D using the
     # compound representation given in Hsiao & Kress,
--
GitLab


From 27c79226e02829f903f85d6803559c1abfb386eb Mon Sep 17 00:00:00 2001
From: Alexandru Fikl
Date: Sat, 4 Jul 2020 02:33:10 +0200
Subject: [PATCH 11/12] Apply 1 suggestion(s) to 1 file(s)

---
 pytential/linalg/proxy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py
index a87e0785..42da45f6 100644
--- a/pytential/linalg/proxy.py
+++ b/pytential/linalg/proxy.py
@@ -143,7 +143,7 @@ def partition_from_coarse(actx, resampler, from_indices):
     from_grp_ranges = np.cumsum(
             [0] + [grp.nelements for grp in from_discr.mesh.groups])
     from_el_ranges = np.hstack([
-        np.arange(grp.node_nr_base, grp.nnodes + 1, grp.nunit_nodes)
+        np.arange(grp.node_nr_base, grp.ndofs + 1, grp.nunit_dofs)
         for grp in from_discr.groups])
 
     # construct coarse element arrays in each from_range
--
GitLab


From fb062bb3cf90659aa09ead912e71c1ac9f04c4a3 Mon Sep 17 00:00:00 2001
From: Alexandru Fikl
Date: Fri, 3 Jul 2020 19:44:23 -0500
Subject: [PATCH 12/12] remove partition_from_coarse

---
 pytential/linalg/proxy.py | 76 ---------------------------------------
 1 file changed, 76 deletions(-)

diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py
index 42da45f6..0f79148e 100644
--- a/pytential/linalg/proxy.py
+++ b/pytential/linalg/proxy.py
@@ -50,13 +50,6 @@ Proxy Point Generation
 
 # {{{ point index partitioning
 
-def _element_node_range(group, ielement):
-    istart = group.node_nr_base + group.nunit_nodes * ielement
-    iend = group.node_nr_base + group.nunit_nodes * (ielement + 1)
-
-    return np.arange(istart, iend)
-
-
 def partition_by_nodes(actx, discr, use_tree=True, max_nodes_in_box=None):
     """Generate equally sized ranges of nodes. The partition is created at the
     lowest level of granularity, i.e. nodes. This results in balanced ranges
@@ -113,75 +106,6 @@ def partition_by_nodes(actx, discr, use_tree=True, max_nodes_in_box=None):
     return BlockIndexRanges(actx.context,
             actx.freeze(indices), actx.freeze(ranges))
 
-
-def partition_from_coarse(actx, resampler, from_indices):
-    """Generate a partition of nodes from an existing partition on a
-    coarser discretization. The new partition is generated based on element
-    refinement relationships in *resampler*, so the existing partition
-    needs to be created using :func:`partition_by_elements`,
-    since we assume that each range contains all the nodes in an element.
-
-    The new partition will have the same number of ranges as the old partition.
-    The nodes inside each range in the new partition are all the nodes in
-    *resampler.to_discr* that were refined from elements in the same
-    range from *resampler.from_discr*.
-
-    :arg resampler: a
-        :class:`meshmode.discretization.connection.DirectDiscretizationConnection`.
- :arg from_indices: a :class:`sumpy.tools.BlockIndexRanges`. - - :return: a :class:`sumpy.tools.BlockIndexRanges`. - """ - - if not hasattr(resampler, "groups"): - raise ValueError("resampler must be a DirectDiscretizationConnection.") - - from_indices = from_indices.get(actx.queue) - - # construct ranges - from_discr = resampler.from_discr - from_grp_ranges = np.cumsum( - [0] + [grp.nelements for grp in from_discr.mesh.groups]) - from_el_ranges = np.hstack([ - np.arange(grp.node_nr_base, grp.ndofs + 1, grp.nunit_dofs) - for grp in from_discr.groups]) - - # construct coarse element arrays in each from_range - el_indices = np.empty(from_indices.nblocks, dtype=np.object) - el_ranges = np.full(from_grp_ranges[-1], -1, dtype=np.int) - for i in range(from_indices.nblocks): - ifrom = from_indices.block_indices(i) - el_indices[i] = np.unique(np.digitize(ifrom, from_el_ranges)) - 1 - el_ranges[el_indices[i]] = i - el_indices = np.hstack(el_indices) - - # construct lookup table - to_el_table = [np.full(g.nelements, -1, dtype=np.int) - for g in resampler.to_discr.groups] - - for igrp, grp in enumerate(resampler.groups): - for batch in grp.batches: - to_el_table[igrp][actx.to_numpy(batch.to_element_indices)] = \ - from_grp_ranges[igrp] + actx.to_numpy(batch.from_element_indices) - - # construct fine node index list - indices = [np.empty(0, dtype=np.int) - for _ in range(from_indices.nblocks)] - for igrp in range(len(resampler.groups)): - to_element_indices = \ - np.where(np.isin(to_el_table[igrp], el_indices))[0] - - for i, j in zip(el_ranges[to_el_table[igrp][to_element_indices]], - to_element_indices): - indices[i] = np.hstack([indices[i], - _element_node_range(resampler.to_discr.groups[igrp], j)]) - - ranges = actx.from_numpy(np.cumsum([0] + [b.shape[0] for b in indices])) - indices = actx.from_numpy(np.hstack(indices)) - - return BlockIndexRanges(resampler.cl_context, - actx.freeze(indices), actx.freeze(ranges)) - # }}} -- GitLab