From 4cb0f33e8dfcfb59c41a20a2449f5f27444247e2 Mon Sep 17 00:00:00 2001
From: Alex Fikl <alexfikl@gmail.com>
Date: Sun, 7 Oct 2018 15:27:40 -0500
Subject: [PATCH] p2p: define isrc, itgt in kernel

---
 sumpy/p2p.py           | 12 ++++++++----
 sumpy/qbx.py           | 11 ++++++-----
 test/test_matrixgen.py | 18 ++++++++++++++----
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/sumpy/p2p.py b/sumpy/p2p.py
index a0e89f24..d8fdadf4 100644
--- a/sumpy/p2p.py
+++ b/sumpy/p2p.py
@@ -319,14 +319,18 @@ class P2PMatrixBlockGenerator(P2PBase):
         loopy_knl = lp.make_kernel(
             "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}",
             self.get_kernel_scaling_assignments()
+            # NOTE: itgt, isrc need to always be defined in case a statement
+            # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in
+            # places like get_kernel_exprs)
             + ["""
                 for imat
-                    <> d[idim] = targets[idim, tgtindices[imat]] - \
-                                 sources[idim, srcindices[imat]]
+                    <> itgt = tgtindices[imat]
+                    <> isrc = srcindices[imat]
+
+                    <> d[idim] = targets[idim, itgt] - sources[idim, isrc]
             """]
             + ["""
-                    <> is_self = (srcindices[imat] ==
-                                  target_to_source[tgtindices[imat]])
+                    <> is_self = (isrc == target_to_source[itgt])
                 """ if self.exclude_self else ""]
             + loopy_insns + kernel_exprs
             + ["""
diff --git a/sumpy/qbx.py b/sumpy/qbx.py
index a0814e01..79de1544 100644
--- a/sumpy/qbx.py
+++ b/sumpy/qbx.py
@@ -352,16 +352,17 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
             "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}"
             ],
             self.get_kernel_scaling_assignments()
+            # NOTE: itgt, isrc need to always be defined in case a statement
+            # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in
+            # places like get_kernel_exprs)
             + ["""
                 for imat
                     <> itgt = tgtindices[imat]
                     <> isrc = srcindices[imat]
 
-                    <> a[idim] = center[idim, tgtindices[imat]] - \
-                                 src[idim, srcindices[imat]] {dup=idim}
-                    <> b[idim] = tgt[idim, tgtindices[imat]] - \
-                                 center[idim, tgtindices[imat]] {dup=idim}
-                    <> rscale = expansion_radii[tgtindices[imat]]
+                    <> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}
+                    <> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}
+                    <> rscale = expansion_radii[itgt]
             """]
             + loopy_insns + kernel_exprs
             + ["""
diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py
index 2f3aabc3..01ebaa0e 100644
--- a/test/test_matrixgen.py
+++ b/test/test_matrixgen.py
@@ -132,7 +132,7 @@ def test_qbx_direct(ctx_getter, factor, lpot_id):
 
     for n in [200, 300, 400]:
         targets, sources, centers, expansion_radii, sigma = \
-                _build_geometry(queue, n, mode_nr)
+                _build_geometry(queue, n, mode_nr, target_radius=1.2)
 
         h = 2 * np.pi / n
         strengths = (sigma * h,)
@@ -180,7 +180,8 @@ def test_qbx_direct(ctx_getter, factor, lpot_id):
 
 @pytest.mark.parametrize("exclude_self", [True, False])
 @pytest.mark.parametrize("factor", [1.0, 0.6])
-def test_p2p_direct(ctx_getter, exclude_self, factor):
+@pytest.mark.parametrize('lpot_id', [1, 2])
+def test_p2p_direct(ctx_getter, exclude_self, factor, lpot_id):
     logging.basicConfig(level=logging.INFO)
 
     ctx = ctx_getter()
@@ -190,8 +191,14 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
     nblks = 10
     mode_nr = 25
 
-    from sumpy.kernel import LaplaceKernel
-    lknl = LaplaceKernel(ndim)
+    from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative
+    if lpot_id == 1:
+        lknl = LaplaceKernel(ndim)
+    elif lpot_id == 2:
+        lknl = LaplaceKernel(ndim)
+        lknl = DirectionalSourceDerivative(lknl, dir_vec_name="dsource_vec")
+    else:
+        raise ValueError("unknow lpot_id")
 
     from sumpy.p2p import P2P
     lpot = P2P(ctx, [lknl], exclude_self=exclude_self)
@@ -217,6 +224,9 @@ def test_p2p_direct(ctx_getter, exclude_self, factor):
         if exclude_self:
             extra_kwargs["target_to_source"] = \
                 cl.array.arange(queue, 0, n, dtype=np.int)
+        if lpot_id == 2:
+            extra_kwargs["dsource_vec"] = \
+                    vector_to_device(queue, np.ones((ndim, n)))
 
         _, (result_lpot,) = lpot(queue,
                 targets=targets,
-- 
GitLab