From 5531b8f76cf3eb58da042e17092635f9a8ff205f Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 21 Jun 2020 02:39:59 -0500 Subject: [PATCH 1/9] Add a benchmark --- .gitignore | 2 + .gitlab-ci.yml | 15 +++ .test-conda-env.yml | 18 ++++ asv.conf.json | 158 ++++++++++++++++++++++++++++++++ benchmarks/__init__.py | 0 benchmarks/run_sumpy_kernels.py | 62 +++++++++++++ 6 files changed, 255 insertions(+) create mode 100644 .test-conda-env.yml create mode 100644 asv.conf.json create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/run_sumpy_kernels.py diff --git a/.gitignore b/.gitignore index 7be271c37..e7ea1299a 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ loopy/_git_rev.py virtualenv-[0-9]*[0-9] *.so + +.asv diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 07a049239..c975b4d5f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -141,3 +141,18 @@ Flake8: - python3 except: - tags + +Benchmarks: + script: + - export CONDA_ENVIRONMENT=.test-conda-env.yml + - export PROJECT=loopy + - export PYOPENCL_TEST=portable:pthread + - export LOOPY_NO_CACHE=1 + - export ASV_FACTOR=1.5 + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-benchmark-py-project.sh + - ". ./build-and-benchmark-py-project.sh" + tags: + - linux + - benchmark + except: + - tags diff --git a/.test-conda-env.yml b/.test-conda-env.yml new file mode 100644 index 000000000..6f7a4d2af --- /dev/null +++ b/.test-conda-env.yml @@ -0,0 +1,18 @@ +name: test-conda-env +channels: +- conda-forge +- nodefaults + +dependencies: +- git +- numpy +- pocl +- pocl-cuda +- islpy +- pyopencl +- python=3 + +- pip +- pip: + - git+https://github.com/inducer/pytools + - git+https://github.com/inducer/pymbolic diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 000000000..5ef608174 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,158 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "loopy", + + // The project's homepage + "project_url": "https://documen.tician.de/loopy", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": ".", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["master"], // for git + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + // "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "http://gitlab.tiker.net/inducer/loopy/commits/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. 
+ // "pythons": ["2.7", "3.6"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // "matrix": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""], // emcee is only available for install with pip. + // }, + "matrix": { + "numpy" : [""], + "pyopencl" : [""], + "islpy" : [""], + "pocl" : [""], + "pip+git+https://github.com/inducer/pymbolic#egg=pymbolic": [""], + "pip+git+https://gitlab.tiker.net/inducer/boxtree#egg=boxtree": [""], + "pip+git+https://github.com/inducer/loopy#egg=loopy": [""], + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. 
+ // "wheel_cache_size": 0 + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // } +} diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py new file mode 100644 index 000000000..7550d578a --- /dev/null +++ b/benchmarks/run_sumpy_kernels.py @@ -0,0 +1,62 @@ +import numpy as np + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +import logging +logger = logging.getLogger(__name__) +from .sumpy_kernels import m2l_3d_order_6 + +import loopy as lp + +class SumpyBenchmarkSuite: + + params = [ + "m2l_3d_order_6" + ] + + param_names = ['test_name'] + + version = 1 + + def setup_cache(self): + data = {} + for param in self.params: + self.setup(data, param) + data[param] = {} + knl = globals()[param]() + data[param]["instantiated"] = knl + preprocessed = lp.preprocess_kernel(knl) + data[param]["preprocessed"] = preprocessed + scheduled = lp.get_one_scheduled_kernel(preprocessed) + data[param]["scheduled"] = scheduled + return data + + def setup(self, data, param): + logging.basicConfig(level=logging.INFO) + np.random.seed(17) + + def time_instantiate(self, data, param): + create_knl = globals()[param] + create_knl() + + def time_preprocess(self, data, param): + lp.preprocess_kernel(data[param]["instantiated"]) + + def time_schedule(self, data, param): + lp.get_one_scheduled_kernel(data[param]["preprocessed"]) + + def time_generate_code(self, data, param): + lp.generate_code_v2(data[param]["scheduled"]) + + time_instantiate.timeout = 600.0 + time_preprocess.timeout = 600.0 + time_schedule.timeout = 600.0 + time_generate_code.timeout = 600.0 + + # Run memory benchmarks as well + peakmem_instantiate = time_instantiate + peakmem_preprocess = time_preprocess + peakmem_schedule = time_schedule + peakmem_generate_code = time_generate_code + -- GitLab From 35a57ac00b0b6e3013a2d1c545a28caaace0918f Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Sun, 21 Jun 2020 12:55:57 -0500 Subject: [PATCH 2/9] No warmup and run only once for expensive operations --- benchmarks/run_sumpy_kernels.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py index 7550d578a..2a789a198 100644 --- a/benchmarks/run_sumpy_kernels.py +++ b/benchmarks/run_sumpy_kernels.py @@ -54,6 +54,16 @@ class SumpyBenchmarkSuite: time_schedule.timeout = 
600.0 time_generate_code.timeout = 600.0 + # No warmup is needed + time_instantiate.warmup_time = 0 + time_preprocess.warmup_time = 0 + time_schedule.warmup_time = 0 + time_generate_code.warmup_time = 0 + + # These are expensive operations. Run only once + time_schedule.number = 1 + time_generate_code.number = 1 + # Run memory benchmarks as well peakmem_instantiate = time_instantiate peakmem_preprocess = time_preprocess -- GitLab From 1db5bb994e1244cbba32ed55efce5e3095da4383 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 21 Apr 2021 01:37:12 -0500 Subject: [PATCH 3/9] Update tests --- .github/workflows/ci.yml | 1 + .gitlab-ci.yml | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25651f665..0e932ef2f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -126,6 +126,7 @@ jobs: python-version: '3.x' - name: "Main Script" run: | + PROJECT=loopy CONDA_ENVIRONMENT=.test-conda-env-py3.yml curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/ci-support.sh . ci-support.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c975b4d5f..e4a2e3e0f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,9 @@ +stages: + - test + - deploy + Python 3 POCL: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable:pthread @@ -16,6 +21,7 @@ Python 3 POCL: junit: test/pytest.xml Python 3 POCL without arg check: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable:pthread @@ -34,6 +40,7 @@ Python 3 POCL without arg check: junit: test/pytest.xml Python 3 Intel: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=intel @@ -54,6 +61,7 @@ Python 3 Intel: Python 3 POCL Twice With Cache: + stage: test script: | export PY_EXE=python3 export PYOPENCL_TEST=portable:pthread @@ -87,6 +95,7 @@ Python 3 POCL Twice With Cache: # - tags Python 3 POCL Examples: + stage: test script: | export PY_EXE=python3 export PYOPENCL_TEST=portable:pthread @@ -113,6 +122,7 @@ Python 3 POCL Examples: - tags Pylint: + stage: test script: # Needed to avoid name shadowing issues when running from source directory. - PROJECT_INSTALL_FLAGS="--editable" @@ -126,7 +136,9 @@ Pylint: - tags Documentation: + stage: deploy script: + - PROJECT=loopy - EXTRA_INSTALL="pybind11 numpy" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh - ". ./build-docs.sh" @@ -134,6 +146,7 @@ Documentation: - python3 Flake8: + stage: test script: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh "$CI_PROJECT_NAME" test examples @@ -143,13 +156,14 @@ Flake8: - tags Benchmarks: + stage: test script: - - export CONDA_ENVIRONMENT=.test-conda-env.yml - - export PROJECT=loopy - - export PYOPENCL_TEST=portable:pthread + - CONDA_ENVIRONMENT=.test-conda-env.yml + - PROJECT=loopy + - PYOPENCL_TEST=portable:pthread - export LOOPY_NO_CACHE=1 - export ASV_FACTOR=1.5 - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-benchmark-py-project.sh + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-benchmark-py-project.sh - ". 
./build-and-benchmark-py-project.sh" tags: - linux -- GitLab From 4c6819dc68497e405f2cfb110d2922e639782389 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 21 Apr 2021 01:39:25 -0500 Subject: [PATCH 4/9] default branch name is master --- asv.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv.conf.json b/asv.conf.json index 5ef608174..26ac70b48 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -20,7 +20,7 @@ // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). - "branches": ["master"], // for git + "branches": ["main"], // for git // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL -- GitLab From 1d7924fa91bfe02fea53cc44dff931615ddbfa1a Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 23 Apr 2021 15:58:20 -0500 Subject: [PATCH 5/9] install sumpy --- .gitlab-ci.yml | 2 +- .test-conda-env.yml | 18 ------------------ asv.conf.json | 3 ++- 3 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 .test-conda-env.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e4a2e3e0f..2ea1707ff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -158,7 +158,7 @@ Flake8: Benchmarks: stage: test script: - - CONDA_ENVIRONMENT=.test-conda-env.yml + - CONDA_ENVIRONMENT=.test-conda-env-py3.yml - PROJECT=loopy - PYOPENCL_TEST=portable:pthread - export LOOPY_NO_CACHE=1 diff --git a/.test-conda-env.yml b/.test-conda-env.yml deleted file mode 100644 index 6f7a4d2af..000000000 --- a/.test-conda-env.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: test-conda-env -channels: -- conda-forge -- nodefaults - -dependencies: -- git -- numpy -- pocl -- pocl-cuda -- islpy -- pyopencl -- python=3 - -- pip -- pip: - - git+https://github.com/inducer/pytools - - git+https://github.com/inducer/pymbolic diff --git a/asv.conf.json b/asv.conf.json index 26ac70b48..99c2ea2b5 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -71,8 +71,9 @@ "islpy" : [""], "pocl" : [""], "pip+git+https://github.com/inducer/pymbolic#egg=pymbolic": [""], - "pip+git+https://gitlab.tiker.net/inducer/boxtree#egg=boxtree": [""], + "pip+git+https://github.com/inducer/boxtree#egg=boxtree": [""], "pip+git+https://github.com/inducer/loopy#egg=loopy": [""], + "pip+git+https://github.com/inducer/sumpy#egg=sumpy": [""], }, // Combinations of libraries/python versions can be excluded/included -- GitLab From 1a074f5433f765ec1c64fd45ff734bd9e4e8e691 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 23 Apr 2021 15:58:29 -0500 Subject: [PATCH 6/9] use installed sumpy --- benchmarks/run_sumpy_kernels.py | 48 +++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py index 2a789a198..358d17c58 100644 --- a/benchmarks/run_sumpy_kernels.py +++ b/benchmarks/run_sumpy_kernels.py @@ -5,14 +5,51 @@ from pyopencl.tools import ( # noqa import logging logger = logging.getLogger(__name__) -from .sumpy_kernels import m2l_3d_order_6 import loopy as lp +def _sumpy_kernel_init(dim, order): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + + np.random.seed(17) + + knl = LaplaceKernel(dim) + local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion + mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion + target_kernels = [knl] + m_expn = mpole_expn_class(knl, order=order) + l_expn = local_expn_class(knl, order=order) 
+ + from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR + m2l = E2EFromCSR(ctx, m_expn, l_expn) + m2l.get_translation_loopy_insns() + return m2l + +def _sumpy_kernel_make(m2l): + loopy_knl = m2l.get_optimized_kernel() + loopy_knl = lp.add_and_infer_dtypes( + loopy_knl, + dict( + tgt_ibox=np.int32, + centers=np.float64, + tgt_center=np.float64, + target_boxes=np.int32, + src_ibox=np.int32, + src_expansions=np.float64, + tgt_rscale=np.float64, + src_rscale=np.float64, + src_box_starts=np.int32, + src_box_lists=np.int32, + ), + ) + return loopy_knl + + class SumpyBenchmarkSuite: params = [ - "m2l_3d_order_6" + ("m2l", 3, 6) ] param_names = ['test_name'] @@ -24,7 +61,9 @@ class SumpyBenchmarkSuite: for param in self.params: self.setup(data, param) data[param] = {} - knl = globals()[param]() + m2l = _sumpy_kernel_init(param[1], param[2]) + data[param]["setup"] = m2l + knl = _sumpy_kernel_make(m2l) data[param]["instantiated"] = knl preprocessed = lp.preprocess_kernel(knl) data[param]["preprocessed"] = preprocessed @@ -37,8 +76,7 @@ class SumpyBenchmarkSuite: np.random.seed(17) def time_instantiate(self, data, param): - create_knl = globals()[param] - create_knl() + create_knl = _sumpy_kernel_make(data[param]["setup"]) def time_preprocess(self, data, param): lp.preprocess_kernel(data[param]["instantiated"]) -- GitLab From c79ebc74b4af8acde6d31c1d791aaa8d7b977708 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 23 Apr 2021 16:03:50 -0500 Subject: [PATCH 7/9] fix formatting --- benchmarks/run_sumpy_kernels.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py index 358d17c58..711306572 100644 --- a/benchmarks/run_sumpy_kernels.py +++ b/benchmarks/run_sumpy_kernels.py @@ -1,31 +1,33 @@ +import loopy as lp import numpy as np - -from pyopencl.tools import ( # noqa - pytest_generate_tests_for_pyopencl as pytest_generate_tests) - +import pyopencl as cl import logging + logger = logging.getLogger(__name__) -import loopy as lp def _sumpy_kernel_init(dim, order): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE) + from sumpy.expansion.multipole import ( + LaplaceConformingVolumeTaylorMultipoleExpansion, + ) + from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion + from sumpy.kernel import LaplaceKernel + from sumpy import E2EFromCSR + ctx = cl.create_some_context() np.random.seed(17) knl = LaplaceKernel(dim) local_expn_class = LaplaceConformingVolumeTaylorLocalExpansion mpole_expn_class = LaplaceConformingVolumeTaylorMultipoleExpansion - target_kernels = [knl] m_expn = mpole_expn_class(knl, order=order) l_expn = local_expn_class(knl, order=order) - from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR m2l = E2EFromCSR(ctx, m_expn, l_expn) m2l.get_translation_loopy_insns() return m2l + def _sumpy_kernel_make(m2l): loopy_knl = m2l.get_optimized_kernel() loopy_knl = lp.add_and_infer_dtypes( @@ -48,11 +50,9 @@ def _sumpy_kernel_make(m2l): class SumpyBenchmarkSuite: - params = [ - ("m2l", 3, 6) - ] + params = [("m2l", 3, 6)] - param_names = ['test_name'] + param_names = ["test_name"] version = 1 @@ -76,7 +76,7 @@ class SumpyBenchmarkSuite: np.random.seed(17) def time_instantiate(self, data, param): - create_knl = _sumpy_kernel_make(data[param]["setup"]) + _sumpy_kernel_make(data[param]["setup"]) def time_preprocess(self, data, param): 
lp.preprocess_kernel(data[param]["instantiated"]) @@ -107,4 +107,3 @@ class SumpyBenchmarkSuite: peakmem_preprocess = time_preprocess peakmem_schedule = time_schedule peakmem_generate_code = time_generate_code - -- GitLab From 007e555444a25b43f585442feee9a92f15920a94 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 23 Apr 2021 17:27:10 -0500 Subject: [PATCH 8/9] fix caching and add order 12 --- benchmarks/run_sumpy_kernels.py | 74 ++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py index 711306572..db5aeba0a 100644 --- a/benchmarks/run_sumpy_kernels.py +++ b/benchmarks/run_sumpy_kernels.py @@ -2,11 +2,19 @@ import loopy as lp import numpy as np import pyopencl as cl import logging +from dataclasses import dataclass logger = logging.getLogger(__name__) +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests, +) -def _sumpy_kernel_init(dim, order): + +def _sumpy_kernel_init(param): + name, dim, order = param.name, param.dim, param.order + # TODO: add other kernels + assert name == "m2l" from sumpy.expansion.multipole import ( LaplaceConformingVolumeTaylorMultipoleExpansion, ) @@ -25,11 +33,14 @@ def _sumpy_kernel_init(dim, order): m2l = E2EFromCSR(ctx, m_expn, l_expn) m2l.get_translation_loopy_insns() + m2l.ctx = None + m2l.device = None return m2l -def _sumpy_kernel_make(m2l): - loopy_knl = m2l.get_optimized_kernel() +def _sumpy_kernel_make(expn, param): + assert param.name == "m2l" + loopy_knl = expn.get_optimized_kernel() loopy_knl = lp.add_and_infer_dtypes( loopy_knl, dict( @@ -48,53 +59,59 @@ def _sumpy_kernel_make(m2l): return loopy_knl +@dataclass(frozen=True) +class Param: + name: str + dim: int + order: int + + +def cached_data(params): + data = {} + np.random.seed(17) + logging.basicConfig(level=logging.INFO) + for param in params: + data[param] = {} + expn = _sumpy_kernel_init(param) + data[param]["setup"] = expn + knl = _sumpy_kernel_make(expn, param) + knl = lp.preprocess_kernel(knl) + data[param]["instantiated"] = knl + scheduled = lp.get_one_scheduled_kernel(knl) + data[param]["scheduled"] = scheduled + return data + + class SumpyBenchmarkSuite: - params = [("m2l", 3, 6)] + params = [ + Param("m2l", dim=3, order=6) + Param("m2l", dim=3, order=12) + ] param_names = ["test_name"] version = 1 def setup_cache(self): - data = {} - for param in self.params: - self.setup(data, param) - data[param] = {} - m2l = _sumpy_kernel_init(param[1], param[2]) - data[param]["setup"] = m2l - knl = _sumpy_kernel_make(m2l) - data[param]["instantiated"] = knl - preprocessed = lp.preprocess_kernel(knl) - data[param]["preprocessed"] = preprocessed - scheduled = lp.get_one_scheduled_kernel(preprocessed) - data[param]["scheduled"] = scheduled - return data - - def setup(self, data, param): - logging.basicConfig(level=logging.INFO) - np.random.seed(17) + return cached_data(self.params) def time_instantiate(self, data, param): - _sumpy_kernel_make(data[param]["setup"]) - - def time_preprocess(self, data, param): - lp.preprocess_kernel(data[param]["instantiated"]) + knl = _sumpy_kernel_make(data[param]["setup"], param) + lp.preprocess_kernel(knl) def time_schedule(self, data, param): - lp.get_one_scheduled_kernel(data[param]["preprocessed"]) + lp.get_one_scheduled_kernel(data[param]["instantiated"]) def time_generate_code(self, data, param): lp.generate_code_v2(data[param]["scheduled"]) time_instantiate.timeout = 600.0 - time_preprocess.timeout = 
600.0 time_schedule.timeout = 600.0 time_generate_code.timeout = 600.0 # No warmup is needed time_instantiate.warmup_time = 0 - time_preprocess.warmup_time = 0 time_schedule.warmup_time = 0 time_generate_code.warmup_time = 0 @@ -104,6 +121,5 @@ class SumpyBenchmarkSuite: # Run memory benchmarks as well peakmem_instantiate = time_instantiate - peakmem_preprocess = time_preprocess peakmem_schedule = time_schedule peakmem_generate_code = time_generate_code -- GitLab From 3b55618cab317e7df3248ef26185582ee5014861 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 23 Apr 2021 17:37:40 -0500 Subject: [PATCH 9/9] Fix typo --- benchmarks/run_sumpy_kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/run_sumpy_kernels.py b/benchmarks/run_sumpy_kernels.py index db5aeba0a..d37a6bb8a 100644 --- a/benchmarks/run_sumpy_kernels.py +++ b/benchmarks/run_sumpy_kernels.py @@ -85,8 +85,8 @@ def cached_data(params): class SumpyBenchmarkSuite: params = [ - Param("m2l", dim=3, order=6) - Param("m2l", dim=3, order=12) + Param("m2l", dim=3, order=6), + Param("m2l", dim=3, order=12), ] param_names = ["test_name"] -- GitLab
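
A note on the asv conventions this series relies on: asv imports benchmark classes from the "benchmarks" directory named in asv.conf.json, times every method whose name starts with time_, records peak memory usage for peakmem_ methods, runs setup_cache() once per environment and passes its return value as the first argument to each benchmark, and reads per-method attributes such as timeout, warmup_time and number to control sampling. The sketch below illustrates that machinery in isolation; it is a hypothetical example, not part of these patches (the file name and class name are made up, and a reasonably recent asv is assumed).

    # benchmarks/example_suite.py  (hypothetical file, for illustration only)
    class ExampleSuite:
        # One parameter axis; every benchmark runs once per entry.
        params = ["small", "large"]
        param_names = ["case"]

        # Bump to invalidate previously stored results for this suite.
        version = 1

        def setup_cache(self):
            # Expensive preparation, executed once per environment; the return
            # value is handed to every benchmark as its first argument.
            return {"small": list(range(10**3)), "large": list(range(10**6))}

        def time_sum(self, data, case):
            sum(data[case])

        def peakmem_copy(self, data, case):
            list(data[case])

        # Per-benchmark tuning, mirroring the attributes set in the patches above.
        time_sum.timeout = 600.0   # allow up to ten minutes per sample
        time_sum.warmup_time = 0   # skip the warmup phase
        time_sum.number = 1        # execute the body exactly once per sample

With the asv.conf.json from this series in place, such a suite would typically be exercised locally with "asv run" followed by "asv publish" and "asv preview"; the Benchmarks CI job added to .gitlab-ci.yml drives the same machinery through ci-support's build-and-benchmark-py-project.sh.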