diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25651f66571c7be459c4074a7890df940c50ce08..0e932ef2f55adcecd1eee76b51151fae037c9988 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -126,6 +126,7 @@ jobs: python-version: '3.x' - name: "Main Script" run: | + PROJECT=loopy CONDA_ENVIRONMENT=.test-conda-env-py3.yml curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/ci-support.sh . ci-support.sh diff --git a/.gitignore b/.gitignore index 7be271c37ca0d6d1d67185ce4fbf202bbc488240..e7ea1299a074d23ec23ad85c88c1289b4f7d2df1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ loopy/_git_rev.py virtualenv-[0-9]*[0-9] *.so + +.asv diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 07a0492393048c64d93df70f800b7da4e3d1861f..2ea1707ff43eb9e1d17760a4ac86bec1a886ae1d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,9 @@ +stages: + - test + - deploy + Python 3 POCL: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable:pthread @@ -16,6 +21,7 @@ Python 3 POCL: junit: test/pytest.xml Python 3 POCL without arg check: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=portable:pthread @@ -34,6 +40,7 @@ Python 3 POCL without arg check: junit: test/pytest.xml Python 3 Intel: + stage: test script: - export PY_EXE=python3 - export PYOPENCL_TEST=intel @@ -54,6 +61,7 @@ Python 3 Intel: Python 3 POCL Twice With Cache: + stage: test script: | export PY_EXE=python3 export PYOPENCL_TEST=portable:pthread @@ -87,6 +95,7 @@ Python 3 POCL Twice With Cache: # - tags Python 3 POCL Examples: + stage: test script: | export PY_EXE=python3 export PYOPENCL_TEST=portable:pthread @@ -113,6 +122,7 @@ Python 3 POCL Examples: - tags Pylint: + stage: test script: # Needed to avoid name shadowing issues when running from source directory. 
- PROJECT_INSTALL_FLAGS="--editable" @@ -126,7 +136,9 @@ Pylint: - tags Documentation: + stage: deploy script: + - PROJECT=loopy - EXTRA_INSTALL="pybind11 numpy" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh - ". ./build-docs.sh" @@ -134,6 +146,7 @@ Documentation: - python3 Flake8: + stage: test script: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-flake8.sh - . ./prepare-and-run-flake8.sh "$CI_PROJECT_NAME" test examples @@ -141,3 +154,19 @@ Flake8: - python3 except: - tags + +Benchmarks: + stage: test + script: + - CONDA_ENVIRONMENT=.test-conda-env-py3.yml + - PROJECT=loopy + - PYOPENCL_TEST=portable:pthread + - export LOOPY_NO_CACHE=1 + - export ASV_FACTOR=1.5 + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-benchmark-py-project.sh + - ". ./build-and-benchmark-py-project.sh" + tags: + - linux + - benchmark + except: + - tags diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 0000000000000000000000000000000000000000..99c2ea2b5941721a045d8aa7a0586d7d5f9e1eb6 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,159 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "loopy", + + // The project's homepage + "project_url": "https://documen.tician.de/loopy", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": ".", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], // for git + + // The DVCS being used. 
If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + // "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "http://gitlab.tiker.net/inducer/loopy/commits/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["2.7", "3.6"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is not to be + // installed. If the package to be tested is only available from + // PyPI, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // "matrix": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""], // emcee is only available for install with pip. 
+ // }, + "matrix": { + "numpy" : [""], + "pyopencl" : [""], + "islpy" : [""], + "pocl" : [""], + "pip+git+https://github.com/inducer/pymbolic#egg=pymbolic": [""], + "pip+git+https://github.com/inducer/boxtree#egg=boxtree": [""], + "pip+git+https://github.com/inducer/loopy#egg=loopy": [""], + "pip+git+https://github.com/inducer/sumpy#egg=sumpy": [""], + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. 
If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + // "wheel_cache_size": 0 + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. 
"""Benchmarks of loopy's kernel pipeline on a sumpy "m2l" translation kernel.

The suite class below follows the ``asv`` naming conventions
(``setup_cache``, ``time_*``, ``peakmem_*``).
"""

import loopy as lp
import numpy as np
import pyopencl as cl
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)

from pyopencl.tools import (  # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests,
)


def _sumpy_kernel_init(param):
    """Build the sumpy translation object described by *param*.

    Only the ``"m2l"`` benchmark exists so far: a Laplace kernel of dimension
    ``param.dim`` is wrapped in conforming volume Taylor multipole and local
    expansions of order ``param.order``, and both are handed to
    ``sumpy.E2EFromCSR``.

    :arg param: a :class:`Param`.
    :returns: the ``E2EFromCSR`` instance, with its ``ctx`` and ``device``
        attributes reset to *None*.
    """
    name, dim, order = param.name, param.dim, param.order
    # TODO: add other kernels
    assert name == "m2l"

    # sumpy is only imported once a kernel is actually requested.
    from sumpy.expansion.multipole import (
        LaplaceConformingVolumeTaylorMultipoleExpansion,
    )
    from sumpy.expansion.local import LaplaceConformingVolumeTaylorLocalExpansion
    from sumpy.kernel import LaplaceKernel
    from sumpy import E2EFromCSR

    cl_context = cl.create_some_context()
    np.random.seed(17)

    laplace = LaplaceKernel(dim)
    mpole = LaplaceConformingVolumeTaylorMultipoleExpansion(laplace, order=order)
    local = LaplaceConformingVolumeTaylorLocalExpansion(laplace, order=order)

    translation = E2EFromCSR(cl_context, mpole, local)
    # The return value is discarded.
    # NOTE(review): presumably called to populate an internal cache before
    # the object is stored -- confirm against sumpy.
    translation.get_translation_loopy_insns()
    # NOTE(review): presumably the OpenCL handles are dropped so that the
    # object can be pickled by the benchmark runner -- confirm.
    translation.ctx = None
    translation.device = None
    return translation


def _sumpy_kernel_make(expn, param):
    """Return the optimized loopy kernel of *expn* with argument dtypes set.

    :arg expn: the object returned by :func:`_sumpy_kernel_init`.
    :arg param: a :class:`Param`; only ``name == "m2l"`` is accepted.
    """
    assert param.name == "m2l"

    # dtypes of the kernel arguments, passed on to loopy for type inference
    arg_dtypes = {
        "tgt_ibox": np.int32,
        "centers": np.float64,
        "tgt_center": np.float64,
        "target_boxes": np.int32,
        "src_ibox": np.int32,
        "src_expansions": np.float64,
        "tgt_rscale": np.float64,
        "src_rscale": np.float64,
        "src_box_starts": np.int32,
        "src_box_lists": np.int32,
    }
    return lp.add_and_infer_dtypes(expn.get_optimized_kernel(), arg_dtypes)


@dataclass(frozen=True)
class Param:
    """One benchmark configuration; frozen, hence usable as a dict key."""

    # benchmark kind, currently always "m2l"
    name: str
    # dimension passed to the Laplace kernel
    dim: int
    # order passed to both expansions
    order: int


def cached_data(params):
    """Run every pipeline stage once for each entry of *params*.

    :arg params: an iterable of :class:`Param`.
    :returns: a dict mapping each parameter to a dict with the keys
        ``"setup"`` (result of :func:`_sumpy_kernel_init`),
        ``"instantiated"`` (the preprocessed kernel) and ``"scheduled"``
        (the scheduled kernel).
    """
    np.random.seed(17)
    logging.basicConfig(level=logging.INFO)

    results = {}
    for param in params:
        stages = results[param] = {}

        expn = _sumpy_kernel_init(param)
        stages["setup"] = expn

        preprocessed = lp.preprocess_kernel(_sumpy_kernel_make(expn, param))
        stages["instantiated"] = preprocessed

        stages["scheduled"] = lp.get_one_scheduled_kernel(preprocessed)
    return results


class SumpyBenchmarkSuite:
    """Time and peak-memory benchmarks of loopy stages on sumpy kernels."""

    params = [
        Param("m2l", dim=3, order=6),
        Param("m2l", dim=3, order=12),
    ]

    param_names = ["test_name"]

    version = 1

    def setup_cache(self):
        """Precompute the inputs shared by all benchmarks below."""
        return cached_data(self.params)

    def time_instantiate(self, data, param):
        """Build the kernel from the cached setup object and preprocess it."""
        fresh_knl = _sumpy_kernel_make(data[param]["setup"], param)
        lp.preprocess_kernel(fresh_knl)

    def time_schedule(self, data, param):
        """Schedule the cached preprocessed kernel."""
        stages = data[param]
        lp.get_one_scheduled_kernel(stages["instantiated"])

    def time_generate_code(self, data, param):
        """Generate code for the cached scheduled kernel."""
        stages = data[param]
        lp.generate_code_v2(stages["scheduled"])

    # Settings for time_instantiate ("number" is not overridden here).
    time_instantiate.timeout = 600.0
    time_instantiate.warmup_time = 0  # no warmup is needed

    # Scheduling is expensive: no warmup, and run only once.
    time_schedule.timeout = 600.0
    time_schedule.warmup_time = 0
    time_schedule.number = 1

    # Code generation is expensive: no warmup, and run only once.
    time_generate_code.timeout = 600.0
    time_generate_code.warmup_time = 0
    time_generate_code.number = 1

    # Run memory benchmarks as well; these reuse the very same function
    # objects, including the attributes set above.
    peakmem_instantiate = time_instantiate
    peakmem_schedule = time_schedule
    peakmem_generate_code = time_generate_code