Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Commits on Source (5837)
Showing with 1403 additions and 11 deletions
#! /bin/bash
set -e
function install_example_prereqs()
{
    # Install everything the example scripts/notebooks require.
    # ipython_genutils pinned in: https://github.com/jupyter/nbconvert/issues/1725
    # jinja2 < 3.1 pinned in:     https://github.com/jupyter/nbconvert/issues/1736
    with_echo pip install matplotlib ipykernel nbconvert \
        ipython_genutils 'jinja2 < 3.1'
    install_ispc
}
function run_examples()
{
    # Run every file under examples/ whose name matches a glob.
    #
    # $1 - PATTERN: find(1) -name glob, e.g. "*.py"
    # $2 - CMDLINE: command to run each file with; deliberately left
    #      unquoted at the call site so it may carry flags,
    #      e.g. "python -m nbconvert --to html --execute"
    PATTERN=$1
    CMDLINE=$2
    # while-read instead of for-over-$(find): does not word-split paths,
    # so filenames containing spaces survive.
    find examples -name "$PATTERN" -print | while IFS= read -r i; do
        echo "-----------------------------------------------------------------------"
        echo "RUNNING $i"
        echo "-----------------------------------------------------------------------"
        dn=$(dirname "$i")
        bn=$(basename "$i")
        # Subshell keeps the cd from leaking; "$dn" quoted (was unquoted);
        # && instead of ; so we never run CMDLINE in the wrong directory.
        (cd "$dn" && echo $CMDLINE "$bn" && $CMDLINE "$bn")
    done
}
function run_py_examples()
{
    # Run all Python example scripts with the configured interpreter.
    # BUGFIX: ${PY_EXE} was unquoted; an interpreter spec with flags
    # (e.g. "python -u") word-split into multiple arguments, and
    # run_examples only reads $1/$2, so the interpreter was dropped.
    # Quoting delivers the whole spec as one CMDLINE argument, which
    # run_examples then word-splits intentionally at the point of use.
    run_examples "*.py" "${PY_EXE}"
}
function run_ipynb_examples()
{
    # Execute and convert all notebook examples to HTML via nbconvert.
    local nb_cmdline="${PY_EXE} -m nbconvert --to html --execute"
    run_examples "*.ipynb" "$nb_cmdline"
}
function run_floopy_examples()
{
    # Run all Floopy (Fortran+Loopy) examples through the loopy CLI.
    local floopy_cmdline="${PY_EXE} -m loopy"
    run_examples "*.floopy" "$floopy_cmdline"
}
# https://editorconfig.org/
# https://github.com/editorconfig/editorconfig-vim
# https://github.com/editorconfig/editorconfig-emacs
root = true
[*]
indent_style = space
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.py]
indent_size = 4
[*.rst]
indent_size = 4
[*.cpp]
indent_size = 2
[*.hpp]
indent_size = 2
# There may be one in doc/
[Makefile]
indent_style = tab
# https://github.com/microsoft/vscode/issues/1679
[*.md]
trim_trailing_whitespace = false
version: 2
updates:
# Set update schedule for GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
# vim: sw=4
name: Gitlab mirror
on:
push:
branches:
- main
jobs:
autopush:
name: Automatic push to gitlab.tiker.net
if: startsWith(github.repository, 'inducer/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
mirror_github_to_gitlab
env:
GITLAB_AUTOPUSH_KEY: ${{ secrets.GITLAB_AUTOPUSH_KEY }}
# vim: sw=4
name: CI
on:
push:
branches:
- main
pull_request:
schedule:
- cron: '17 3 * * 0'
concurrency:
group: ${{ github.head_ref || github.ref_name }}
cancel-in-progress: true
jobs:
ruff:
name: Ruff
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: astral-sh/setup-uv@v5
- name: "Main Script"
run: |
uv run --only-dev ruff check
typos:
name: Typos
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: crate-ci/typos@master
pylint:
name: Pylint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
sed 's/python=3/python=3.7/' .test-conda-env-py3.yml > .test-conda-env.yml
USE_CONDA_BUILD=1
curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-pylint.sh
. ./prepare-and-run-pylint.sh "$(basename $GITHUB_REPOSITORY)" test/test_*.py
mypy:
name: Mypy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
EXTRA_INSTALL="mypy pytest types-colorama types-Pygments"
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
./run-mypy.sh
pytest:
name: Conda Pytest
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
pytest_intel:
name: Conda Pytest with Intel CL
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
curl -L -O https://raw.githubusercontent.com/illinois-scicomp/machine-shop-maintenance/main/install-intel-icd.sh
sudo bash ./install-intel-icd.sh
CONDA_ENVIRONMENT=.test-conda-env-py3.yml
echo "- ocl-icd-system" >> "$CONDA_ENVIRONMENT"
sed -i "/pocl/ d" "$CONDA_ENVIRONMENT"
export PYOPENCL_TEST=intel
source /opt/enable-intel-cl.sh
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
test_py_project
pytest_no_arg_check:
name: Conda Pytest without arg check
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project-within-miniconda.sh
export _LOOPY_SKIP_ARG_CHECKS=1
. ./build-and-test-py-project-within-miniconda.sh
pytest_twice:
name: Conda Pytest Twice (for cache behavior)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
# This test makes sure that loopy can run with kernels loaded from disk cache.
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
( test_py_project )
# See https://github.com/inducer/loopy/pull/828 why this is disabled.
# export LOOPY_ABORT_ON_CACHE_MISS=1
( test_py_project )
examples:
name: Conda Examples
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
run: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
rewrite_pyopencl_test
. ./.ci/examples-funcs.sh
install_example_prereqs
run_py_examples
run_ipynb_examples
run_floopy_examples
docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
-
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: "Main Script"
run: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
build_docs
downstream_tests:
strategy:
matrix:
downstream_project: [arraycontext, meshmode, grudge, pytential, pytato]
fail-fast: false
name: Tests for downstream project ${{ matrix.downstream_project }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Main Script"
env:
DOWNSTREAM_PROJECT: ${{ matrix.downstream_project }}
run: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
test_downstream "$DOWNSTREAM_PROJECT"
downstream_firedrake:
name: Tests for downstream project Firedrake
runs-on: ubuntu-latest
container:
image: 'firedrakeproject/firedrake'
steps:
- name: "Main script"
run: |
cd /root
python3 -m venv --system-site-packages myvenv
export HOME="$(pwd)"
mkdir loopy
cd loopy
git init
git remote add origin "https://github.com/$GITHUB_REPOSITORY.git"
git fetch origin "$GITHUB_REF"
git checkout FETCH_HEAD
git submodule update --init
. /root/myvenv/bin/activate
pip install --editable .
pip uninstall -y pytools
pip uninstall -y pymbolic
pip install "git+https://github.com/inducer/pytools.git#egg=pytools"
pip install "git+https://github.com/inducer/pymbolic.git#egg=pymbolic"
cd /opt/firedrake
# patch so exception messages get shown
curl -L https://gist.githubusercontent.com/inducer/17d7134ace215f0df1f3627eac4195c7/raw/ec5470a7d8587b6e1f336f3ef1d0ece5e26f236a/firedrake-debug-patch.diff | patch -p1
sed -i 's/@mpiexec/@mpiexec --oversubscribe/' Makefile
make check
validate_cff:
name: Validate CITATION.cff
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- run: |
pip install cffconvert
cffconvert -i CITATION.cff --validate
# vim: sw=4
......@@ -13,3 +13,22 @@ distribute*egg
distribute*tar.gz
*.log
*profiler.conf
core
.coverage
htmlcov
.ipynb_checkpoints
lextab.py
yacctab.py
.pytest_cache/*
.cache
.env
virtualenv-[0-9]*[0-9]
*.so
.asv
# Files used by run-pylint.sh
.pylintrc.yml
.run-pylint.py
Pytest POCL:
script:
- export PYOPENCL_TEST=portable:pthread
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
tags:
- python3
- pocl
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
Pytest Nvidia Titan V:
script:
- export PYOPENCL_TEST=nvi:titan
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- source /opt/enable-intel-cl.sh
- curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
tags:
- python3
- nvidia-titan-v
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
Pytest POCL without arg check:
script:
- export PYOPENCL_TEST=portable:pthread
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- export _LOOPY_SKIP_ARG_CHECKS=1
- curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
tags:
- python3
- pocl
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
Pytest Intel:
script:
- export PYOPENCL_TEST=intel
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- export LOOPY_INTEL_CL_OK_FOR_TEST_REF=1
- source /opt/enable-intel-cl.sh
- curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
tags:
- python3
- intel-cl-cpu
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
Pytest POCL Twice With Cache:
script: |
export PYOPENCL_TEST=portable:pthread
export EXTRA_INSTALL="pybind11 numpy mako"
curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/ci-support.sh
. ./ci-support.sh
build_py_project_in_venv
( test_py_project )
( test_py_project )
tags:
- python3
- pocl
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
# PyPy POCL:
# script:
# - export PY_EXE=pypy
# - export PYOPENCL_TEST=portable:pthread
# - export EXTRA_INSTALL="pybind11 numpy mako"
# - curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh
# - ". ./build-and-test-py-project.sh"
# tags:
# - pypy
# - pocl
# except:
# - tags
Pytest POCL Examples:
script: |
export PYOPENCL_TEST=portable:pthread
export EXTRA_INSTALL="pybind11 numpy mako"
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_venv
rewrite_pyopencl_test
. ./.ci/examples-funcs.sh
install_example_prereqs
run_py_examples
run_ipynb_examples
run_floopy_examples
tags:
- python3
- pocl
- large-node
# For examples/python/ispc-stream-harness.py
- avx2
except:
- tags
Pylint:
script:
# Needed to avoid name shadowing issues when running from source directory.
- PROJECT_INSTALL_FLAGS="--editable"
- EXTRA_INSTALL="pybind11 numpy mako matplotlib ipykernel ply fparser"
- curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-pylint.sh
- . ./prepare-and-run-pylint.sh "$CI_PROJECT_NAME" test/test_*.py
tags:
- python3
except:
- tags
Documentation:
script: |
EXTRA_INSTALL="pybind11 numpy"
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_venv
build_docs
build_asv_html
maybe_upload_docs
tags:
- python3
Ruff:
script:
- pipx install uv
- uv run --only-dev ruff check
tags:
- docker-runner
except:
- tags
Mypy:
script: |
EXTRA_INSTALL="mypy pybind11 numpy types-colorama types-Pygments"
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_venv
./run-mypy.sh
tags:
- python3
except:
- tags
Downstream:
parallel:
matrix:
- DOWNSTREAM_PROJECT: [arraycontext, meshmode, grudge, pytential, pytato]
tags:
- large-node
- "docker-runner"
script: |
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
test_downstream "$DOWNSTREAM_PROJECT"
[submodule "loopy/target/c/compyte"]
path = loopy/target/c/compyte
url = https://github.com/inducer/compyte.git
- arg: extension-pkg-whitelist
val: islpy
- arg: ignore
val:
- compyte
- arg: ignored-modules
val:
- IPython
- pycuda
- matplotlib
- matplotlib.pyplot
- arg: init-hook
val: import sys; sys.setrecursionlimit(5000)
- arg: disable
val:
- E1102
name: test-conda-env
channels:
- conda-forge
- nodefaults
dependencies:
- python=3
- git
- numpy
- pocl
- mako
- pyopencl
- islpy
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
# major contributors
- family-names: "Kloeckner"
given-names: "Andreas"
orcid: "https://orcid.org/0000-0003-1228-519X"
- family-names: Kulkarni
given-names: Kaushik
email: kaushikcfd@gmail.com
- family-names: Kempf
given-names: Dominic
email: dominic.r.kempf@gmail.com
- family-names: Wala
given-names: Matt
email: wala1@illinois.edu
- family-names: Curtis
given-names: Nick
email: arghdos@gmail.com
- family-names: Stevens
given-names: James
email: jdsteve2@illinois.edu
- family-names: Fernando
given-names: Isuru
email: isuruf@gmail.com
# smaller fixes
- family-names: Mitchell
given-names: Lawrence
email: lawrence@wence.uk
- family-names: Alvey-Blanco
given-names: Addison J.
email: aalveyblanco@gmail.com
- family-names: Fikl
given-names: Alexandru
email: alexfikl@gmail.com
- family-names: Malone
given-names: Chris
email: chris.m.malone@gmail.com
- family-names: Ward
given-names: Connor
email: c.ward20@imperial.ac.uk
- family-names: Wilcox
given-names: Lucas C.
email: lucas@swirlee.com
- family-names: Koch
given-names: Marcel
email: marcel.koch@uni-muenster.de
- family-names: Woodman
given-names: Marmaduke
email: marmaduke.woodman@univ-amu.fr
- family-names: Smith
given-names: Matthew
email: mjsmith6@illinois.edu
- family-names: Diener
given-names: Matthias
email: mdiener@illinois.edu
- family-names: Christensen
given-names: Nicholas
email: njchris2@illinois.edu
- family-names: Nytko
given-names: Nicolas
email: nnytko2@illinois.edu
- family-names: Kirby
given-names: Robert C.
email: Robert_Kirby@baylor.edu
- family-names: Hegmann
given-names: Sebastian
email: shegmann@nina.iwr.uni-heidelberg.de
- family-names: Vorderwuelbecke
given-names: Sophia
email: sv2518@ic.ac.uk
- family-names: Ratnayaka
given-names: Thilina
email: thilinarmtb@gmail.com
- family-names: Gibson
given-names: Thomas
email: gibsonthomas1120@hotmail.com
- family-names: Sun
given-names: Tianjiao
email: tj-sun@tianjiaos-air.home
- family-names: Smith
given-names: Timothy A.
email: tasmith4@illinois.edu
- family-names: Warburton
given-names: Tim
email: timwar@caam.rice.edu
- family-names: Wei
given-names: Xiaoyu
email: wxy0516@gmail.com
- family-names: Weiner
given-names: Zach
email: zachjweiner@gmail.com
title: "Loopy"
version: 2024.1
date-released: 2024-02-16
url: "https://github.com/inducer/loopy"
doi: 10.5281/zenodo.10672275
license: MIT
MIT License
Copyright (c) 2018 Andreas Klöckner and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
......@@ -7,7 +7,7 @@ Documentation Notes
Things to consider
^^^^^^^^^^^^^^^^^^
- Depedencies are pointwise for shared loop dimensions
- Dependencies are pointwise for shared loop dimensions
and global over non-shared ones (between dependent and ancestor)
- multiple insns could fight over which iname gets local axis 0
......@@ -16,16 +16,17 @@ Things to consider
- Every loop in loopy is opened at most once.
Too restrictive?
- Loop bounds currently may not depend on parallel dimensions
Does it make sense to relax this?
- Why do CSEs necessarily have to duplicate the inames?
- Why do precomputes necessarily have to duplicate the inames?
-> because that would be necessary for a sequential prefetch
- Cannot do slab decomposition on inames that share a tag with
other inames
-> Is that reasonable?
- Entering a loop means:
- setting up conditionals related to it (slabs/bounds)
- allowing loops nested inside to depend on loop state
- Not using all hw loop dimensions causes an error, as
is the case for variant 3 in the rank_one test.
......@@ -40,17 +41,43 @@ Things to consider
- Limitation: base index for parallel axes is 0.
- Dependency on order of operations is ill-formed
- Dependency on non-local global writes is ill-formed
- No substitution rules allowed on lhs of insns
To-do
^^^^^
- Kernel splitting (via what variables get computed in a kernel)
- Kernel fusion
- when are link_inames, duplicate_inames safe?
- rename IndexTag -> InameTag
- Data implementation tags
- turn base_indices into offset
- vectorization
- write_image()
- change_arg_to_image (test!)
- Make tests run on GPUs
- Fuse: store/fetch elimination?
- Test array access with modulo
- Derive all errors from central hierarchy
- Provide context for more errors?
- Allow mixing computed and stored strides
Fixes:
- applied_iname_rewrites tracking for prefetch footprints isn't bulletproof
old inames may still be around, so the rewrite may or may not have to be
applied.
- Group instructions by dependency/inames for scheduling, to
increase sched. scalability
......@@ -62,7 +89,15 @@ Fixes:
Future ideas
^^^^^^^^^^^^
- Expose iname-duplicate-and-rename as a primitive.
- subtract_domain_lower_bound
- Storage sharing for temporaries?
- Kernel splitting (via what variables get computed in a kernel)
- Put all OpenCL functions into mangler
- Fuse: store/fetch elimination?
- Array language
......@@ -80,9 +115,6 @@ Future ideas
- Float4 joining on fetch/store?
- How can one automatically generate something like microblocks?
-> Some sort of axis-adding transform?
- Better for loop bound generation
-> Try a triangular loop
......@@ -107,6 +139,33 @@ Future ideas
Dealt with
^^^^^^^^^^
- How can one automatically generate something like microblocks?
-> Some sort of axis-adding transform?
- RuleAwareIdentityMapper
extract_subst -> needs WalkMapper [actually fine as is]
padding [DONE]
replace make_unique_var_name [DONE]
join_inames [DONE]
duplicate_inames [DONE]
split_iname [DONE]
CSE [DONE]
- rename iname
- delete unused inames
- Expose iname-duplicate-and-rename as a primitive.
- make sure simple side effects work
- Loop bounds currently may not depend on parallel dimensions
Does it make sense to relax this?
- Streamline argument specification
- syntax for linear array access
- Test divisibility constraints
- Test join_inames
......
Loopy: Transformation-Based Generation of High-Performance CPU/GPU Code
=======================================================================
.. image:: https://gitlab.tiker.net/inducer/loopy/badges/main/pipeline.svg
:alt: Gitlab Build Status
:target: https://gitlab.tiker.net/inducer/loopy/commits/main
.. image:: https://github.com/inducer/loopy/actions/workflows/ci.yml/badge.svg
:alt: Github Build Status
:target: https://github.com/inducer/loopy/actions/workflows/ci.yml
.. image:: https://badge.fury.io/py/loopy.svg
:alt: Python Package Index Release Page
:target: https://pypi.org/project/loopy/
.. image:: https://zenodo.org/badge/20281732.svg
:alt: Zenodo DOI for latest release
:target: https://zenodo.org/doi/10.5281/zenodo.10672274
Loopy lets you easily generate the tedious, complicated code that is necessary
to get good performance out of GPUs and multi-core CPUs.
Loopy's core idea is that a computation should be described simply and then
*transformed* into a version that gets high performance. This transformation
takes place under user control, from within Python.
It can capture the following types of optimizations:
* Vector and multi-core parallelism in the OpenCL/CUDA model
* Data layout transformations (structure of arrays to array of structures)
* Loop unrolling
* Loop tiling with efficient handling of boundary cases
* Prefetching/copy optimizations
* Instruction level parallelism
* and many more!
Loopy targets array-type computations, such as the following:
* dense linear algebra,
* convolutions,
* n-body interactions,
* PDE solvers, such as finite element, finite difference, and
Fast-Multipole-type computations.
It is not (and does not want to be) a general-purpose programming language.
Loopy is licensed under the liberal `MIT license
<https://en.wikipedia.org/wiki/MIT_License>`__ and free for commercial, academic,
and private use. All of Loopy's dependencies can be automatically installed from
the package index after using::
pip install loopy
In addition, Loopy is compatible with and enhances
`pyopencl <https://mathema.tician.de/software/pyopencl>`__.
---
Places on the web related to Loopy:
* `Python Package Index <https://pypi.org/project/loopy>`__ (download releases)
* `Documentation <https://documen.tician.de/loopy>`__ (read how things work)
* `Github <https://github.com/inducer/loopy>`__ (get latest source code, file bugs)
* `Homepage <https://mathema.tician.de/software/loopy>`__
* `Benchmarks <https://documen.tician.de/loopy/benchmarks>`__
FORTRAN:
do/continue
case sensitivity
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,
// The name of the project being benchmarked
"project": "loopy",
// The project's homepage
"project_url": "https://documen.tician.de/loopy",
// The URL or local path of the source code repository for the
// project being benchmarked
"repo": ".",
// The Python project's subdirectory in your repo. If missing or
// the empty string, the project is assumed to be located at the root
// of the repository.
// "repo_subdir": "",
// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["main"], // for git
// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
// "dvcs": "git",
// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",
// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
//"install_timeout": 600,
// the base URL to show a commit for the project.
"show_commit_url": "http://github.com/inducer/loopy/commit/",
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["2.7", "3.6"],
// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
"conda_channels": ["conda-forge", "defaults"],
// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPi, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
// "matrix": {
// "numpy": ["1.6", "1.7"],
// "six": ["", null], // test with and without six installed
// "pip+emcee": [""], // emcee is only available for install with pip.
// },
"matrix": {
"numpy" : [""],
"pyopencl" : [""],
"islpy" : [""],
"pocl" : [""],
"pip+git+https://github.com/inducer/pymbolic#egg=pymbolic": [""],
"pip+git+https://github.com/inducer/boxtree#egg=boxtree": [""],
"pip+git+https://github.com/inducer/loopy#egg=loopy": [""],
"pip+git+https://github.com/inducer/sumpy#egg=sumpy": [""],
},
// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],
// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
// "benchmark_dir": "benchmarks",
// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": ".asv/env",
// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": ".asv/results",
// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": ".asv/html",
// The number of characters to retain in the commit hashes.
// "hash_length": 8,
// `asv` will cache wheels of the recent builds in each
// environment, making them faster to install next time. This is
// number of builds to keep, per environment.
// "wheel_cache_size": 0
// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// }
// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// "some_benchmark": 0.01, // Threshold of 1%
// "another_benchmark": 0.5, // Threshold of 50%
// }
}
#! /usr/bin/env python
# Thin launcher: delegate straight to the loopy command-line interface.
if __name__ == "__main__":
    from loopy.cli import main
    main()
#!/usr/bin/env python
# Standalone experiment: compare C's truncating integer division and
# remainder ('/', '%') against Python's flooring '//' and '%', using a
# small C library compiled at runtime and loaded through ctypes.
import ctypes
from os import system

# C source for the helper library.  Besides plain cdiv/cmod (native C
# semantics), it defines floor-division and positive-divisor variants
# for several integer widths via the LOOPY_CALL_WITH_INTEGER_TYPES
# X-macro.  NOTE: this string is written to disk, compiled, and
# executed by main() below -- it is runtime data, not a comment.
C_SRC = """
#include <stdlib.h>
#include <stdint.h>
int64_t cdiv(int64_t a, int64_t b)
{
return a/b;
}
int64_t cmod(int64_t a, int64_t b)
{
return a%b;
}
#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \
MACRO_NAME(int8, char) \
MACRO_NAME(int16, short) \
MACRO_NAME(int32, int) \
MACRO_NAME(int64, long long)
#define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \
TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \
{ \
if ((a<0) != (b<0)) \
a = a - (b + (b<0) - (b>=0)); \
return a/b; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV)
#undef LOOPY_DEFINE_FLOOR_DIV
#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \
TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \
{ \
if (a<0) \
a = a - (b-1); \
return a/b; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)
#undef LOOPY_DEFINE_FLOOR_DIV_POS_B
#define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \
TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \
{ \
TYPE result = a%b; \
if (result < 0) \
result += b; \
return result; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B)
#undef LOOPY_DEFINE_MOD_POS_B
#define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \
TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \
{ \
TYPE result = a%b; \
if (result < 0 && b > 0) \
result += b; \
if (result > 0 && b < 0) \
result = result + b; \
return result; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD)
#undef LOOPY_DEFINE_MOD
"""
def main():
    """Write C_SRC to disk, compile it with gcc, and exhaustively
    cross-check the compiled division/modulo helpers against Python's
    ``//`` and ``%`` over a small integer range.

    Side effects: creates int-experiments.c and int-experiments.so in
    the current directory; prints one comparison line at the end.
    Raises AssertionError on the first mismatch (or if gcc fails).
    """
    with open("int-experiments.c", "w") as outf:
        outf.write(C_SRC)

    # Check the compiler's exit status instead of silently continuing
    # with a stale (or missing) shared object.
    compile_rv = system("gcc -Wall -shared int-experiments.c -o int-experiments.so")
    assert compile_rv == 0, "gcc failed to build int-experiments.so"

    int_exp = ctypes.CDLL("int-experiments.so")
    for func in [
            int_exp.cdiv,
            int_exp.cmod,
            int_exp.loopy_floor_div_int64,
            int_exp.loopy_floor_div_pos_b_int64,
            int_exp.loopy_mod_pos_b_int64,
            int_exp.loopy_mod_int64,
            ]:
        func.argtypes = [ctypes.c_longlong, ctypes.c_longlong]
        func.restype = ctypes.c_longlong

    cmod = int_exp.cmod
    int_floor_div = int_exp.loopy_floor_div_int64
    int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64
    int_mod_pos_b = int_exp.loopy_mod_pos_b_int64
    int_mod = int_exp.loopy_mod_int64

    m = 50

    # BUGFIX: the original printed (a, b, cresult, presult) in an
    # ``if cresult != presult`` block placed *after* the assert, so the
    # diagnostic was unreachable; carry the values in the assert message.

    # floor division, divisor restricted to positive values
    for a in range(-m, m):
        for b in range(1, m):
            cresult = int_floor_div_pos_b(a, b)
            presult = a // b
            assert cresult == presult, (a, b, cresult, presult)

    # floor division, any nonzero divisor
    for a in range(-m, m):
        for b in range(-m, m):
            if b == 0:
                continue
            cresult = int_floor_div(a, b)
            presult = a // b
            assert cresult == presult, (a, b, cresult, presult)

    # modulo, divisor restricted to positive values
    for a in range(-m, m):
        for b in range(1, m):
            cresult = int_mod_pos_b(a, b)
            presult = a % b
            assert cresult == presult, (a, b, cresult, presult)

    # modulo, any nonzero divisor
    for a in range(-m, m):
        for b in range(-m, m):
            if b == 0:
                continue
            cresult = int_mod(a, b)
            presult = a % b
            assert cresult == presult, (a, b, cresult, presult)

    # print(int_mod(552, -918), 552 % -918)
    print(cmod(23, -11), 23 % -11)


if __name__ == "__main__":
    main()
" Vim highlighting for Floopy (Fortran+Loopy) source code
" -------------------------------------------------------
" Installation:
" Just drop this file into ~/.vim/syntax/floopy.vim
"
" Then do
" :set filetype=floopy
"
" You may also include a line
" vim: filetype=floopy.python
" at the end of your file to set the file type automatically.
"
" Another option is to include the following in your .vimrc
" au BufRead,BufNewFile *.floopy set filetype=floopy
runtime! syntax/fortran.vim
unlet b:current_syntax
syntax include @LoopyPython syntax/python.vim
if exists('s:current_syntax')
let b:current_syntax=s:current_syntax
else
unlet b:current_syntax
endif
syntax region textSnipLoopyPython
\ matchgroup=Comment
\ start='$loopy begin' end='$loopy end'
\ containedin=ALL
\ contains=@LoopyPython
import numpy as np
# Inspired by a visualization used in the Halide tutorial
# https://www.youtube.com/watch?v=3uiEyEKji0M
def div_ceil(nr, dr):
    """Ceiling integer division of *nr* by *dr*."""
    # Negate-twice trick: floor division of the negated numerator
    # yields the ceiling without any floating-point round trip.
    quotient = -(-nr // dr)
    return quotient
def product(iterable):
    """Return the product of all elements of *iterable* (1 if empty)."""
    result = 1
    for factor in iterable:
        result = result * factor
    return result
class ArrayAccessPatternContext:
    """Context for visualizing array access patterns of a GPU-style
    kernel (inspired by a visualization from the Halide tutorial).

    Holds the global/local grid sizes, a logical timestamp advanced one
    step per animation frame, and the list of tracked arrays.
    """

    def __init__(self, gsize, lsize, subgroup_size=32, decay_constant=0.75):
        # gsize/lsize: sequences of global/local axis lengths
        self.lsize = lsize
        self.gsize = gsize
        self.subgroup_size = subgroup_size
        # logical time; advanced by tick() once per animation frame
        self.timestamp = 0
        # decay_constant: stored but not used in this class -- presumably
        # consumed by the arrays' plotting code; confirm against Array.
        self.decay_constant = decay_constant
        # total number of index axes: global axes first, then local ones
        self.ind_length = len(gsize) + len(lsize)
        # Array instances register themselves here (see Array.__init__)
        self.arrays = []

    def l(self, index):  # noqa: E743
        """Local-ID range along axis *index*, shaped for broadcasting:
        the single non-trivial axis sits at position len(gsize) + index.
        """
        subscript = [np.newaxis] * self.ind_length
        subscript[len(self.gsize) + index] = slice(None)
        return np.arange(self.lsize[index])[tuple(subscript)]

    def g(self, index):
        """Group-ID range along axis *index*, shaped for broadcasting:
        the single non-trivial axis sits at position *index*.
        """
        subscript = [np.newaxis] * self.ind_length
        subscript[index] = slice(None)
        return np.arange(self.gsize[index])[tuple(subscript)]

    def nsubgroups(self):
        """Number of subgroups needed to cover one work-group."""
        return div_ceil(product(self.lsize), self.subgroup_size)

    def animate(self, f, interval=200):
        """Build a matplotlib animation driven by generator function *f*.

        Each value yielded by f() advances the clock by one tick and
        refreshes every tracked array's plot.  Returns the
        FuncAnimation object.
        """
        import matplotlib.animation as animation
        import matplotlib.pyplot as plt

        fig = plt.figure()

        plots = []
        for iary, ary in enumerate(self.arrays):
            ax = fig.add_subplot(1, len(self.arrays), 1+iary)
            ax.set_title(ary.name)
            plots.append(ary.plot(ax))

        def data_gen():
            # One animation frame per value yielded by f()
            for _ in f():
                self.tick()
                for ary, plot in zip(self.arrays, plots):
                    plot.set_array(ary.get_plot_data())
                fig.canvas.draw()
                yield plots

        # must be kept alive until after plt.show()
        return animation.FuncAnimation(
                fig, lambda x: x, data_gen,
                blit=False, interval=interval, repeat=True)

    def tick(self):
        """Advance the logical clock by one step."""
        self.timestamp += 1
class Array:
    """Simulated array that records, per element, who accessed it last.

    Each element of :attr:`array` stores the attribute tuple
    ``(timestamp, subgroup, g0, g1, ...)`` of its most recent access,
    which :meth:`get_plot_data` renders as an RGB image.
    """

    def __init__(self, ctx, name, shape, strides, elements_per_row=None):
        """
        :arg ctx: the context (an ``ArrayAccessPatternContext``) whose
            timestamp and grid geometry this array uses
        :arg shape: tuple of array extents
        :arg strides: tuple of strides, one per axis of *shape*
        :arg elements_per_row: width of the plotted image; defaults to the
            extent of the smallest-stride axis, or 256 for 1D arrays
        """
        # Each array element stores a tuple:
        # (timestamp, subgroup, g0, g1, g2, ) of last access
        assert len(shape) == len(strides)

        self.nattributes = 2+len(ctx.gsize)

        if elements_per_row is None:
            if len(shape) > 1:
                # Use the extent of the fastest-moving (min-stride) axis
                # as the image width.
                minstride = min(strides)
                for sh_i, st_i in zip(shape, strides):
                    if st_i == minstride:
                        elements_per_row = sh_i
                        break
            else:
                elements_per_row = 256

        # total element count (row-major flattening of *shape*)
        self.array = np.zeros(
                (int(np.prod(shape)), self.nattributes,), dtype=np.int32)
        self.ctx = ctx
        self.name = name
        self.shape = shape
        self.strides = strides
        self.elements_per_row = elements_per_row

        ctx.arrays.append(self)

    def __getitem__(self, index):
        """Record an access at *index* (scalars and/or broadcastable index
        arrays as produced by ``ctx.l()``/``ctx.g()``); returns nothing."""
        if not isinstance(index, tuple):
            index = (index,)

        assert len(index) == len(self.shape)

        lin_index = sum(
                ind_i * stride_i
                for ind_i, stride_i in zip(index, self.strides))

        if not isinstance(lin_index, np.ndarray):
            # BUGFIX: index with a *tuple* of np.newaxis. Indexing with a
            # list of newaxis was deprecated in numpy 1.15 and is an error
            # since numpy 1.23, so the scalar-index path used to crash.
            lin_index = np.array(lin_index)[(np.newaxis,) * self.ctx.ind_length]

        self.array[lin_index, 0] = self.ctx.timestamp
        for i, _glength in enumerate(self.ctx.gsize):
            # Only record a group id along axes the access actually spans.
            if lin_index.shape[i] > 1:
                self.array[lin_index, 2+i] = self.ctx.g(i)

        # Linearize the local id (last local axis fastest) to find the
        # subgroup number of each accessing work-item.
        workitem_index = 0
        for i in range(len(self.ctx.lsize))[::-1]:
            workitem_index = (
                    workitem_index * self.ctx.lsize[i]
                    + self.ctx.l(i))
        subgroup = workitem_index//self.ctx.subgroup_size

        self.array[lin_index, 1] = subgroup

    def __setitem__(self, index, value):
        # A store is recorded exactly like a load; *value* is ignored.
        self.__getitem__(index)

    def get_plot_data(self):
        """Render the access log as a ``(rows, elements_per_row, 3)`` RGB
        image, exponentially dimmed by time since the last access."""
        nelements = self.array.shape[0]
        base_shape = (
                div_ceil(nelements, self.elements_per_row),
                self.elements_per_row,)
        shaped_array = np.zeros(
                (*base_shape, self.nattributes),
                dtype=np.float32)
        shaped_array.reshape(-1, self.nattributes)[:nelements] = self.array

        # Older accesses fade out exponentially.
        modulation = np.exp(
                -self.ctx.decay_constant*(self.ctx.timestamp-shaped_array[:, :, 0]))

        subgroup = shaped_array[:, :, 1]
        if self.ctx.nsubgroups() > 1:
            subgroup = subgroup/(self.ctx.nsubgroups()-1)
        else:
            subgroup.fill(1)

        rgb_array = np.zeros((*base_shape, 3))
        # NOTE(review): both guards below test ``len(gsize) > 1``; the first
        # (g.0 -> red) may have been meant to fire for 1D grids too, but
        # enabling it would divide by zero whenever gsize[0] == 1.
        # Left as-is pending clarification.
        if len(self.ctx.gsize) > 1:
            # g.0 -> red
            rgb_array[:, :, 0] = shaped_array[:, :, 2]/(self.ctx.gsize[0]-1)
        if len(self.ctx.gsize) > 1:
            # g.1 -> blue
            rgb_array[:, :, 2] = shaped_array[:, :, 3]/(self.ctx.gsize[1]-1)
        # subgroup -> green
        rgb_array[:, :, 1] = subgroup

        return rgb_array*modulation[:, :, np.newaxis]

    def plot(self, ax, **kwargs):
        """Draw the current plot data onto matplotlib axes *ax*."""
        return ax.imshow(
                self.get_plot_data(), interpolation="nearest",
                **kwargs)
def show_example():
    """Animate one of two loop orderings ("knl a" / "knl b") over a
    128x128 row-major array on a 16x16-blocked grid.

    Toggle the ``if`` below to pick the kernel. BUGFIX: previously both
    branches were disabled (``if 0: ... elif 0: ...``), which left ``f``
    undefined and made ``ctx.animate(f)`` raise a NameError; the second
    branch is now the ``else`` default so ``f`` always exists.
    """
    n = 2**7
    n16 = div_ceil(n, 16)
    ctx = ArrayAccessPatternContext(gsize=(n16, n16), lsize=(16, 16))
    in0 = Array(ctx, "in0", (n, n), (n, 1))

    if 0:
        # knl a
        i_inner = ctx.l(1)
        i_outer = ctx.g(1)
        k_inner = ctx.l(0)

        def f():
            for k_outer in range(n16):
                in0[i_inner + i_outer*16, k_inner + k_outer*16]
                yield
    else:
        # knl b (default)
        j_inner = ctx.l(0)
        j_outer = ctx.g(0)
        k_inner = ctx.l(1)

        def f():
            for k_outer in range(n16):
                in0[k_inner + k_outer*16, j_inner + j_outer*16]
                yield

    # keep a reference to the animation alive until after plt.show()
    ani = ctx.animate(f)
    import matplotlib.pyplot as plt
    if 1:
        plt.show()
    else:
        ani.save("access.mp4")
def show_example_2():
    """Animate a blocked (tile-by-tile) row-major traversal of a square
    array, one access per animation step."""
    block_edge = 8
    nblocks = 3
    edge = nblocks * block_edge

    ctx = ArrayAccessPatternContext(gsize=(1,), lsize=(1,),
            decay_constant=0.005)
    in0 = Array(ctx, "in0", (edge, edge), (edge, 1))

    def f():
        # Visit tiles left-to-right, top-to-bottom; within each tile,
        # walk its elements in row-major order.
        for i_outer in range(nblocks):
            for j_outer in range(nblocks):
                for i_inner in range(block_edge):
                    for j_inner in range(block_edge):
                        in0[i_inner + i_outer*block_edge,
                            j_inner + j_outer*block_edge]
                        yield

    # keep a reference to the animation alive until after plt.show()
    ani = ctx.animate(f, interval=10)
    import matplotlib.pyplot as plt
    show_interactively = True
    if show_interactively:
        plt.show()
    else:
        ani.save("access.mp4")
# Run the blocked-traversal demo when executed as a script.
if __name__ == "__main__":
    show_example_2()