
Compare revisions

Commits on Source (146), showing 179 additions and 364 deletions
name: CI
on:
push:
branches:
- master
pull_request:
paths-ignore:
- 'doc/*.rst'
schedule:
- cron: '17 3 * * 0'
jobs:
flake8:
name: Flake8
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
-
uses: actions/setup-python@v1
with:
python-version: '3.x'
- name: "Main Script"
run: |
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
. ./prepare-and-run-flake8.sh ./loopy ./test
pylint:
name: Pylint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: "Main Script"
run: |
sed 's/python=3/python=3.7/' .test-conda-env-py3.yml > .test-conda-env.yml
CONDA_ENVIRONMENT=.test-conda-env.yml
USE_CONDA_BUILD=1
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh
. ./prepare-and-run-pylint.sh loopy test/test_*.py
pytest3:
name: Conda Pytest Py3
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: "Main Script"
run: |
CONDA_ENVIRONMENT=.test-conda-env-py3.yml
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
pytest_twice:
name: Pytest twice (for cache behavior) on Py${{ matrix.python-version }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: "Main Script"
run: |
CONDA_ENVIRONMENT=.test-conda-env-py3.yml
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
${PY_EXE} -m pytest -rw --durations=10 --tb=native --junitxml=pytest.xml -rxs $TESTABLES
# vim: sw=4
Python 2.7 POCL:
script:
- export PY_EXE=python2.7
- export PYOPENCL_TEST=portable
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- export NO_DOCTESTS=1
- curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
tags:
- python2.7
- pocl
except:
- tags
artifacts:
reports:
junit: test/pytest.xml
Python 3 POCL:
script:
- export PY_EXE=python3
- export PYOPENCL_TEST=portable
- export PYOPENCL_TEST=portable:pthread
- export EXTRA_INSTALL="pybind11 numpy mako"
- export LOOPY_NO_CACHE=1
- curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
......@@ -57,7 +38,7 @@ Python 3 Intel:
Python 3 POCL Twice With Cache:
script:
- export PY_EXE=python3
- export PYOPENCL_TEST=portable
- export PYOPENCL_TEST=portable:pthread
- export EXTRA_INSTALL="pybind11 numpy mako"
- curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
- ". ./build-and-test-py-project.sh"
......@@ -77,7 +58,7 @@ Python 3 POCL Twice With Cache:
# PyPy POCL:
# script:
# - export PY_EXE=pypy
# - export PYOPENCL_TEST=portable
# - export PYOPENCL_TEST=portable:pthread
# - export EXTRA_INSTALL="pybind11 numpy mako"
# - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
# - ". ./build-and-test-py-project.sh"
......@@ -90,7 +71,7 @@ Python 3 POCL Twice With Cache:
Python 3 POCL Examples:
script:
- export PY_EXE=python3
- export PYOPENCL_TEST=portable
- export PYOPENCL_TEST=portable:pthread
- export EXTRA_INSTALL="pybind11 numpy mako pyvisfile matplotlib ipykernel nbconvert"
- ". ./build-py-project-and-run-examples.sh"
tags:
......@@ -114,20 +95,6 @@ Pylint:
except:
- tags
CentOS binary:
script:
- (cd build-helpers; ./make-linux-build-docker.sh --nodate)
- (cd ./build-helpers; ./loopy-centos6 ../examples/fortran/sparse.floopy)
artifacts:
expire_in: 4 weeks
paths:
- build-helpers/loopy-centos6
tags:
- docker
only:
- master
retry: 2
Documentation:
script:
- EXTRA_INSTALL="pybind11 numpy"
......
name: test-conda-env
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python=3
- git
- conda-forge::numpy
- numpy
- pocl
- mako
- pyopencl
......
......@@ -4,9 +4,9 @@ Loopy: Transformation-Based Generation of High-Performance CPU/GPU Code
.. image:: https://gitlab.tiker.net/inducer/loopy/badges/master/pipeline.svg
:alt: Gitlab Build Status
:target: https://gitlab.tiker.net/inducer/loopy/commits/master
.. image:: https://dev.azure.com/ak-spam/inducer/_apis/build/status/inducer.loopy?branchName=master
:alt: Azure Build Status
:target: https://dev.azure.com/ak-spam/inducer/_build/latest?definitionId=10&branchName=master
.. image:: https://github.com/inducer/loopy/workflows/CI/badge.svg?branch=master&event=push
:alt: Github Build Status
:target: https://github.com/inducer/loopy/actions?query=branch%3Amaster+workflow%3ACI+event%3Apush
.. image:: https://badge.fury.io/py/loo.py.png
:alt: Python Package Index Release Page
:target: https://pypi.org/project/loo.py/
......
jobs:
-
job: 'Python2'
pool:
vmImage: 'ubuntu-latest'
steps:
-
script: |
set -e
sed 's/python=3/python=2.7/' .test-conda-env-py3.yml > .test-conda-env-py2.yml
cat .test-conda-env-py2.yml
CONDA_ENVIRONMENT=.test-conda-env-py2.yml
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
displayName: 'Pytest Conda'
-
task: PublishTestResults@2
inputs:
testResultsFormat: 'JUnit'
testResultsFiles: 'test/pytest.xml'
-
job: 'Python3'
pool:
vmImage: 'ubuntu-latest'
steps:
-
script: |
set -e
CONDA_ENVIRONMENT=.test-conda-env-py3.yml
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
displayName: 'Pytest Conda'
-
task: PublishTestResults@2
inputs:
testResultsFormat: 'JUnit'
testResultsFiles: 'test/pytest.xml'
-
job: 'Python3Twice'
displayName: "Python3 - run tests twice to test cache behavior"
pool:
vmImage: 'ubuntu-latest'
steps:
-
script: |
set -e
CONDA_ENVIRONMENT=.test-conda-env-py3.yml
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project-within-miniconda.sh
. ./build-and-test-py-project-within-miniconda.sh
${PY_EXE} -m pytest -rw --durations=10 --tb=native --junitxml=pytest.xml -rxs $TESTABLES
displayName: 'Pytest Conda'
-
task: PublishTestResults@2
inputs:
testResultsFormat: 'JUnit'
testResultsFiles: 'test/pytest.xml'
-
job: 'Flake8'
pool:
vmImage: 'ubuntu-latest'
strategy:
matrix:
Python37:
python.version: '3.7'
steps:
-
task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
-
script: |
set -e
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-flake8.sh
. ./prepare-and-run-flake8.sh loopy test
displayName: 'Flake8'
-
job: 'Pylint'
pool:
vmImage: 'ubuntu-latest'
steps:
-
script: |
set -e
sed 's/python=3/python=3.7/' .test-conda-env-py3.yml > .test-conda-env.yml
CONDA_ENVIRONMENT=.test-conda-env.yml
USE_CONDA_BUILD=1
curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/prepare-and-run-pylint.sh
. ./prepare-and-run-pylint.sh loopy test/test_*.py
displayName: 'Pylint'
schedules:
-
cron: "0 0 * * 0"
displayName: Weekly build
branches:
include:
- master
loopy-*-20[0-9][0-9]*
# -*- mode: python -*-
from os.path import basename, dirname, join
from glob import glob
single_file = True
# This makes the executable spew debug info.
debug = False
from os.path import expanduser
import packaging # pip install packaging to add
a = Analysis(['../bin/loopy'],
pathex=[expanduser('~/src/loopy')],
hiddenimports=[
"decorator",
"appdirs",
"packaging.markers",
"packaging.specifiers",
"packaging.version",
"packaging.requirements",
],
hookspath=None,
runtime_hooks=None,
excludes=["hedge", "meshpy", "pyopencl", "PIL"]
)
import ply.lex
import ply.yacc
a.datas += [
(join("py-src", "ply", "lex", basename(fn)), fn, "DATA")
for fn in glob(join(dirname(ply.lex.__file__), "*.py"))
] + [
(join("py-src", "ply", "yacc", basename(fn)), fn, "DATA")
for fn in glob(join(dirname(ply.yacc.__file__), "*.py"))
]
pyz = PYZ(a.pure)
if single_file:
exe = EXE(pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
name='loopy',
debug=debug,
strip=None,
upx=True,
console=True)
else:
exe = EXE(pyz,
a.scripts,
exclude_binaries=True,
name='loopy',
debug=debug,
strip=None,
upx=True,
console=True)
coll = COLLECT(exe,
a.binaries,
a.zipfiles,
a.datas,
strip=None,
upx=True,
name='loopy')
#! /bin/bash
set -e
set -x
VENV_VERSION="virtualenv-15.2.0"
rm -Rf "$VENV_VERSION"
curl -k https://files.pythonhosted.org/packages/b1/72/2d70c5a1de409ceb3a27ff2ec007ecdd5cc52239e7c74990e32af57affe9/$VENV_VERSION.tar.gz | tar xfz -
$VENV_VERSION/virtualenv.py --system-site-packages --no-setuptools .env
source .env/bin/activate
curl -k https://bootstrap.pypa.io/ez_setup.py | python -
curl -k https://gitlab.tiker.net/inducer/pip/raw/7.0.3/contrib/get-pip.py | python -
pip install packaging
PYTHON_VER=$(python -c 'import sys; print(".".join(str(s) for s in sys.version_info[:2]))')
pip install git+https://github.com/pyinstaller/pyinstaller.git@413c37bec126c0bd26084813593f65128966b4b7
git clone --recursive git://github.com/inducer/loopy
cd loopy
grep -v pyopencl requirements.txt > myreq.txt
# needed for pyinstaller package to be usable
echo packaging >> myreq.txt
pip install -r myreq.txt
python setup.py install
chown -R user /tmp/build
su user -p -c "cd /tmp/build && source .env/bin/activate && cd loopy && ./build-helpers/run-pyinstaller.sh"
#! /bin/bash
set -e
set -x
mkdir /tmp/build
cd /tmp/build
useradd -d /home/user -m -s /bin/bash user
yum install -y centos-release-scl
yum install -y git python27 python27-python-devel python27-numpy tar gcc gcc-c++ mercurial libffi-devel
scl enable python27 /mnt/make-linux-build-docker-inner-part-2.sh
#! /bin/bash
# should be run in this directory (build-helpers)
if test "$1" = "--nodate"; then
TGT_NAME=loopy-centos6
else
TGT_NAME=loopy-centos6-$(date +"%Y-%m-%d")
fi
echo "Generating $TGT_NAME..."
set -e
set -x
docker pull centos:6
CNT=$(docker create -t -v $(pwd):/mnt centos:6 /mnt/make-linux-build-docker-inner.sh)
echo "working in container $CNT"
docker start -i $CNT
docker cp $CNT:/tmp/build/loopy/dist/loopy $(pwd) || true
mv loopy $TGT_NAME
docker rm $CNT
#! /bin/bash
# run this from the loopy root directory
rm -Rf dist build
pyinstaller \
--workpath=build/pyinstaller \
build-helpers/loopy.spec
#! /bin/bash
set -e
scp "$1" tiker.net:public_html/pub/loopy-binaries/
......@@ -20,29 +20,6 @@ When you run this script, the following kernel is generated, compiled, and execu
.. _static-binary:
Want to try out loopy?
----------------------
There's no need to go through :ref:`installation` if you'd just like to get a
feel for what loopy is. Instead, you may
`download a self-contained Linux binary <https://gitlab.tiker.net/inducer/loopy/-/jobs/66778/artifacts/browse/build-helpers/>`_.
This is purposefully built on an ancient Linux distribution, so it should work
on most Linux distributions currently in use.
Once you have the binary, do the following::
chmod +x ./loopy-centos6
./loopy-centos6 --target=opencl hello-loopy.loopy
./loopy-centos6 --target=cuda hello-loopy.loopy
./loopy-centos6 --target=ispc hello-loopy.loopy
Grab the example here: :download:`examples/python/hello-loopy.loopy <../examples/python/hello-loopy.loopy>`.
You may also download the most recent version by going to the `list of builds
<https://gitlab.tiker.net/inducer/loopy/builds>`_, clicking on the newest one
of type "CentOS binary", clicking on "Browse" under "Build Artifacts", then
navigating to "build-helpers", and downloading the binary from there.
Places on the web related to Loopy
----------------------------------
......
......@@ -151,6 +151,42 @@ Tag Meaning
.. }}}
Identifiers
-----------
Reserved Identifiers
^^^^^^^^^^^^^^^^^^^^
The identifier prefix ``_lp_`` is reserved for internal usage; when creating
*inames*, *argument names*, *temporary variable names*, *substitution rule
names*, *instruction IDs*, and other identifiers, users should *not* use names
beginning with ``_lp_``. This prefix is used for identifiers created
internally when operating on Loopy's kernel IR. For Loopy developers, further
information on name prefixes used within submodules is below.
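For example, user-facing code that accepts caller-supplied names could
defensively reject anything in the reserved namespace. A minimal sketch
(the helper below is hypothetical, not part of the Loopy API)::

    def check_user_identifier(name):
        # Names starting with "_lp_" are reserved for Loopy-internal use.
        if name.startswith("_lp_"):
            raise ValueError(
                "'%s' begins with the reserved prefix '_lp_'" % name)
        return name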
Identifier Registry
^^^^^^^^^^^^^^^^^^^
Functionality in :mod:`loopy` *must* use identifiers beginning with ``_lp_`` for
all internally-created identifiers. Additionally, each name beginning with
``_lp_`` must start with one of the reserved prefixes below. New prefixes may be
registered by adding them to the table below; a newly registered prefix may not
itself be a prefix of an existing entry (a sketch of such a check appears below
the registry).
**Reserved Identifier Prefixes**
======================= ==================================
Reserved Prefix Usage (module or purpose)
======================= ==================================
``_lp_linchk_`` :mod:`loopy.linearization.checker`
======================= ==================================
.. note::
Existing Loopy code may not yet fully satisfy these naming requirements.
Name changes are in progress, and prefixes will be added to this registry
as they are created.
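The registry rules can be checked mechanically. A minimal sketch, assuming the
registry is kept as a plain list (both names below are hypothetical); it rejects
prefix collisions in both directions, which is slightly stricter than the rule
stated above::

    _REGISTERED_PREFIXES = ["_lp_linchk_"]

    def register_identifier_prefix(new_prefix):
        # Every registered prefix must live in the reserved namespace.
        if not new_prefix.startswith("_lp_"):
            raise ValueError("'%s' does not begin with '_lp_'" % new_prefix)
        # A new prefix may not be a prefix of an existing entry (or vice
        # versa), since that would make the owning module ambiguous.
        for existing in _REGISTERED_PREFIXES:
            if (existing.startswith(new_prefix)
                    or new_prefix.startswith(existing)):
                raise ValueError(
                    "prefix '%s' collides with registered prefix '%s'"
                    % (new_prefix, existing))
        _REGISTERED_PREFIXES.append(new_prefix)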
.. _instructions:
Instructions
......
......@@ -118,7 +118,7 @@ Finishing up
.. autofunction:: generate_loop_schedules
.. autofunction:: get_one_scheduled_kernel
.. autofunction:: get_one_linearized_kernel
.. autofunction:: save_and_reload_temporaries
......
......@@ -1204,16 +1204,16 @@ Here is what happens when we try to generate code for the kernel:
This happens due to the kernel splitting done by :mod:`loopy`. The splitting
happens when the instruction schedule is generated. To see the schedule, we
should call :func:`loopy.get_one_scheduled_kernel`:
should call :func:`loopy.get_one_linearized_kernel`:
>>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
>>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl))
>>> print(knl)
---------------------------------------------------------------------------
KERNEL: rotate_v2
---------------------------------------------------------------------------
...
---------------------------------------------------------------------------
SCHEDULE:
LINEARIZATION:
0: CALL KERNEL rotate_v2(extra_args=[], extra_inames=[])
1: tmp = arr[i_inner + i_outer*16] {id=maketmp}
2: RETURN FROM KERNEL rotate_v2
......@@ -1233,12 +1233,12 @@ goes for local temporaries).
:func:`loopy.save_and_reload_temporaries` for the purpose of handling the
task of saving and restoring temporary values across global barriers. This
function adds instructions to the kernel without scheduling them. That means
that :func:`loopy.get_one_scheduled_kernel` needs to be called one more time to
that :func:`loopy.get_one_linearized_kernel` needs to be called one more time to
put those instructions into the schedule.
>>> knl = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
>>> knl = lp.get_one_linearized_kernel(lp.preprocess_kernel(knl))
>>> knl = lp.save_and_reload_temporaries(knl)
>>> knl = lp.get_one_scheduled_kernel(knl) # Schedule added instructions
>>> knl = lp.get_one_linearized_kernel(knl) # Schedule added instructions
>>> print(knl)
---------------------------------------------------------------------------
KERNEL: rotate_v2
......@@ -1251,7 +1251,7 @@ put those instructions into the schedule.
---------------------------------------------------------------------------
...
---------------------------------------------------------------------------
SCHEDULE:
LINEARIZATION:
0: CALL KERNEL rotate_v2(extra_args=['tmp_save_slot'], extra_inames=[])
1: tmp = arr[i_inner + i_outer*16] {id=maketmp}
2: tmp_save_slot[tmp_save_hw_dim_0_rotate_v2, tmp_save_hw_dim_1_rotate_v2] = tmp {id=tmp.save}
......
import numpy as np
import numpy.linalg as la
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp
def main():
fn = "matmul.floopy"
with open(fn, "r") as inf:
source = inf.read()
dgemm, = lp.parse_transformed_fortran(source, filename=fn)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
n = 2048
a = cl.array.empty(queue, (n, n), dtype=np.float64, order="F")
b = cl.array.empty(queue, (n, n), dtype=np.float64, order="F")
c = cl.array.zeros(queue, (n, n), dtype=np.float64, order="F")
cl.clrandom.fill_rand(a)
cl.clrandom.fill_rand(b)
dgemm = lp.set_options(dgemm, write_code=True)
dgemm(queue, a=a, b=b, alpha=1, c=c)
c_ref = (a.get() @ b.get())
assert la.norm(c_ref - c.get())/la.norm(c_ref) < 1e-10
if __name__ == "__main__":
main()
......@@ -123,12 +123,12 @@ from loopy.transform.add_barrier import add_barrier
from loopy.type_inference import infer_unknown_types
from loopy.preprocess import preprocess_kernel, realize_reduction
from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel
from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping,
Op, MemAccess, get_op_poly, get_op_map, get_lmem_access_poly,
get_DRAM_access_poly, get_gmem_access_poly, get_mem_access_map,
get_synchronization_poly, get_synchronization_map,
gather_access_footprints, gather_access_footprint_bytes)
from loopy.schedule import (
generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel)
from loopy.statistics import (ToCountMap, CountGranularity,
stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map,
get_synchronization_map, gather_access_footprints,
gather_access_footprint_bytes)
from loopy.codegen import (
PreambleInfo,
generate_code, generate_code_v2, generate_body)
......@@ -248,16 +248,16 @@ __all__ = [
"infer_unknown_types",
"preprocess_kernel", "realize_reduction",
"generate_loop_schedules", "get_one_scheduled_kernel",
"generate_loop_schedules",
"get_one_scheduled_kernel", "get_one_linearized_kernel",
"GeneratedProgram", "CodeGenerationResult",
"PreambleInfo",
"generate_code", "generate_code_v2", "generate_body",
"ToCountMap", "CountGranularity", "stringify_stats_mapping", "Op",
"MemAccess", "get_op_poly", "get_op_map", "get_lmem_access_poly",
"get_DRAM_access_poly", "get_gmem_access_poly", "get_mem_access_map",
"get_synchronization_poly", "get_synchronization_map",
"gather_access_footprints", "gather_access_footprint_bytes",
"MemAccess", "get_op_map", "get_mem_access_map",
"get_synchronization_map", "gather_access_footprints",
"gather_access_footprint_bytes",
"CompiledKernel",
......
......@@ -534,7 +534,7 @@ def auto_test_vs_ref(
from loopy.target.pyopencl import PyOpenCLTarget
if test_knl.state not in [
KernelState.PREPROCESSED,
KernelState.SCHEDULED]:
KernelState.LINEARIZED]:
if isinstance(test_knl.target, PyOpenCLTarget):
test_knl = test_knl.copy(target=PyOpenCLTarget(ctx.devices[0]))
......
......@@ -184,6 +184,19 @@ def check_for_inactive_iname_access(kernel):
", ".join(expression_inames - kernel.insn_inames(insn))))
def check_for_unused_inames(kernel):
# Warn if kernel has unused inames
from loopy.transform.iname import get_used_inames
unused_inames = kernel.all_inames() - get_used_inames(kernel)
if unused_inames:
warn_with_kernel(
kernel, "unused_inames",
"Found unused inames in kernel: %s "
"Unused inames during linearization will be prohibited in "
"Loopy version 2021.X."
% unused_inames)
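# Illustrative aside (not part of check.py): a kernel that triggers this
# warning can typically be cleaned up with the existing transform
# loopy.remove_unused_inames before linearization, e.g.:
#
#     import loopy as lp
#     knl = lp.remove_unused_inames(knl)  # drops inames used by no instruction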
def _is_racing_iname_tag(tv, tag):
from loopy.kernel.data import (AddressSpace,
LocalIndexTagBase, GroupIndexTag, ConcurrentTag, auto)
......@@ -220,12 +233,12 @@ def check_for_write_races(kernel):
assignee_inames = assignee_indices & kernel.all_inames()
if not assignee_inames <= kernel.insn_inames(insn):
raise LoopyError(
"assignee of instructiosn '%s' references "
"assignee of instructions '%s' references "
"iname that the instruction does not depend on"
% insn.id)
if assignee_name in kernel.arg_dict:
# Any parallel tags that are not depended upon by the assignee
# Any concurrent tags that are not depended upon by the assignee
# will cause write races.
raceable_parallel_insn_inames = set(
......@@ -658,6 +671,7 @@ def pre_schedule_checks(kernel):
check_loop_priority_inames_known(kernel)
check_multiple_tags_allowed(kernel)
check_for_inactive_iname_access(kernel)
check_for_unused_inames(kernel)
check_for_write_races(kernel)
check_for_data_dependent_parallel_bounds(kernel)
check_bounds(kernel)
......