Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Showing
with 2711 additions and 671 deletions
#! /bin/bash

# Run make-linux-build-docker-inner.sh inside a centos:6 container and copy
# the resulting /tmp/build/loopy/dist/loopy back here under a date-stamped
# name.
#
# should be run in this directory (build-helpers)

set -e
set -x

# The current directory is mounted at /mnt so the container can see the inner
# script. All expansions are quoted so that a working directory containing
# whitespace does not get split into several arguments.
CNT=$(docker create -t -v "$(pwd)":/mnt centos:6 /mnt/make-linux-build-docker-inner.sh)
echo "working in container $CNT"

docker start -i "$CNT"

# A failed copy is tolerated at this point; the 'mv' below then fails and,
# because of 'set -e', stops the script before the container is removed.
docker cp "$CNT":/tmp/build/loopy/dist/loopy "$(pwd)" || true

mv loopy "loopy-centos6-$(date +"%Y-%m-%d")"

docker rm "$CNT"
#! /bin/bash

# run this from the loopy root directory

# Drop the output of any previous build before starting.
rm -Rf dist build

# Build from the spec file, keeping PyInstaller's intermediate files under
# build/pyinstaller.
pyinstaller --workpath=build/pyinstaller build-helpers/loopy.spec
#! /bin/bash

# Copy the file named by the first argument to
# public_html/pub/loopy-binaries/ on tiker.net.

set -e

# Fail with a usage message instead of handing scp an empty path.
if [ -z "$1" ]; then
    echo "usage: $0 FILE" >&2
    exit 1
fi

scp "$1" tiker.net:public_html/pub/loopy-binaries/
#!/usr/bin/env python
import ctypes
from os import system
# C source that main() writes to disk and compiles into a shared library.
# It defines plain C division and modulo (cdiv, cmod) plus macro-generated
# loopy_floor_div_*, loopy_floor_div_pos_b_*, loopy_mod_pos_b_* and
# loopy_mod_* helpers, one per int8/int16/int32/int64 suffix.
# NOTE: this is not a raw string, so every backslash-newline below is consumed
# by Python and each multi-line macro reaches the C compiler as a single line.
C_SRC = """
#include <stdlib.h>
#include <stdint.h>
int64_t cdiv(int64_t a, int64_t b)
{
return a/b;
}
int64_t cmod(int64_t a, int64_t b)
{
return a%b;
}
#define LOOPY_CALL_WITH_INTEGER_TYPES(MACRO_NAME) \
MACRO_NAME(int8, char) \
MACRO_NAME(int16, short) \
MACRO_NAME(int32, int) \
MACRO_NAME(int64, long long)
#define LOOPY_DEFINE_FLOOR_DIV(SUFFIX, TYPE) \
TYPE loopy_floor_div_##SUFFIX(TYPE a, TYPE b) \
{ \
if ((a<0) != (b<0)) \
a = a - (b + (b<0) - (b>=0)); \
return a/b; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV)
#undef LOOPY_DEFINE_FLOOR_DIV
#define LOOPY_DEFINE_FLOOR_DIV_POS_B(SUFFIX, TYPE) \
TYPE loopy_floor_div_pos_b_##SUFFIX(TYPE a, TYPE b) \
{ \
if (a<0) \
a = a - (b-1); \
return a/b; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_FLOOR_DIV_POS_B)
#undef LOOPY_DEFINE_FLOOR_DIV_POS_B
#define LOOPY_DEFINE_MOD_POS_B(SUFFIX, TYPE) \
TYPE loopy_mod_pos_b_##SUFFIX(TYPE a, TYPE b) \
{ \
TYPE result = a%b; \
if (result < 0) \
result += b; \
return result; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD_POS_B)
#undef LOOPY_DEFINE_MOD_POS_B
#define LOOPY_DEFINE_MOD(SUFFIX, TYPE) \
TYPE loopy_mod_##SUFFIX(TYPE a, TYPE b) \
{ \
TYPE result = a%b; \
if (result < 0 && b > 0) \
result += b; \
if (result > 0 && b < 0) \
result = result + b; \
return result; \
}
LOOPY_CALL_WITH_INTEGER_TYPES(LOOPY_DEFINE_MOD)
#undef LOOPY_DEFINE_MOD
"""
def main():
    """Compile ``C_SRC`` and check its helpers against Python's ``//`` and ``%``.

    The C source is written to ``int-experiments.c`` in the current directory,
    built into ``int-experiments.so`` with gcc and loaded through ctypes. The
    int64 variant of every helper is then compared with the matching Python
    operator for all ``a`` in ``[-50, 50)`` and all non-zero ``b`` in
    ``[-50, 50)`` (``[1, 50)`` for the ``*_pos_b`` variants). Finally the
    results of C ``%`` and Python ``%`` for ``(23, -11)`` are printed.

    :raises RuntimeError: if gcc exits with a non-zero status.
    :raises AssertionError: on the first operand pair for which the C and
        Python results differ; the message is ``(a, b, cresult, presult)``.
    """
    from operator import floordiv, mod

    with open("int-experiments.c", "w") as outf:
        outf.write(C_SRC)

    # Stop here if the build failed rather than loading a stale or missing
    # shared object further down.
    status = system("gcc -Wall -shared int-experiments.c -o int-experiments.so")
    if status != 0:
        raise RuntimeError("gcc failed with status %d" % status)

    # A bare file name is resolved through the dynamic loader's library search
    # path, not the current directory, so spell out the relative path.
    int_exp = ctypes.CDLL("./int-experiments.so")

    for func in [
            int_exp.cdiv,
            int_exp.cmod,
            int_exp.loopy_floor_div_int64,
            int_exp.loopy_floor_div_pos_b_int64,
            int_exp.loopy_mod_pos_b_int64,
            int_exp.loopy_mod_int64,
            ]:
        func.argtypes = [ctypes.c_longlong, ctypes.c_longlong]
        func.restype = ctypes.c_longlong

    cmod = int_exp.cmod

    m = 50

    def check(c_func, py_func, b_values):
        # The offending operands travel in the assertion message. Previously
        # they were printed only after an assert on the same condition, i.e.
        # never.
        for a in range(-m, m):
            for b in b_values:
                if b == 0:
                    continue

                cresult = c_func(a, b)
                presult = py_func(a, b)
                assert cresult == presult, (a, b, cresult, presult)

    check(int_exp.loopy_floor_div_pos_b_int64, floordiv, range(1, m))
    check(int_exp.loopy_floor_div_int64, floordiv, range(-m, m))
    check(int_exp.loopy_mod_pos_b_int64, mod, range(1, m))
    check(int_exp.loopy_mod_int64, mod, range(-m, m))

    # Show how C's % differs from Python's for a negative divisor.
    print(cmod(23, -11), 23 % -11)
if __name__ == "__main__":
main()
import numpy as np
# Inspired by a visualization used in the Halide tutorial
# https://www.youtube.com/watch?v=3uiEyEKji0M
def div_ceil(nr, dr):
    """Return *nr* divided by *dr*, rounded towards positive infinity."""
    # Floor-dividing the negated numerator and negating the result again
    # turns Python's floor division into a ceiling division.
    negated_floor = (-nr) // dr
    return -negated_floor
def product(iterable):
    """Return the product of all items in *iterable* (``1`` if it is empty)."""
    result = 1
    for factor in iterable:
        result = result * factor
    return result
class ArrayAccessPatternContext:
    """Shared state for a set of :class:`Array` access visualizations.

    Holds the grid extents *gsize* and *lsize* (presumably the group and
    work-item extents of a kernel launch -- confirm with the callers), a
    logical clock ``timestamp`` advanced by :meth:`tick`, and the list
    ``arrays`` that is drawn by :meth:`animate`.

    Index arrays returned by :meth:`g` and :meth:`l` have
    ``len(gsize) + len(lsize)`` axes: the *gsize* axes come first, followed by
    the *lsize* axes.
    """

    def __init__(self, gsize, lsize, subgroup_size=32, decay_constant=0.75):
        self.gsize = gsize
        self.lsize = lsize
        self.subgroup_size = subgroup_size
        self.decay_constant = decay_constant

        self.timestamp = 0
        self.ind_length = len(gsize) + len(lsize)
        self.arrays = []

    def l(self, index):  # noqa: E743
        """Return ``arange(lsize[index])`` along the matching *lsize* axis.

        All other axes of the result have length 1.
        """
        selector = [np.newaxis for _ in range(self.ind_length)]
        selector[len(self.gsize) + index] = slice(None)
        values = np.arange(self.lsize[index])
        return values[tuple(selector)]

    def g(self, index):
        """Return ``arange(gsize[index])`` along the matching *gsize* axis.

        All other axes of the result have length 1.
        """
        selector = [np.newaxis for _ in range(self.ind_length)]
        selector[index] = slice(None)
        values = np.arange(self.gsize[index])
        return values[tuple(selector)]

    def nsubgroups(self):
        """Return the number of subgroups needed to cover all of *lsize*."""
        return div_ceil(product(self.lsize), self.subgroup_size)

    def animate(self, f, interval=200):
        """Create an animation that redraws every array once per step of *f*.

        :arg f: a callable returning an iterable; each item it yields
            produces one frame, preceded by a call to :meth:`tick`.
        :arg interval: passed on to ``FuncAnimation``.
        :returns: the ``FuncAnimation`` object.
        """
        import matplotlib.animation as animation
        import matplotlib.pyplot as plt

        fig = plt.figure()

        # One subplot per registered array, laid out in a single row.
        narrays = len(self.arrays)
        images = []
        for position, ary in enumerate(self.arrays, start=1):
            ax = fig.add_subplot(1, narrays, position)
            ax.set_title(ary.name)
            images.append(ary.plot(ax))

        def frames():
            for _ in f():
                self.tick()

                for ary, image in zip(self.arrays, images):
                    image.set_array(ary.get_plot_data())

                fig.canvas.draw()
                yield images

        def passthrough(frame):
            return frame

        # must be kept alive until after plt.show()
        return animation.FuncAnimation(
            fig, passthrough, frames,
            blit=False, interval=interval, repeat=True)

    def tick(self):
        """Advance the logical clock by one step."""
        self.timestamp = self.timestamp + 1
class Array:
    """Record, per element, when and from where it was last accessed.

    Subscripting an instance (reading or assigning) does not return or store
    data. It only stamps the addressed elements with the context's current
    timestamp, the accessing subgroup and, for grid axes along which the
    index varies, the group index.

    :arg ctx: the context providing grid extents and the timestamp. The new
        array appends itself to ``ctx.arrays``.
    :arg name: stored as ``self.name``.
    :arg shape: extent of each axis.
    :arg strides: per-axis multipliers used to linearize an index. Must have
        the same length as *shape*.
    :arg elements_per_row: row length of the image built by
        :meth:`get_plot_data`. If *None*, arrays with more than one axis use
        the extent of the first axis with the smallest stride, and
        one-dimensional arrays use 256.
    """

    def __init__(self, ctx, name, shape, strides, elements_per_row=None):
        # Each array element stores a tuple:
        # (timestamp, subgroup, g0, g1, g2, ) of last access
        assert len(shape) == len(strides)

        self.nattributes = 2 + len(ctx.gsize)

        if elements_per_row is None:
            if len(shape) > 1:
                minstride = min(strides)
                for sh_i, st_i in zip(shape, strides):
                    if st_i == minstride:
                        elements_per_row = sh_i
                        break
            else:
                elements_per_row = 256

        self.array = np.zeros(
            (product(shape), self.nattributes), dtype=np.int32)

        self.ctx = ctx
        self.name = name
        self.shape = shape
        self.strides = strides
        self.elements_per_row = elements_per_row

        ctx.arrays.append(self)

    def __getitem__(self, index):
        """Mark the elements addressed by *index* as accessed; return *None*.

        *index* holds one entry per array axis. Entries may be scalars or
        index arrays as returned by ``ctx.l``/``ctx.g``, and combinations of
        them.
        """
        if not isinstance(index, tuple):
            index = (index,)

        assert len(index) == len(self.shape)

        lin_index = sum(
            ind_i * stride_i
            for ind_i, stride_i in zip(index, self.strides))

        if not isinstance(lin_index, np.ndarray):
            # Give scalar accesses one length-1 axis per grid axis. The
            # subscript has to be a tuple: NumPy treats a list as an index
            # array and rejects one made of None entries.
            subscript = (np.newaxis,) * self.ctx.ind_length
            lin_index = np.array(lin_index)[subscript]

        self.array[lin_index, 0] = self.ctx.timestamp

        # Record the group index only along grid axes the access varies over.
        for i, _glength in enumerate(self.ctx.gsize):
            if lin_index.shape[i] > 1:
                self.array[lin_index, 2 + i] = self.ctx.g(i)

        # Linearize the lsize indices, axis 0 varying fastest, and derive the
        # subgroup number from the result.
        workitem_index = 0
        for i in range(len(self.ctx.lsize))[::-1]:
            workitem_index = (
                workitem_index * self.ctx.lsize[i]
                + self.ctx.l(i))

        subgroup = workitem_index // self.ctx.subgroup_size
        self.array[lin_index, 1] = subgroup

    def __setitem__(self, index, value):
        """Record a write exactly like a read; *value* is ignored."""
        self.__getitem__(index)

    def get_plot_data(self):
        """Return an ``(nrows, elements_per_row, 3)`` RGB image of the state.

        Green encodes the subgroup of the last access. For grids with more
        than one group axis, red and blue encode the g.0 and g.1 group index.
        Every pixel is scaled by ``exp(-decay_constant * age)``, where age is
        the number of ticks since the element's recorded timestamp.
        """
        nelements = self.array.shape[0]
        base_shape = (
            div_ceil(nelements, self.elements_per_row),
            self.elements_per_row)

        # Pad with zero entries up to a whole number of rows.
        shaped_array = np.zeros(
            (*base_shape, self.nattributes), dtype=np.float32)
        shaped_array.reshape(-1, self.nattributes)[:nelements] = self.array

        modulation = np.exp(
            -self.ctx.decay_constant
            * (self.ctx.timestamp - shaped_array[:, :, 0]))

        nsubgroups = self.ctx.nsubgroups()
        subgroup = shaped_array[:, :, 1]
        if nsubgroups > 1:
            subgroup = subgroup / (nsubgroups - 1)
        else:
            subgroup.fill(1)

        rgb_array = np.zeros((*base_shape, 3))

        # NOTE(review): with a single group axis g.0 is recorded but not
        # drawn; confirm whether "> 0" was intended for the red channel.
        if len(self.ctx.gsize) > 1:
            # A group axis of extent 1 only ever holds index 0, so normalize
            # by 1 there instead of dividing by zero.
            # g.0 -> red
            rgb_array[:, :, 0] = (
                shaped_array[:, :, 2] / max(self.ctx.gsize[0] - 1, 1))
            # g.1 -> blue
            rgb_array[:, :, 2] = (
                shaped_array[:, :, 3] / max(self.ctx.gsize[1] - 1, 1))

        rgb_array[:, :, 1] = subgroup

        return rgb_array * modulation[:, :, np.newaxis]

    def plot(self, ax, **kwargs):
        """Draw the current state on *ax* and return the ``imshow`` result."""
        return ax.imshow(
            self.get_plot_data(), interpolation="nearest",
            **kwargs)
def show_example(kernel="a"):
    """Animate one of two example access patterns on a 128x128 array.

    Both variants used to sit behind ``if 0``/``elif 0``, which left ``f``
    undefined and made the call fail with a :exc:`NameError`. The variant is
    now chosen by *kernel*.

    :arg kernel: ``"a"`` or ``"b"``, selecting the access pattern labelled
        'knl a' or 'knl b' below.
    :raises ValueError: if *kernel* is neither ``"a"`` nor ``"b"``.
    """
    n = 2**7
    n16 = div_ceil(n, 16)
    ctx = ArrayAccessPatternContext(gsize=(n16, n16), lsize=(16, 16))
    in0 = Array(ctx, "in0", (n, n), (n, 1))

    if kernel == "a":
        # knl a
        i_inner = ctx.l(1)
        i_outer = ctx.g(1)
        k_inner = ctx.l(0)

        def f():
            for k_outer in range(n16):
                in0[i_inner + i_outer*16, k_inner + k_outer*16]
                yield

    elif kernel == "b":
        # knl b
        j_inner = ctx.l(0)
        j_outer = ctx.g(0)
        k_inner = ctx.l(1)

        def f():
            for k_outer in range(n16):
                in0[k_inner + k_outer*16, j_inner + j_outer*16]
                yield

    else:
        raise ValueError("kernel must be 'a' or 'b', got %r" % (kernel,))

    # Keep a reference to the animation until after plt.show().
    ani = ctx.animate(f)

    import matplotlib.pyplot as plt
    if 1:
        plt.show()
    else:
        ani.save("access.mp4")
def show_example_2():
    """Animate a block-by-block, element-at-a-time walk over a 24x24 array."""
    block_size = 8
    nblocks = 3
    extent = nblocks * block_size

    ctx = ArrayAccessPatternContext(
        gsize=(1,), lsize=(1,), decay_constant=0.005)
    in0 = Array(ctx, "in0", (extent, extent), (extent, 1))

    def f():
        # Visit the blocks in row-major order and, inside each block, its
        # elements in row-major order, yielding after every single access.
        for i_outer in range(nblocks):
            row_base = i_outer * block_size
            for j_outer in range(nblocks):
                col_base = j_outer * block_size
                for i_inner in range(block_size):
                    for j_inner in range(block_size):
                        in0[i_inner + row_base, j_inner + col_base]
                        yield

    # Keep a reference to the animation until after plt.show().
    ani = ctx.animate(f, interval=10)

    import matplotlib.pyplot as plt

    save_movie = False
    if save_movie:
        ani.save("access.mp4")
    else:
        plt.show()
if __name__ == "__main__":
show_example_2()
......@@ -3,7 +3,7 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = python ` which sphinx-build`
SPHINXBUILD = python `which sphinx-build`
PAPER =
BUILDDIR = _build
......
/* Local style overrides for the documentation theme. */

pre {
    line-height: 110%;
}

.footer {
    background-color: #eee;
}

body > div.container {
    margin-top: 10px;
}

dd {
    margin-left: 40px;
}

tt.descname {
    font-size: 100%;
}

code {
    color: rgb(51,51,51);
}

h1 {
    padding-bottom: 7px;
    border-bottom: 1px solid #ccc;
}

h2 {
    padding-bottom: 5px;
    border-bottom: 1px solid #ccc;
}

h3 {
    padding-bottom: 5px;
    border-bottom: 1px solid #ccc;
}

.rubric {
    font-size: 120%;
    padding-bottom: 1px;
    border-bottom: 1px solid #ccc;
}

.headerlink {
    padding-left: 1ex;
    padding-right: 1ex;
}

a.headerlink:hover {
    text-decoration: none;
}

/* No ';' after the closing brace: a stray semicolon between rules is not
   valid CSS and would be parsed as the start of the next rule. */
blockquote p {
    font-size: 100%;
    font-weight: normal;
    line-height: normal;
}
{% extends "!layout.html" %}
{% set bootswatch_css_custom = ['_static/akdoc.css']%}
# -*- coding: utf-8 -*-
#
# loopy documentation build configuration file, created by
# sphinx-quickstart on Tue Aug 9 13:40:49 2011.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import os
from urllib.request import urlopen
#import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py"
with urlopen(_conf_url) as _inf:
exec(compile(_inf.read(), _conf_url, "exec"), globals())
# -- General configuration -----------------------------------------------------
copyright = "2016, Andreas Klöckner"
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.intersphinx',
#'sphinx.ext.viewcode',
'sphinx.ext.doctest',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'loopy'
copyright = u'2011, Andreas Klöckner'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
ver_dic = {}
exec(compile(open("../loopy/version.py").read(), "../loopy/version.py", 'exec'), ver_dic)
_version_source = "../loopy/version.py"
with open(_version_source) as vpy_file:
version_py = vpy_file.read()
os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] = "1"
exec(compile(version_py, _version_source, "exec"), ver_dic)
version = ".".join(str(x) for x in ver_dic["VERSION"])
# The full version, including alpha/beta/rc tags.
release = ver_dic["VERSION_TEXT"]
del os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"]
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# Use the bootstrap theme when it is installed; otherwise warn and leave the
# theme settings untouched.
try:
    import sphinx_bootstrap_theme
except ImportError:
    # Only a missing package should trigger the fallback. A bare 'except'
    # would also swallow KeyboardInterrupt, SystemExit and genuine errors
    # raised while importing the theme.
    from warnings import warn
    warn("I would like to use the sphinx bootstrap theme, but can't find it.\n"
            "'pip install sphinx_bootstrap_theme' to fix.")
else:
    # Activate the theme.
    html_theme = 'bootstrap'
    html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()

    # Theme options are theme-specific and customize the look and feel of a theme
    # further. For a list of options available for each theme, see the
    # documentation.
    html_theme_options = {
            "navbar_fixed_top": "true",
            "navbar_site_name": "Contents",
            'bootstrap_version': '3',
            'source_link_position': 'footer',
            }
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
html_show_sourcelink = False
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'loopydoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'loopy.tex', u'loopy Documentation',
u'Andreas Kloeckner', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'loopy', u'loopy Documentation',
[u'Andreas Kloeckner'], 1)
]
exclude_patterns = ["_build"]
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
'http://docs.python.org/': None,
'http://documen.tician.de/islpy': None,
'http://documen.tician.de/pyopencl': None,
'http://documen.tician.de/cgen': None,
'http://docs.scipy.org/doc/numpy/': None,
}
autoclass_content = "both"
"python": ("https://docs.python.org/3", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"pytools": ("https://documen.tician.de/pytools", None),
"islpy": ("https://documen.tician.de/islpy", None),
"pyopencl": ("https://documen.tician.de/pyopencl", None),
"cgen": ("https://documen.tician.de/cgen", None),
"pymbolic": ("https://documen.tician.de/pymbolic", None),
"constantdict": ("https://matthiasdiener.github.io/constantdict/", None),
}
nitpicky = True
nitpick_ignore_regex = [
["py:class", r"typing_extensions\.(.+)"],
["py:class", r"numpy\.u?int[0-9]+"],
["py:class", r"numpy\.float[0-9]+"],
["py:class", r"numpy\.complex[0-9]+"],
# Reference not found from "<unknown>"? I'm not even sure where to look.
["py:class", r"ExpressionNode"],
# Type aliases
["py:class", r"InameStr"],
["py:class", r"ConcreteCallablesTable"],
["py:class", r"LoopNestTree"],
["py:class", r"LoopTree"],
["py:class", r"ToLoopyTypeConvertible"],
["py:class", r"ToStackMatchConvertible"],
]
......@@ -18,20 +18,20 @@ When you run this script, the following kernel is generated, compiled, and execu
(See the full example for how to print the generated code.)
.. _static-binary:
Places on the web related to Loopy
----------------------------------
* `Python package index <http://pypi.python.org/pypi/loo.py>`_ (download releases) Note the extra '.' in the PyPI identifier!
* `Github <http://github.com/inducer/loopy>`_ (get latest source code, file bugs)
* `Wiki <http://wiki.tiker.net/Loopy>`_ (read installation tips, get examples, read FAQ)
* `Homepage <http://mathema.tician.de/software/loopy>`_
* `Python package index <https://pypi.org/project/loopy>`_ (download releases)
* `Github <https://github.com/inducer/loopy>`_ (get latest source code, file bugs)
* `Homepage <https://mathema.tician.de/software/loopy>`_
Table of Contents
-----------------
If you're only just learning about loopy, consider the following `paper
<http://arxiv.org/abs/1405.7470>`_ on loo.py that may serve as a good
<https://arxiv.org/abs/1405.7470>`_ on loopy that may serve as a good
introduction.
Please check :ref:`installation` to get started.
......@@ -40,8 +40,16 @@ Please check :ref:`installation` to get started.
:maxdepth: 2
tutorial
reference
ref_creation
ref_kernel
ref_translation_unit
ref_transform
ref_call
ref_other
misc
ref_internals
🚀 Github <https://github.com/inducer/loopy>
💾 Download Releases <https://pypi.org/project/loopy>
Indices and tables
==================
......
......@@ -3,9 +3,21 @@
Installation
============
Option 0: Static Binary
-----------------------
If you would just like to experiment with :mod:`loopy`'s code transformation
abilities, the easiest way to get loopy is to download a statically-linked
Linux binary.
See :ref:`static-binary` for details.
Option 1: From Source, no PyOpenCL integration
-----------------------------------------------
This command should install :mod:`loopy`::
pip install https://github.com/inducer/loopy/tarball/master
pip install loopy
You may need to run this with :command:`sudo`.
If you don't already have `pip <https://pypi.python.org/pypi/pip>`_,
......@@ -15,26 +27,85 @@ run this beforehand::
python get-pip.py
For a more manual installation, `download the source
<http://pypi.python.org/pypi/islpy>`_, unpack it, and say::
<https://pypi.org/project/loopy>`_, unpack it, and say::
python setup.py install
You may also clone its git repository::
git clone --recursive git://github.com/inducer/loopy
git clone --recursive http://git.tiker.net/trees/loopy.git
git clone --recursive https://github.com/inducer/loopy.git
Option 2: From Conda Forge, with PyOpenCL integration
-----------------------------------------------------
This set of instructions is intended for computers running 64-bit Linux
or macOS:
#. Make sure your system has the basics to build software.
On Debian derivatives (Ubuntu and many more),
installing ``build-essential`` should do the trick.
Everywhere else, just making sure you have the ``g++`` package should be
enough.
#. Install `miniforge <https://github.com/conda-forge/miniforge>`_.
#. ``export CONDA=/WHERE/YOU/INSTALLED/miniforge3``
If you accepted the default location, this should work:
``export CONDA=$HOME/miniforge3``
#. ``$CONDA/bin/conda create -n dev``
#. ``source $CONDA/bin/activate dev``
#. ``conda install git pip pocl islpy pyopencl`` (Linux)
or
``conda install osx-pocl-opencl git pip pocl islpy pyopencl`` (OS X)
#. Type the following command::
pip install git+https://github.com/inducer/loopy
Next time you want to use :mod:`loopy`, just run the following command::
source /WHERE/YOU/INSTALLED/miniforge3/bin/activate dev
You may also like to add this to a startup file (like :file:`$HOME/.bashrc`) or create an alias for it.
See the `PyOpenCL installation instructions
<https://documen.tician.de/pyopencl/misc.html#installation>`_ for options
regarding OpenCL drivers.
User-visible Changes
====================
Version 2014.1
See also :ref:`language-versioning`.
Version 2018.1
--------------
.. note::
This version is currently under development. You can get snapshots from
PyOpenCL's `git repository <https://github.com/inducer/loopy>`_
loopy's `git repository <https://github.com/inducer/loopy>`_
Version 2016.1.1
----------------
* Initial release.
* Add :func:`loopy.chunk_iname`.
* Add ``unused:l``, ``unused:g``, and ``like:INAME`` iname tag notation
* Release automatically built, self-contained Linux binary
* Many fixes and improvements
* Docs improvements
Version 2016.1
--------------
* Initial release.
.. _license:
......@@ -43,7 +114,7 @@ Licensing
Loopy is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009-13 Andreas Klöckner and Contributors.
Copyright (c) 2009-17 Andreas Klöckner and Contributors.
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
......@@ -69,8 +140,297 @@ OTHER DEALINGS IN THE SOFTWARE.
Frequently Asked Questions
==========================
The FAQ is maintained collaboratively on the
`Wiki FAQ page <http://wiki.tiker.net/Loopy/FrequentlyAskedQuestions>`_.
Is Loopy specific to OpenCL?
----------------------------
No, absolutely not. You can switch to a different code generation target
(subclasses of :class:`loopy.TargetBase`) by using (say)::
knl = knl.copy(target=loopy.CudaTarget())
Also see :ref:`targets`. (Py)OpenCL right now has the best support for
running kernels directly out of the box, but that could easily be expanded.
Open an issue to discuss what you need.
In the meantime, you can generate code simply by saying::
cg_result = loopy.generate_code_v2(knl)
print(cg_result.host_code())
print(cg_result.device_code())
Additionally, for C-based languages, header definitions are available via::
loopy.generate_header(knl)
For what types of codes does :mod:`loopy` work well?
----------------------------------------------------
Any array-based/number-crunching code whose control flow is not *too*
data dependent should be expressible. For example:
* Sparse matrix-vector multiplies, despite their data-dependent control
flow (varying row lengths, say), are easy and natural to express.
* Looping until convergence on the other hand is an example
of something that can't be expressed easily. Such checks
would have to be performed outside of :mod:`loopy` code.
Can I see some examples?
------------------------
Loopy has a ton of tests, and right now, those are probably the best
source of examples. Here are some links:
* `Tests directory <https://github.com/inducer/loopy/tree/master/test>`_
* `Applications tests <https://github.com/inducer/loopy/blob/master/test/test_apps.py>`_
* `Feature tests <https://github.com/inducer/loopy/blob/master/test/test_loopy.py>`_
Here's a more complicated example of a loopy code:
.. literalinclude:: ../examples/python/find-centers.py
:language: python
This example is included in the :mod:`loopy` distribution as
:download:`examples/python/find-centers.py <../examples/python/find-centers.py>`.
What this does is find nearby "centers" satisfying some criteria
for an array of points ("targets").
Specifying dependencies for groups of instructions is cumbersome. Help?
-----------------------------------------------------------------------
You can now specify instruction ID prefixes and dependencies for groups
of instructions, like this::
with {id_prefix=init_m}
<> m[0] = ...
m[1] = ...
m[2] = ...
end
with {id_prefix=update_m,dep=init_m*}
m[0] = m[0] + ...
m[1] = m[1] + ...
m[2] = m[2] * ...
end
with {dep=update_m*}
output[i, j, 0] = 0.25*m[0]
output[i, j, 1] = 0.25*m[1]
output[i, j, 2] = 0.25*m[2]
end
.. versionadded:: 2016.2.1
(There was a bug in prior versions that kept this from working.)
What types of transformations can I do?
---------------------------------------
This list is always growing, but here are a few pointers:
* Unroll
Use :func:`loopy.tag_inames` with the ``"unr"`` tag.
Unrolled loops must have a fixed size. (See either
:func:`loopy.split_iname` or :func:`loopy.fix_parameters`.)
* Stride changes (Row/column/something major)
Use :func:`loopy.tag_array_axes` with (e.g.) ``stride:17`` or
``N1,N2,N0`` to determine how each axis of an array is realized.
* Prefetch
Use :func:`loopy.add_prefetch`.
* Reorder loops
Use :func:`loopy.prioritize_loops`.
* Precompute subexpressions:
Use a :ref:`substitution-rule` to assign a name to a subexpression,
using, e.g., :func:`loopy.assignment_to_subst` or :func:`loopy.extract_subst`.
Then use :func:`loopy.precompute` to create an (array or scalar)
temporary with precomputed values.
* Tile:
Use :func:`loopy.split_iname` to produce enough loops, then use
:func:`loopy.prioritize_loops` to set the ordering.
* Fix constants
Use :func:`loopy.fix_parameters`.
* Parallelize (across cores)
Use :func:`loopy.tag_inames` with the ``"g.0"``, ``"g.1"`` (and so on) tags.
* Parallelize (across vector lanes)
Use :func:`loopy.tag_inames` with the ``"l.0"``, ``"l.1"`` (and so on) tags.
* Affinely map loop domains
Use :func:`loopy.affine_map_inames`.
* Texture-based data access
Use :func:`loopy.change_arg_to_image` to use texture memory
for an argument.
* Kernel Fusion
Use :func:`loopy.fuse_kernels`.
* Explicit-SIMD Vectorization
Use :func:`loopy.tag_inames` with the ``"vec"`` iname tag.
Note that the corresponding axis of an array must
also be tagged using the ``"vec"`` array axis tag
(using :func:`loopy.tag_array_axes`) in order for vector code to be
generated.
Vectorized loops (and array axes) must have a fixed size. (See either
:func:`loopy.split_iname` or :func:`loopy.fix_parameters` along with
:func:`loopy.split_array_axis`.)
* Reuse of Temporary Storage
Use :func:`loopy.alias_temporaries` to reduce the size of intermediate
storage.
* SoA :math:`\leftrightarrow` AoS
Use :func:`loopy.tag_array_axes` with the ``"sep"`` array axis tag
to generate separate arrays for each entry of a short, fixed-length
array axis.
Separated array axes must have a fixed size. (See
:func:`loopy.split_array_axis`.)
* Realization of Instruction-level parallelism
Use :func:`loopy.tag_inames` with the ``"ilp"`` tag.
ILP loops must have a fixed size. (See either
:func:`loopy.split_iname` or :func:`loopy.fix_parameters`.)
* Type inference
Use :func:`loopy.add_and_infer_dtypes`.
* Convey assumptions:
Use :func:`loopy.assume` to say, e.g.
``loopy.assume(knl, "N mod 4 = 0")`` or
``loopy.assume(knl, "N > 0")``.
* Perform batch computations
Use :func:`loopy.to_batched`.
* Interface with your own library functions
See :ref:`func-interface` for details.
* Loop collapse
Use :func:`loopy.join_inames`.
In what sense does Loopy support vectorization?
-----------------------------------------------
There are really two ways in which the OpenCL/CUDA model of computation exposes
vectorization:
* "SIMT": The user writes scalar program instances and either the compiler or
the hardware joins the individual program instances into vectors of a
hardware-given length for execution.
* "Short vectors": This type of vectorization is based on vector types,
e.g. ``float4``, which support arithmetic with implicit vector semantics
as well as a number of 'intrinsic' functions.
Loopy supports both. The first one, SIMT, is accessible by tagging inames with,
e.g., ``l.0``. Accessing the second one requires using both execution- and
data-reshaping capabilities in loopy. To start with, you need an array that
has an axis with the length of the desired vector. If that's not yet available,
you may use :func:`loopy.split_array_axis` to produce one. Similarly, you need
an iname whose bounds match those of the desired vector length. Again, if you
don't already have one, :func:`loopy.split_iname` will easily produce one.
Lastly, both the array axis and the iname need the implementation tag ``"vec"``.
Here is an example of this machinery in action:
.. literalinclude:: ../examples/python/vector-types.py
:language: python
Note how the example slices off the last 'slab' of iterations to ensure that
the bulk of the iteration does not require conditionals which would prevent
successful vectorization. This generates the following code:
.. literalinclude:: ../examples/python/vector-types.cl
:language: c
What is the story with language versioning?
-------------------------------------------
The idea is to keep supporting multiple versions at a time. There's a
tension in loopy between the need to build code that keeps working
unchanged for some number of years, and needing the language to
evolve--not just as a research vehicle, but also to enable it to respond
to emerging needs in applications and hardware.
The idea is not to support all versions indefinitely, merely to allow
users to upgrade on their own schedule on the scale of a couple years.
Warnings about needing to upgrade would get noisier as a version nears
deprecation. In a way, it is intended to be a version of Python's
`__future__` flags, which IMO have served the language tremendously
well.
One can also obtain the current language version programmatically:
:data:`loopy.MOST_RECENT_LANGUAGE_VERSION`.
But pinning your code to that would mean choosing to not use the
potentially valuable guarantee to keep existing code working unchanged
for a while. Instead, it might be wiser to just grab the version of the
language current at the time of writing the code.
Uh-oh. I got a scheduling error. Any hints?
-------------------------------------------
* Make sure that dependencies between instructions are as
you intend.
Use :func:`loopy.show_dependency_graph` to check.
There's a heuristic that tries to help find dependencies. If there's
only a single write to a variable, then it adds dependencies from all
readers to the writer. In your case, that's actually counterproductive,
because it creates a circular dependency, hence the scheduling issue.
So you'll have to turn that off, like so::
knl = lp.make_kernel(
"{ [t]: 0 <= t < T}",
"""
<> xt = x[t] {id=fetch,dep=*}
x[t + 1] = xt * 0.1 {dep=fetch}
""")
* Make sure that your loops are correctly nested.
Print the kernel to make sure all instructions are within
the set of inames you intend them to be in.
* One iname is one for loop.
For sequential loops, one iname corresponds to exactly one
``for`` loop in generated code. Loopy will not generate multiple
loops from one iname.
* Read the scheduler's error message.
The scheduler will try to be as helpful as it can in telling
you where it got stuck.
Citing Loopy
============
......@@ -79,7 +439,7 @@ If you use loopy for your work and find its approach helpful, please
consider citing the following article.
A. Klöckner. `Loo.py: transformation-based code generation for GPUs and
CPUs <http://arxiv.org/abs/1405.7470>`_. Proceedings of ARRAY '14: ACM
CPUs <https://arxiv.org/abs/1405.7470>`_. Proceedings of ARRAY '14: ACM
SIGPLAN Workshop on Libraries, Languages, and Compilers for Array
Programming. Edinburgh, Scotland.
......@@ -96,5 +456,33 @@ Here's a Bibtex entry for your convenience::
doi = "{10.1145/2627373.2627387}",
}
Getting help
============
Email the friendly folks on the `loopy mailing list <https://lists.tiker.net/listinfo/loopy>`_.
Acknowledgments
===============
Work on loopy was supported in part by
- the Department of Energy, National Nuclear Security Administration, under Award Number DE-NA0003963,
- the US Navy ONR, under grant number N00014-14-1-0117, and
- the US National Science Foundation under grant numbers DMS-1418961, CCF-1524433, DMS-1654756, SHF-1911019, and OAC-1931577.
AK also gratefully acknowledges a hardware gift from Nvidia Corporation.
The views and opinions expressed herein do not necessarily reflect those of the funding agencies.
Cross-References to Other Documentation
=======================================
.. currentmodule:: numpy
.. class:: int16
See :class:`numpy.generic`.
.. class:: complex128
See :class:`numpy.generic`.
.. currentmodule:: loopy
.. _func-interface:
Function Interface
==================
Resolving and specialization
----------------------------
In :mod:`loopy`, a :class:`loopy.TranslationUnit` is a collection of callables
and entrypoints. Callables are of type
:class:`loopy.kernel.function_interface.InKernelCallable`. Functions start life
as simple :class:`pymbolic.primitives.Call` nodes. Call resolution turns the function
identifiers in those calls into :class:`~loopy.symbolic.ResolvedFunction` objects.
Each resolved function has an entry in :attr:`TranslationUnit.callables_table`.
The process of realizing a function as a
:class:`~loopy.kernel.function_interface.InKernelCallable` is referred to as
resolving.
During code generation for a :class:`~loopy.TranslationUnit`, a (resolved) callable
is *specialized* depending on the types and shapes of the arguments passed at a
call site. For example, a call to ``sin(x)`` in :mod:`loopy` is type-generic to
begin with, but it is later specialized to either ``sinf``, ``sin`` or ``sinl``
depending on the type of its argument ``x``. A callable's behavior during type
or shape specialization is encoded via
:meth:`~loopy.InKernelCallable.with_types` and
:meth:`~loopy.InKernelCallable.with_descrs`.
Registering callables
---------------------
A user can *register* callables within a :class:`~loopy.TranslationUnit` to
allow loopy to resolve calls not pre-defined in :mod:`loopy`. In :mod:`loopy`,
we typically aim to expose all the standard math functions defined for
a :class:`~loopy.target.TargetBase`. Other foreign functions could be invoked by
*registering* them.
An example demonstrating registering a ``CBlasGemv`` as a loopy callable:
.. literalinclude:: ../examples/python/call-external.py
Call Instruction for a kernel call
----------------------------------
At a call-site involving a call to a :class:`loopy.LoopKernel`, the arguments to
the call must be ordered by the order of input arguments of the callee kernel.
Similarly, the assignees must be ordered by the order of callee kernel's output
arguments. Since a :class:`~loopy.kernel.data.KernelArgument` can be both an
input and an output, such arguments would be a part of the call instruction's
assignees as well as the call expression node's parameters.
Entry points
------------
Only callables in :attr:`loopy.TranslationUnit.entrypoints` can be called from
the outside. All other callables are only visible from within the translation
unit, similar to C's ``static`` functions.
Reference
---------
.. automodule:: loopy.kernel.function_interface
.. currentmodule:: loopy
.. _creating-kernels:
Reference: Creating Kernels
===========================
From Loop Domains and Instructions
----------------------------------
.. autofunction:: make_kernel
From Fortran
------------
.. autofunction:: parse_fortran
.. autofunction:: parse_transformed_fortran
.. autofunction:: c_preprocess
From Other Kernels
------------------
.. autofunction:: fuse_kernels
To Copy between Data Formats
----------------------------
.. autofunction:: make_copy_kernel
Einstein summation convention kernels
-------------------------------------
.. autofunction:: make_einsum
.. automodule:: loopy.version
.. vim: tw=75:spell:fdm=marker
Reference: Documentation for Internal API
=========================================
Targets
-------
See also :ref:`targets`.
.. automodule:: loopy.target.c
Symbolic
--------
See also :ref:`expression-syntax`.
.. automodule:: loopy.symbolic
Types
-----
DTypes of variables in a :class:`loopy.LoopKernel` must be picklable, so in
the codegen pipeline user-provided types are converted to
:class:`loopy.types.LoopyType`.
.. automodule:: loopy.types
Type inference
^^^^^^^^^^^^^^
.. automodule:: loopy.type_inference
Codegen
-------
.. automodule:: loopy.codegen
Reduction Operation
-------------------
.. automodule:: loopy.library.reduction
Iname Tags
----------
.. automodule:: loopy.kernel.data
Array
-----
.. automodule:: loopy.kernel.array
Checks
------
.. automodule:: loopy.check
Schedule
--------
.. automodule:: loopy.schedule
.. automodule:: loopy.schedule.tools
.. automodule:: loopy.schedule.tree
This diff is collapsed.
Reference: Other Functionality
==============================
Auxiliary Data Types
--------------------
.. automodule:: loopy.typing
Obtaining Kernel Performance Statistics
---------------------------------------
.. automodule:: loopy.statistics
Controlling caching
-------------------
.. envvar:: LOOPY_NO_CACHE
.. envvar:: CG_NO_CACHE
By default, loopy will cache (on disk) the result of various stages
of code generation to speed up future code generation of the same kernel.
By setting the environment variables :envvar:`LOOPY_NO_CACHE` or
:envvar:`CG_NO_CACHE` to any
string that :func:`pytools.strtobool` evaluates as ``True``, this caching
is suppressed.
.. envvar:: LOOPY_ABORT_ON_CACHE_MISS
If set to a string that :func:`pytools.strtobool` evaluates as ``True``,
loopy will raise an exception if a cache miss occurs. This can be useful
for debugging cache-related issues. For example, it can be used to automatically test whether caching is successful for a particular code, by setting this variable to ``True`` and re-running the code.
.. autofunction:: set_caching_enabled
.. autoclass:: CacheMode
Running Kernels
---------------
Use :meth:`TranslationUnit.executor` to bind a translation unit
to execution resources, and then use :meth:`ExecutorBase.__call__`
to invoke the kernel.
.. autoclass:: ExecutorBase
Automatic Testing
-----------------
.. autofunction:: auto_test_vs_ref
Troubleshooting
---------------
Printing :class:`LoopKernel` objects
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you're confused about things loopy is referring to in an error message or
about the current state of the :class:`LoopKernel` you are transforming, the
following always works::
print(kernel)
(And it yields a human-readable--albeit terse--representation of *kernel*.)
.. autofunction:: get_dot_dependency_graph
.. autofunction:: show_dependency_graph
.. autofunction:: t_unit_to_python
.. _reference-transform:
Reference: Transforming Kernels
===============================
Dealing with Parameters
-----------------------
.. automodule:: loopy.transform.parameter
Wrangling inames
----------------
.. automodule:: loopy.transform.iname
Dealing with Substitution Rules
-------------------------------
.. currentmodule:: loopy
.. autofunction:: extract_subst
.. autofunction:: assignment_to_subst
.. autofunction:: expand_subst
.. autofunction:: find_rules_matching
.. autofunction:: find_one_rule_matching
Caching, Precomputation and Prefetching
---------------------------------------
.. autofunction:: precompute
.. autofunction:: add_prefetch
.. autofunction:: buffer_array
.. autofunction:: alias_temporaries
Influencing data access
-----------------------
.. autofunction:: change_arg_to_image
.. autofunction:: tag_array_axes
.. autofunction:: remove_unused_arguments
.. autofunction:: set_array_axis_names
.. automodule:: loopy.transform.privatize
.. autofunction:: allocate_temporaries_for_base_storage
Padding Data
------------
.. autofunction:: split_array_axis
.. autofunction:: find_padding_multiple
.. autofunction:: add_padding
Manipulating Instructions
-------------------------
.. autofunction:: set_instruction_priority
.. autofunction:: add_dependency
.. autofunction:: remove_instructions
.. autofunction:: replace_instruction_ids
.. autofunction:: tag_instructions
.. autofunction:: add_nosync
.. autofunction:: add_barrier
Registering Library Routines
----------------------------
.. autofunction:: register_reduction_parser
.. autofunction:: register_preamble_generators
.. autofunction:: register_symbol_manglers
Modifying Arguments
-------------------
.. autofunction:: set_argument_order
.. autofunction:: add_dtypes
.. autofunction:: infer_unknown_types
.. autofunction:: add_and_infer_dtypes
.. autofunction:: rename_argument
.. autofunction:: set_temporary_address_space
Creating Batches of Operations
------------------------------
.. automodule:: loopy.transform.batch
Finishing up
------------
.. currentmodule:: loopy
.. autofunction:: preprocess_kernel
.. autofunction:: generate_loop_schedules
.. autofunction:: get_one_linearized_kernel
.. autofunction:: save_and_reload_temporaries
.. autoclass:: GeneratedProgram
.. autoclass:: CodeGenerationResult
.. autofunction:: generate_code_v2
.. autofunction:: generate_header
Setting options
---------------
.. autofunction:: set_options
.. _context-matching:
Matching contexts
-----------------
TODO: Matching instruction tags
.. automodule:: loopy.match
.. vim: tw=75:spell
.. currentmodule:: loopy
Translation Units
=================
.. automodule:: loopy.translation_unit
This diff is collapsed.
#! /bin/bash
# Upload the built HTML documentation in _build/html to doc-upload:doc/loopy.
# Before uploading, an .htaccess file configuring HTTP basic authentication
# is written into the build directory so that it is transferred as well.
# Everything between the heredoc markers is written verbatim to .htaccess.
cat > _build/html/.htaccess <<EOF
AuthUserFile /home/andreas/htpasswd
AuthGroupFile /dev/null
AuthName "Pre-Release Documentation"
AuthType Basic
require user iliketoast
EOF
# The {.*,*} brace expansion passes both dotfiles (such as .htaccess) and
# regular entries of _build/html to rsync.
rsync --progress --verbose --archive --delete _build/html/{.*,*} doc-upload:doc/loopy
# NOTE(review): this second invocation differs from the one above only by
# omitting --progress; the pair looks like the old and new version of the
# same line from a diff view -- confirm that only one is meant to remain.
rsync --verbose --archive --delete _build/html/{.*,*} doc-upload:doc/loopy