Skip to content
Snippets Groups Projects
Commit 3f728ca4 authored by Andreas Klöckner's avatar Andreas Klöckner
Browse files

Minor fixes and doc improvements. Add hello-world example.

parent aadcf34c
No related branches found
No related tags found
No related merge requests found
Welcome to loopy's documentation!
=================================
Contents:
.. note::
Loo.py hasn't been released yet. What's documented here generally
exists as code and has survived some light testing. So if you try
it and it works for you, great. If not, please do make sure to shoot
me a message.
loopy is a code generator for array-based code in the OpenCL/CUDA execution
model. Here's a very simple example of how to double the entries of a vector
using loopy:
.. literalinclude:: ../examples/hello-loopy.py
The following kernel is generated, compiled, and executed behind your back (and
also printed at the end):
.. literalinclude:: ../examples/hello-loopy.cl
:language: c
The example script is included in the :mod:`loopy` distribution as
:file:`examples/hello-loopy.py`.
.. toctree::
:maxdepth: 2
guide
reference
misc
Indices and tables
==================
......
Installation
============
Installation should require no more than the usual::
python setup.py install
User-visible Changes
====================
Version 2013.1
--------------
.. note::
This version is currently under development. You can get snapshots from
loopy's `git repository <https://github.com/inducer/loopy>`_.
* Initial release.
.. _license:
Licensing
=========
Loopy is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009-13 Andreas Klöckner and Contributors.
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
Frequently Asked Questions
==========================
The FAQ is maintained collaboratively on the
`Wiki FAQ page <http://wiki.tiker.net/Loopy/FrequentlyAskedQuestions>`_.
Citing Loopy
============
If you use loopy for your work and find its approach helpful, please
consider citing the following article.
TBD, Fixme.
We are not asking you to gratuitously cite loopy in work that is otherwise
unrelated to software. That said, if you do discuss some of the development
aspects of your code and would like to highlight a few of the ideas behind
loopy, feel free to cite this article:
Andreas Klöckner, TBD
Here's a Bibtex entry for your convenience::
@article{kloeckner_pycuda_2012,
author = {{Kl{\"o}ckner}, Andreas
and {Warburton}, Timothy},
title = "{TBD}",
journal = "TBD",
volume = "TBD",
number = "TBD",
pages = "TBD",
year = "TBD",
doi = "TBD",
}
......@@ -220,14 +220,6 @@ Automatic Testing
Troubleshooting
---------------
Special-purpose functionality
-----------------------------
Manipulating Reductions
~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: realize_reduction
Printing :class:`LoopKernel` objects
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
......
/* Shorthand for the work-item's local id along axis N, cast to int. */
#define lid(N) ((int) get_local_id(N))
/* Shorthand for the work-group's id along axis N, cast to int. */
#define gid(N) ((int) get_group_id(N))
/*
 * Writes 2*a[k] to out[k] for the element k = lid(0) + gid(0)*128 handled by
 * this work-item. The kernel requires a work-group size of exactly 128x1x1.
 *
 *   out  destination buffer
 *   a    source buffer (read-only)
 *   n    upper bound used by the guard below; presumably the number of
 *        entries in both buffers -- confirm against the caller
 */
__kernel void __attribute__ ((reqd_work_group_size(128, 1, 1)))
loopy_kernel(__global float *restrict out, __global float const *restrict a, int const n)
{
/* Guard is equivalent to (128*gid(0) + lid(0)) < n: work-items whose
   element index is not below n do nothing. */
if ((-1 + -128 * gid(0) + -1 * lid(0) + n) >= 0)
out[lid(0) + gid(0) * 128] = 2.0f * a[lid(0) + gid(0) * 128];
}
import numpy as np
import loopy as lp
import pyopencl as cl
import pyopencl.array
# -----------------------------------------------------------------------------
# setup: OpenCL context, command queue, and the input vector
# -----------------------------------------------------------------------------
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

n = 15 * 10**6
a = cl.array.arange(queue, n, dtype=np.float32)

# -----------------------------------------------------------------------------
# generation (loopy bits start here)
# -----------------------------------------------------------------------------
# "loop domain" -- what values does i take?
loop_domain = "{[i]: 0<=i<n}"

# "instructions" to be executed across the domain
instructions = "out[i] = 2*a[i]"

# argument declarations: two float32 arrays of shape (n,) and the int32 size n
kernel_args = [
    lp.GlobalArg("out", np.float32, shape=("n",)),
    lp.GlobalArg("a", np.float32, shape=("n",)),
    lp.ValueArg("n", np.int32),
]

knl = lp.make_kernel(ctx.devices[0], loop_domain, instructions, kernel_args)

# -----------------------------------------------------------------------------
# transformation
# -----------------------------------------------------------------------------
# Split iname "i" by 128; the outer part is tagged "g.0", the inner part "l.0".
knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

# -----------------------------------------------------------------------------
# execution
# -----------------------------------------------------------------------------
cknl = lp.CompiledKernel(ctx, knl)

# The call returns an event plus a one-element sequence holding the output.
evt, outputs = cknl(queue, a=a, n=n)
(out,) = outputs

# Print the generated code.
cknl.print_code()
......@@ -427,28 +427,27 @@ def get_problems(kernel, parameters):
glens, llens = kernel.get_grid_sizes_as_exprs()
if (max(len(glens), len(llens))
> kernel.device.max_work_item_dimensions):
msg(5, "too many work item dimensions")
from pymbolic import evaluate
from pymbolic.mapper.evaluator import UnknownVariableError
try:
glens = evaluate(glens, parameters)
llens = evaluate(llens, parameters)
except UnknownVariableError, name:
raise RuntimeError("When checking your kernel for problems, "
"a value for parameter '%s' was not available. Pass "
"it in the 'parameters' kwarg to check_kernels()."
msg(1, "could not check axis bounds because no value "
"for variable '%s' was passed to check_kernels()"
% name)
if (max(len(glens), len(llens))
> kernel.device.max_work_item_dimensions):
msg(5, "too many work item dimensions")
for i in range(len(llens)):
if llens[i] > kernel.device.max_work_item_sizes[i]:
msg(5, "group axis %d too big" % i)
from pytools import product
if product(llens) > kernel.device.max_work_group_size:
msg(5, "work group too big")
else:
for i in range(len(llens)):
if llens[i] > kernel.device.max_work_item_sizes[i]:
msg(5, "group axis %d too big" % i)
from pytools import product
if product(llens) > kernel.device.max_work_group_size:
msg(5, "work group too big")
import pyopencl as cl
from pyopencl.characterize import usable_local_mem_size
......
......@@ -32,6 +32,8 @@ import numpy as np
from pytools import Record
AUTO_TEST_SKIP_RUN = False
......@@ -646,7 +648,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
print "using %s for the reference calculation" % dev
ref_evt, _ = ref_compiled(ref_queue, **ref_args)
if not AUTO_TEST_SKIP_RUN:
ref_evt, _ = ref_compiled(ref_queue, **ref_args)
else:
ref_evt = cl.enqueue_marker(ref_queue)
ref_queue.finish()
ref_stop = time()
......@@ -693,9 +698,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
print 75*"-"
for i in range(warmup_rounds):
evt, _ = compiled(queue, **args)
if not AUTO_TEST_SKIP_RUN:
compiled(queue, **args)
if need_check:
if need_check and not AUTO_TEST_SKIP_RUN:
for arg_desc in arg_descriptors:
if arg_desc is None:
continue
......@@ -731,8 +737,11 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
evt_start = cl.enqueue_marker(queue)
for i in range(timing_rounds):
evt, _ = compiled(queue, **args)
events.append(evt)
if not AUTO_TEST_SKIP_RUN:
evt, _ = compiled(queue, **args)
events.append(evt)
else:
events.append(cl.enqueue_marker(queue))
evt_end = cl.enqueue_marker(queue)
......
......@@ -1017,8 +1017,8 @@ class LoopKernel(Record):
processed_args = []
for arg in args:
for name in arg.name.split(","):
new_arg = arg.copy(name=name)
for arg_name in arg.name.split(","):
new_arg = arg.copy(name=arg_name)
if isinstance(arg, _ShapedArg):
if arg.shape is not None:
new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment