From 3f728ca40711fcc8bc9802a8eb49f47caaf539f3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 24 Apr 2013 13:14:21 -0400 Subject: [PATCH] Minor fixes and doc improvements. Add hello-world example. --- doc/index.rst | 22 ++++++++++- doc/misc.rst | 86 +++++++++++++++++++++++++++++++++++++++++ doc/reference.rst | 8 ---- examples/hello-loopy.cl | 10 +++++ examples/hello-loopy.py | 38 ++++++++++++++++++ loopy/check.py | 29 +++++++------- loopy/compiled.py | 19 ++++++--- loopy/kernel.py | 4 +- 8 files changed, 185 insertions(+), 31 deletions(-) create mode 100644 doc/misc.rst create mode 100644 examples/hello-loopy.cl create mode 100644 examples/hello-loopy.py diff --git a/doc/index.rst b/doc/index.rst index 032b11e5e..1687f4c8e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,13 +1,33 @@ Welcome to loopy's documentation! ================================= -Contents: +.. note:: + Loo.py hasn't been released yet. What's documented here generally + exists as code and has survived some light testing. So if you try + it and it works for you, great. If not, please do make sure to shoot + me a message. + +loopy is a code generator for array-based code in the OpenCL/CUDA execution +model. Here's a very simple example of how to double the entries of a vector +using loopy: + +.. literalinclude:: ../examples/hello-loopy.py + +The following kernel is generated, compiled, and executed behind your back (and +also printed at the end): + +.. literalinclude:: ../examples/hello-loopy.cl + :language: c + +This file is included in the :mod:`loopy` distribution as +:file:`examples/hello-loopy.py`. .. 
toctree:: :maxdepth: 2 guide reference + misc Indices and tables ================== diff --git a/doc/misc.rst b/doc/misc.rst new file mode 100644 index 000000000..1b2ea07de --- /dev/null +++ b/doc/misc.rst @@ -0,0 +1,86 @@ +Installation +============ + +Installation should require no more than the usual:: + + python setup.py install + +User-visible Changes +==================== + +Version 2013.1 +-------------- +.. note:: + + This version is currently under development. You can get snapshots from + loopy's `git repository <https://github.com/inducer/loopy>`_ + +* Initial release. + +.. _license: + +Licensing +========= + +Loopy is licensed to you under the MIT/X Consortium license: + +Copyright (c) 2009-13 Andreas Klöckner and Contributors. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +Frequently Asked Questions +========================== + +The FAQ is maintained collaboratively on the +`Wiki FAQ page <http://wiki.tiker.net/Loopy/FrequentlyAskedQuestions>`_. 
+ +Citing Loopy +============ + +If you use loopy for your work and find its approach helpful, please +consider citing the following article. + + TBD, Fixme. + +We are not asking you to gratuitously cite loopy in work that is otherwise +unrelated to software. That said, if you do discuss some of the development +aspects of your code and would like to highlight a few of the ideas behind +loopy, feel free to cite this article: + + Andreas Klöckner, TBD + +Here's a BibTeX entry for your convenience:: + + @article{kloeckner_pycuda_2012, + author = {{Kl{\"o}ckner}, Andreas + and {Warburton}, Timothy}, + title = "{TBD}", + journal = "TBD", + volume = "TBD", + number = "TBD", + pages = "TBD", + year = "TBD", + doi = "TBD", + } + + + diff --git a/doc/reference.rst b/doc/reference.rst index e3a59b97d..bf3ddc864 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -220,14 +220,6 @@ Automatic Testing Troubleshooting --------------- -Special-purpose functionality ------------------------------ - -Manipulating Reductions -~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
autofunction:: realize_reduction - Printing :class:`LoopKernel` objects ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/examples/hello-loopy.cl b/examples/hello-loopy.cl new file mode 100644 index 000000000..bad7e0c00 --- /dev/null +++ b/examples/hello-loopy.cl @@ -0,0 +1,10 @@ +#define lid(N) ((int) get_local_id(N)) +#define gid(N) ((int) get_group_id(N)) + +__kernel void __attribute__ ((reqd_work_group_size(128, 1, 1))) + loopy_kernel(__global float *restrict out, __global float const *restrict a, int const n) +{ + + if ((-1 + -128 * gid(0) + -1 * lid(0) + n) >= 0) + out[lid(0) + gid(0) * 128] = 2.0f * a[lid(0) + gid(0) * 128]; +} diff --git a/examples/hello-loopy.py b/examples/hello-loopy.py new file mode 100644 index 000000000..3cb600d90 --- /dev/null +++ b/examples/hello-loopy.py @@ -0,0 +1,38 @@ +import numpy as np +import loopy as lp +import pyopencl as cl +import pyopencl.array + +# ----------------------------------------------------------------------------- +# setup +# ----------------------------------------------------------------------------- +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +n = 15 * 10**6 +a = cl.array.arange(queue, n, dtype=np.float32) + +# ----------------------------------------------------------------------------- +# generation (loopy bits start here) +# ----------------------------------------------------------------------------- +knl = lp.make_kernel(ctx.devices[0], + "{[i]: 0<=i<n}", # "loop domain"-- what values does i take? 
+ "out[i] = 2*a[i]", # "instructions" to be executed across the domain + [ # argument declarations + lp.GlobalArg("out", np.float32, shape=("n",)), + lp.GlobalArg("a", np.float32, shape=("n",)), + lp.ValueArg("n", np.int32), + ]) + +# ----------------------------------------------------------------------------- +# transformation +# ----------------------------------------------------------------------------- +knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") + +# ----------------------------------------------------------------------------- +# execution +# ----------------------------------------------------------------------------- +cknl = lp.CompiledKernel(ctx, knl) +evt, (out,) = cknl(queue, a=a, n=n) + +cknl.print_code() diff --git a/loopy/check.py b/loopy/check.py index 3d340c482..c7ba6a76f 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -427,28 +427,27 @@ def get_problems(kernel, parameters): glens, llens = kernel.get_grid_sizes_as_exprs() + if (max(len(glens), len(llens)) + > kernel.device.max_work_item_dimensions): + msg(5, "too many work item dimensions") + from pymbolic import evaluate from pymbolic.mapper.evaluator import UnknownVariableError try: glens = evaluate(glens, parameters) llens = evaluate(llens, parameters) except UnknownVariableError, name: - raise RuntimeError("When checking your kernel for problems, " - "a value for parameter '%s' was not available. Pass " - "it in the 'parameters' kwarg to check_kernels()." 
+ msg(1, "could not check axis bounds because no value " + "for variable '%s' was passed to check_kernels()" % name) - - if (max(len(glens), len(llens)) - > kernel.device.max_work_item_dimensions): - msg(5, "too many work item dimensions") - - for i in range(len(llens)): - if llens[i] > kernel.device.max_work_item_sizes[i]: - msg(5, "group axis %d too big" % i) - - from pytools import product - if product(llens) > kernel.device.max_work_group_size: - msg(5, "work group too big") + else: + for i in range(len(llens)): + if llens[i] > kernel.device.max_work_item_sizes[i]: + msg(5, "group axis %d too big" % i) + + from pytools import product + if product(llens) > kernel.device.max_work_group_size: + msg(5, "work group too big") import pyopencl as cl from pyopencl.characterize import usable_local_mem_size diff --git a/loopy/compiled.py b/loopy/compiled.py index 053bc1d87..11a7b888e 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -32,6 +32,8 @@ import numpy as np from pytools import Record +AUTO_TEST_SKIP_RUN = False + @@ -646,7 +648,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print "using %s for the reference calculation" % dev - ref_evt, _ = ref_compiled(ref_queue, **ref_args) + if not AUTO_TEST_SKIP_RUN: + ref_evt, _ = ref_compiled(ref_queue, **ref_args) + else: + ref_evt = cl.enqueue_marker(ref_queue) ref_queue.finish() ref_stop = time() @@ -693,9 +698,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet print 75*"-" for i in range(warmup_rounds): - evt, _ = compiled(queue, **args) + if not AUTO_TEST_SKIP_RUN: + compiled(queue, **args) - if need_check: + if need_check and not AUTO_TEST_SKIP_RUN: for arg_desc in arg_descriptors: if arg_desc is None: continue @@ -731,8 +737,11 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet evt_start = cl.enqueue_marker(queue) for i in range(timing_rounds): - evt, _ = compiled(queue, **args) - events.append(evt) + 
if not AUTO_TEST_SKIP_RUN: + evt, _ = compiled(queue, **args) + events.append(evt) + else: + events.append(cl.enqueue_marker(queue)) evt_end = cl.enqueue_marker(queue) diff --git a/loopy/kernel.py b/loopy/kernel.py index a79237a9c..80aa93842 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -1017,8 +1017,8 @@ class LoopKernel(Record): processed_args = [] for arg in args: - for name in arg.name.split(","): - new_arg = arg.copy(name=name) + for arg_name in arg.name.split(","): + new_arg = arg.copy(name=arg_name) if isinstance(arg, _ShapedArg): if arg.shape is not None: new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines)) -- GitLab