Minor fixes and doc improvements. Add hello-world example.

3f728ca4 · Andreas Klöckner · aadcf34c · 3f728ca4 · 3f728ca4 · 3f728ca4
Commit 3f728ca4 authored 11 years ago by Andreas Klöckner
--- a/doc/index.rst
+++ b/doc/index.rst
 Welcome to loopy's documentation!
 =================================

-Contents:
+.. note::
+    Loo.py hasn't been released yet. What's documented here generally
+    exists as code and has survived some light testing. So if you try
+    it and it works for you, great. If not, please do make sure to shoot
+    me a message.
+
+loopy is a code generator for array-based code in the OpenCL/CUDA execution
+model. Here's a very simple example of how to double the entries of a vector
+using loopy:
+
+.. literalinclude:: ../examples/hello-loopy.py
+
+The following kernel is generated, compiled, and executed behind your back (and
+also printed at the end):
+
+.. literalinclude:: ../examples/hello-loopy.cl
+    :language: c
+
+The example script above is included in the :mod:`loopy` distribution as
+:file:`examples/hello-loopy.py`.

 .. toctree::
    :maxdepth: 2

    guide
    reference
+    misc

 Indices and tables
 ==================

--- a/doc/misc.rst
+++ b/doc/misc.rst
+Installation
+============
+
+Installation should require no more than the usual::
+
+    python setup.py install
+
+User-visible Changes
+====================
+
+Version 2013.1
+--------------
+.. note::
+
+    This version is currently under development. You can get snapshots from
+    loopy's `git repository <https://github.com/inducer/loopy>`_.
+
+* Initial release.
+
+.. _license:
+
+Licensing
+=========
+
+Loopy is licensed to you under the MIT/X Consortium license:
+
+Copyright (c) 2009-13 Andreas Klöckner and Contributors.
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+Frequently Asked Questions
+==========================
+
+The FAQ is maintained collaboratively on the
+`Wiki FAQ page <http://wiki.tiker.net/Loopy/FrequentlyAskedQuestions>`_.
+
+Citing Loopy
+============
+
+If you use loopy for your work and find its approach helpful, please
+consider citing the following article.
+
+    TBD, Fixme.
+
+We are not asking you to gratuitously cite loopy in work that is otherwise
+unrelated to software. That said, if you do discuss some of the development
+aspects of your code and would like to highlight a few of the ideas behind
+loopy, feel free to cite this article:
+
+    Andreas Klöckner, TBD
+
+Here's a Bibtex entry for your convenience::
+
+    @article{kloeckner_pycuda_2012,
+       author = {{Kl{\"o}ckner}, Andreas
+            and {Warburton}, Timothy},
+       title = "{TBD}",
+       journal = "TBD",
+       volume = "TBD",
+       number = "TBD",
+       pages = "TBD",
+       year = "TBD",
+       doi = "TBD",
+    }
+
+
+
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -220,14 +220,6 @@ Automatic Testing
 Troubleshooting
 ---------------

-Special-purpose functionality
-----------------------------
-
-Manipulating Reductions
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: realize_reduction
-
 Printing :class:`LoopKernel` objects
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


--- a/examples/hello-loopy.cl
+++ b/examples/hello-loopy.cl
+#define lid(N) ((int) get_local_id(N))
+#define gid(N) ((int) get_group_id(N))
+
+__kernel void __attribute__ ((reqd_work_group_size(128, 1, 1)))
+  loopy_kernel(__global float *restrict out, __global float const *restrict a, int const n)
+{
+
+    if ((-1 + -128 * gid(0) + -1 * lid(0) + n) >= 0)
+          out[lid(0) + gid(0) * 128] = 2.0f * a[lid(0) + gid(0) * 128];
+}
--- a/examples/hello-loopy.py
+++ b/examples/hello-loopy.py
+import numpy as np
+import loopy as lp
+import pyopencl as cl
+import pyopencl.array
+
+# -----------------------------------------------------------------------------
+# setup: OpenCL context, command queue, and the float32 input vector `a`
+# -----------------------------------------------------------------------------
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+n = 15 * 10**6  # number of vector entries
+a = cl.array.arange(queue, n, dtype=np.float32)
+
+# -----------------------------------------------------------------------------
+# generation (loopy bits start here): describe the kernel for the first device
+# -----------------------------------------------------------------------------
+knl = lp.make_kernel(ctx.devices[0],
+    "{[i]: 0<=i<n}",   # "loop domain"-- what values does i take?
+    "out[i] = 2*a[i]", # "instructions" to be executed across the domain
+    [ # argument declarations
+        lp.GlobalArg("out", np.float32, shape=("n",)),
+        lp.GlobalArg("a", np.float32, shape=("n",)),
+        lp.ValueArg("n", np.int32),
+        ])
+
+# -----------------------------------------------------------------------------
+# transformation: split i by 128 into a group axis (g.0) and a local axis (l.0)
+# -----------------------------------------------------------------------------
+knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
+
+# -----------------------------------------------------------------------------
+# execution: compile, run on `a`, then print the generated OpenCL code
+# -----------------------------------------------------------------------------
+cknl = lp.CompiledKernel(ctx, knl)
+evt, (out,) = cknl(queue, a=a, n=n)  # yields an event and a 1-tuple holding `out`
+
+cknl.print_code()
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -427,28 +427,27 @@ def get_problems(kernel, parameters):

    glens, llens = kernel.get_grid_sizes_as_exprs()

+    if (max(len(glens), len(llens))
+            > kernel.device.max_work_item_dimensions):
+        msg(5, "too many work item dimensions")
+
    from pymbolic import evaluate
    from pymbolic.mapper.evaluator import UnknownVariableError
    try:
        glens = evaluate(glens, parameters)
        llens = evaluate(llens, parameters)
    except UnknownVariableError, name:
-        raise RuntimeError("When checking your kernel for problems, "
-                "a value for parameter '%s' was not available. Pass "
-                "it in the 'parameters' kwarg to check_kernels()."
+        msg(1, "could not check axis bounds because no value "
+                "for variable '%s' was passed to check_kernels()"
                % name)
-
-    if (max(len(glens), len(llens))
-            > kernel.device.max_work_item_dimensions):
-        msg(5, "too many work item dimensions")
-
-    for i in range(len(llens)):
-        if llens[i] > kernel.device.max_work_item_sizes[i]:
-            msg(5, "group axis %d too big" % i)
-
-    from pytools import product
-    if product(llens) > kernel.device.max_work_group_size:
-        msg(5, "work group too big")
+    else:
+        for i in range(len(llens)):
+            if llens[i] > kernel.device.max_work_item_sizes[i]:
+                msg(5, "group axis %d too big" % i)
+
+        from pytools import product
+        if product(llens) > kernel.device.max_work_group_size:
+            msg(5, "work group too big")

    import pyopencl as cl
    from pyopencl.characterize import usable_local_mem_size

--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -32,6 +32,8 @@ import numpy as np

 from pytools import Record

+AUTO_TEST_SKIP_RUN = False
+



@@ -646,7 +648,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet

        print "using %s for the reference calculation" % dev

-        ref_evt, _ = ref_compiled(ref_queue, **ref_args)
+        if not AUTO_TEST_SKIP_RUN:
+            ref_evt, _ = ref_compiled(ref_queue, **ref_args)
+        else:
+            ref_evt = cl.enqueue_marker(ref_queue)

        ref_queue.finish()
        ref_stop = time()
@@ -693,9 +698,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
            print 75*"-"

        for i in range(warmup_rounds):
-            evt, _ = compiled(queue, **args)
+            if not AUTO_TEST_SKIP_RUN:
+                compiled(queue, **args)

-            if need_check:
+            if need_check and not AUTO_TEST_SKIP_RUN:
                for arg_desc in arg_descriptors:
                    if arg_desc is None:
                        continue
@@ -731,8 +737,11 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
            evt_start = cl.enqueue_marker(queue)

            for i in range(timing_rounds):
-                evt, _ = compiled(queue, **args)
-                events.append(evt)
+                if not AUTO_TEST_SKIP_RUN:
+                    evt, _ = compiled(queue, **args)
+                    events.append(evt)
+                else:
+                    events.append(cl.enqueue_marker(queue))

            evt_end = cl.enqueue_marker(queue)


--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -1017,8 +1017,8 @@ class LoopKernel(Record):

        processed_args = []
        for arg in args:
-            for name in arg.name.split(","):
-                new_arg = arg.copy(name=name)
+            for arg_name in arg.name.split(","):
+                new_arg = arg.copy(name=arg_name)
                if isinstance(arg, _ShapedArg):
                    if arg.shape is not None:
                        new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines))