From 3f728ca40711fcc8bc9802a8eb49f47caaf539f3 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 24 Apr 2013 13:14:21 -0400
Subject: [PATCH] Minor fixes and doc improvements. Add hello-world example.

---
 doc/index.rst           | 22 ++++++++++-
 doc/misc.rst            | 86 +++++++++++++++++++++++++++++++++++++++++
 doc/reference.rst       |  8 ----
 examples/hello-loopy.cl | 10 +++++
 examples/hello-loopy.py | 38 ++++++++++++++++++
 loopy/check.py          | 29 +++++++-------
 loopy/compiled.py       | 19 ++++++---
 loopy/kernel.py         |  4 +-
 8 files changed, 185 insertions(+), 31 deletions(-)
 create mode 100644 doc/misc.rst
 create mode 100644 examples/hello-loopy.cl
 create mode 100644 examples/hello-loopy.py

diff --git a/doc/index.rst b/doc/index.rst
index 032b11e5e..1687f4c8e 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,13 +1,33 @@
 Welcome to loopy's documentation!
 =================================
 
-Contents:
+.. note::
+    Loo.py hasn't been released yet. What's documented here generally
+    exists as code and has survived some light testing. So if you try
+    it and it works for you, great. If not, please do make sure to shoot
+    me a message.
+
+loopy is a code generator for array-based code in the OpenCL/CUDA execution
+model. Here's a very simple example of how to double the entries of a vector
+using loopy:
+
+.. literalinclude:: ../examples/hello-loopy.py
+
+The following kernel is generated, compiled, and executed behind your back (and
+also printed at the end):
+
+.. literalinclude:: ../examples/hello-loopy.cl
+    :language: c
+
+The example script is included in the :mod:`loopy` distribution as
+:file:`examples/hello-loopy.py`.
 
 .. toctree::
     :maxdepth: 2
 
     guide
     reference
+    misc
 
 Indices and tables
 ==================
diff --git a/doc/misc.rst b/doc/misc.rst
new file mode 100644
index 000000000..1b2ea07de
--- /dev/null
+++ b/doc/misc.rst
@@ -0,0 +1,86 @@
+Installation
+============
+
+Installation should require no more than the usual::
+
+    python setup.py install
+
+User-visible Changes
+====================
+
+Version 2013.1
+--------------
+.. note::
+
+    This version is currently under development. You can get snapshots from
+    Loopy's `git repository <https://github.com/inducer/loopy>`_.
+
+* Initial release.
+
+.. _license:
+
+Licensing
+=========
+
+Loopy is licensed to you under the MIT/X Consortium license:
+
+Copyright (c) 2009-13 Andreas Klöckner and Contributors.
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+Frequently Asked Questions
+==========================
+
+The FAQ is maintained collaboratively on the
+`Wiki FAQ page <http://wiki.tiker.net/Loopy/FrequentlyAskedQuestions>`_.
+
+Citing Loopy
+============
+
+If you use loopy for your work and find its approach helpful, please
+consider citing the following article.
+
+    TBD, Fixme.
+
+We are not asking you to gratuitously cite loopy in work that is otherwise
+unrelated to software. That said, if you do discuss some of the development
+aspects of your code and would like to highlight a few of the ideas behind
+loopy, feel free to cite this article:
+
+    Andreas Klöckner, TBD
+
+Here's a Bibtex entry for your convenience::
+
+    @article{kloeckner_pycuda_2012,
+       author = {{Kl{\"o}ckner}, Andreas
+            and {Warburton}, Timothy},
+       title = "{TBD}",
+       journal = "TBD",
+       volume = "TBD",
+       number = "TBD",
+       pages = "TBD",
+       year = "TBD",
+       doi = "TBD",
+    }
+
+
+
diff --git a/doc/reference.rst b/doc/reference.rst
index e3a59b97d..bf3ddc864 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -220,14 +220,6 @@ Automatic Testing
 Troubleshooting
 ---------------
 
-Special-purpose functionality
------------------------------
-
-Manipulating Reductions
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: realize_reduction
-
 Printing :class:`LoopKernel` objects
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/examples/hello-loopy.cl b/examples/hello-loopy.cl
new file mode 100644
index 000000000..bad7e0c00
--- /dev/null
+++ b/examples/hello-loopy.cl
@@ -0,0 +1,10 @@
+#define lid(N) ((int) get_local_id(N))
+#define gid(N) ((int) get_group_id(N))
+
+__kernel void __attribute__ ((reqd_work_group_size(128, 1, 1)))
+  loopy_kernel(__global float *restrict out, __global float const *restrict a, int const n)
+{
+
+    if ((-1 + -128 * gid(0) + -1 * lid(0) + n) >= 0)
+          out[lid(0) + gid(0) * 128] = 2.0f * a[lid(0) + gid(0) * 128];
+}
diff --git a/examples/hello-loopy.py b/examples/hello-loopy.py
new file mode 100644
index 000000000..3cb600d90
--- /dev/null
+++ b/examples/hello-loopy.py
@@ -0,0 +1,38 @@
+import numpy as np
+import loopy as lp
+import pyopencl as cl
+import pyopencl.array
+
+# -----------------------------------------------------------------------------
+# setup
+# -----------------------------------------------------------------------------
+ctx = cl.create_some_context()
+queue = cl.CommandQueue(ctx)
+
+n = 15 * 10**6
+a = cl.array.arange(queue, n, dtype=np.float32)
+
+# -----------------------------------------------------------------------------
+# generation (loopy bits start here)
+# -----------------------------------------------------------------------------
+knl = lp.make_kernel(ctx.devices[0],
+    "{[i]: 0<=i<n}",   # "loop domain"-- what values does i take?
+    "out[i] = 2*a[i]", # "instructions" to be executed across the domain
+    [ # argument declarations
+        lp.GlobalArg("out", np.float32, shape=("n",)),
+        lp.GlobalArg("a", np.float32, shape=("n",)),
+        lp.ValueArg("n", np.int32),
+        ])
+
+# -----------------------------------------------------------------------------
+# transformation
+# -----------------------------------------------------------------------------
+knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
+
+# -----------------------------------------------------------------------------
+# execution
+# -----------------------------------------------------------------------------
+cknl = lp.CompiledKernel(ctx, knl)
+evt, (out,) = cknl(queue, a=a, n=n)
+
+cknl.print_code()
diff --git a/loopy/check.py b/loopy/check.py
index 3d340c482..c7ba6a76f 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -427,28 +427,27 @@ def get_problems(kernel, parameters):
 
     glens, llens = kernel.get_grid_sizes_as_exprs()
 
+    if (max(len(glens), len(llens))
+            > kernel.device.max_work_item_dimensions):
+        msg(5, "too many work item dimensions")
+
     from pymbolic import evaluate
     from pymbolic.mapper.evaluator import UnknownVariableError
     try:
         glens = evaluate(glens, parameters)
         llens = evaluate(llens, parameters)
     except UnknownVariableError, name:
-        raise RuntimeError("When checking your kernel for problems, "
-                "a value for parameter '%s' was not available. Pass "
-                "it in the 'parameters' kwarg to check_kernels()."
+        msg(1, "could not check axis bounds because no value "
+                "for variable '%s' was passed to check_kernels()"
                 % name)
-
-    if (max(len(glens), len(llens))
-            > kernel.device.max_work_item_dimensions):
-        msg(5, "too many work item dimensions")
-
-    for i in range(len(llens)):
-        if llens[i] > kernel.device.max_work_item_sizes[i]:
-            msg(5, "group axis %d too big" % i)
-
-    from pytools import product
-    if product(llens) > kernel.device.max_work_group_size:
-        msg(5, "work group too big")
+    else:
+        for i in range(len(llens)):
+            if llens[i] > kernel.device.max_work_item_sizes[i]:
+                msg(5, "group axis %d too big" % i)
+
+        from pytools import product
+        if product(llens) > kernel.device.max_work_group_size:
+            msg(5, "work group too big")
 
     import pyopencl as cl
     from pyopencl.characterize import usable_local_mem_size
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 053bc1d87..11a7b888e 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -32,6 +32,8 @@ import numpy as np
 
 from pytools import Record
 
+AUTO_TEST_SKIP_RUN = False
+
 
 
 
@@ -646,7 +648,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
 
         print "using %s for the reference calculation" % dev
 
-        ref_evt, _ = ref_compiled(ref_queue, **ref_args)
+        if not AUTO_TEST_SKIP_RUN:
+            ref_evt, _ = ref_compiled(ref_queue, **ref_args)
+        else:
+            ref_evt = cl.enqueue_marker(ref_queue)
 
         ref_queue.finish()
         ref_stop = time()
@@ -693,9 +698,10 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
             print 75*"-"
 
         for i in range(warmup_rounds):
-            evt, _ = compiled(queue, **args)
+            if not AUTO_TEST_SKIP_RUN:
+                compiled(queue, **args)
 
-            if need_check:
+            if need_check and not AUTO_TEST_SKIP_RUN:
                 for arg_desc in arg_descriptors:
                     if arg_desc is None:
                         continue
@@ -731,8 +737,11 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
             evt_start = cl.enqueue_marker(queue)
 
             for i in range(timing_rounds):
-                evt, _ = compiled(queue, **args)
-                events.append(evt)
+                if not AUTO_TEST_SKIP_RUN:
+                    evt, _ = compiled(queue, **args)
+                    events.append(evt)
+                else:
+                    events.append(cl.enqueue_marker(queue))
 
             evt_end = cl.enqueue_marker(queue)
 
diff --git a/loopy/kernel.py b/loopy/kernel.py
index a79237a9c..80aa93842 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -1017,8 +1017,8 @@ class LoopKernel(Record):
 
         processed_args = []
         for arg in args:
-            for name in arg.name.split(","):
-                new_arg = arg.copy(name=name)
+            for arg_name in arg.name.split(","):
+                new_arg = arg.copy(name=arg_name)
                 if isinstance(arg, _ShapedArg):
                     if arg.shape is not None:
                         new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines))
-- 
GitLab