diff --git a/doc/index.rst b/doc/index.rst index 1687f4c8ee912c1e53879861dc548ca1333770b5..4443e82c7281b3742368b6678ab38a6487dd8218 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -12,15 +12,17 @@ model. Here's a very simple example of how to double the entries of a vector using loopy: .. literalinclude:: ../examples/hello-loopy.py + :end-before: ENDEXAMPLE -The following kernel is generated, compiled, and executed behind your back (and -also printed at the end): +This example is included in the :mod:`loopy` distribution as +:download:`examples/hello-loopy.py <../examples/hello-loopy.py>`. + +When you run this script, the following kernel is generated, compiled, and executed: .. literalinclude:: ../examples/hello-loopy.cl :language: c -This file is included in the :mod:`loopy` distribution as -:file:`examples/hello-loopy.py`. +(See the full example for how to print the generated code.) .. toctree:: :maxdepth: 2 diff --git a/examples/hello-loopy.py b/examples/hello-loopy.py index a835005ab53fe9c15323eff5b4624d3b5b023125..2c8ff4c3bbdfe331596eec1c0dcb49f6bd8b6e46 100644 --- a/examples/hello-loopy.py +++ b/examples/hello-loopy.py @@ -3,31 +3,28 @@ import loopy as lp import pyopencl as cl import pyopencl.array -# ----------------------------------------------------------------------------- # setup -# ----------------------------------------------------------------------------- +# ----- ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) n = 15 * 10**6 a = cl.array.arange(queue, n, dtype=np.float32) -# ----------------------------------------------------------------------------- -# generation (loopy bits start here) -# ----------------------------------------------------------------------------- +# create +# ------ knl = lp.make_kernel(ctx.devices[0], "{ [i]: 0<=i<n }", "out[i] = 2*a[i]") -# ----------------------------------------------------------------------------- -# transformation -# ----------------------------------------------------------------------------- +# transform +# --------- knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0") -# ----------------------------------------------------------------------------- -# execution -# ----------------------------------------------------------------------------- -cknl = lp.CompiledKernel(ctx, knl) -evt, (out,) = cknl(queue, a=a, n=n) +# execute +# ------- +evt, (out,) = knl(queue, a=a, n=n) +# ENDEXAMPLE +cknl = lp.CompiledKernel(ctx, knl) print cknl.get_highlighted_code({"a": np.float32}) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2cd1bce37eccfb02468896aeb8deed397f31974a..f84feca880cac553ee9e6ad6471cd964de0bf48c 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -844,6 +844,19 @@ class LoopKernel(Record): # }}} + # {{{ direct execution + + @memoize_method + def get_compiled_kernel(self, ctx): + from loopy.compiled import CompiledKernel + return CompiledKernel(ctx, self) + + def __call__(self, queue, **kwargs): + return self.get_compiled_kernel(queue.context)( + queue, **kwargs) + + # }}} + # }}} # vim: foldmethod=marker