diff --git a/loopy/library/random123.py b/loopy/library/random123.py
new file mode 100644
index 0000000000000000000000000000000000000000..78933d9f42d5fc0169115beb9069e55b842d981a
--- /dev/null
+++ b/loopy/library/random123.py
@@ -0,0 +1,222 @@
+"""Library integration with Random123."""
+
+from __future__ import division, absolute_import
+
+__copyright__ = "Copyright (C) 2016 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+from pytools import Record
+from mako.template import Template
+import numpy as np
+
+
+# {{{ rng metadata
+
+class RNGInfo(Record):
+    @property
+    def full_name(self):
+        return "%s%dx%d" % (self.name, self.width, self.bits)
+
+
+_philox_base_info = RNGInfo(
+            name="philox",
+            pyopencl_header="pyopencl-random123/philox.cl",
+            generic_header="Random123/philox.h",
+            key_width=2)
+
+_threefry_base_info = RNGInfo(
+            name="threefry",
+            pyopencl_header="pyopencl-random123/threefry.cl",
+            generic_header="Random123/threefry.h",
+            key_width=4)
+
+RNG_VARIANTS = [
+        _philox_base_info.copy(width=2, bits=32),
+        _philox_base_info.copy(width=2, bits=64),
+        _philox_base_info.copy(width=4, bits=32),
+        _philox_base_info.copy(width=4, bits=64),
+
+        _threefry_base_info.copy(width=2, bits=32),
+        _threefry_base_info.copy(width=2, bits=64),
+        _threefry_base_info.copy(width=4, bits=32),
+        _threefry_base_info.copy(width=4, bits=64),
+        ]
+
+FUNC_NAMES_TO_RNG = dict(
+        (v.full_name + suffix, v)
+        for v in RNG_VARIANTS
+        for suffix in [
+            "", "_f32", "_f64",
+            ])
+
+# }}}
+
+
+# {{{ preamble
+
+PREAMBLE_TEMPLATE = Template("""
+%if is_pyopencl_target:
+#include <${ rng_variant.pyopencl_header }>
+%else:
+#include <${ rng_variant.generic_header }>
+%endif
+
+<%
+name = rng_variant.full_name
+width = rng_variant.width
+if rng_variant.bits == 32:
+    counter_type = "uint%d" % width
+    key_type = "uint%d" % rng_variant.key_width
+elif rng_variant.bits == 64:
+    counter_type = "ulong%d" % width
+    key_type = "ulong%d" % rng_variant.key_width
+else:
+    assert False
+%>
+
+typedef union {
+    ${ counter_type } v;
+    ${ name }_ctr_t c;
+} ${ name }_ctr_vec_union;
+
+
+${ counter_type } ${ name }_bump(${ counter_type } ctr)
+{
+    if (++ctr.x == 0)
+        if (++ctr.y == 0)
+            ++ctr.z;
+    return ctr;
+}
+
+${ counter_type } ${ name }_gen(
+        ${ counter_type } ctr,
+        ${ key_type } key,
+        ${ counter_type } *new_ctr)
+{
+    ${ name }_ctr_vec_union result;
+    result.c = ${ name }(
+        *(${ name }_ctr_t *) &ctr,
+        *(${ name }_key_t *) &key);
+    *new_ctr = ${ name }_bump(ctr);
+    return result.v;
+}
+
+float${ width } ${ name }_f32(
+        ${ counter_type } ctr,
+        ${ key_type } key,
+        ${ counter_type } *new_ctr)
+{
+    *new_ctr = ctr;
+    return
+        convert_float${ width }(${ name }_gen(*new_ctr, key, new_ctr))
+        * ${ repr(1/2**32) }f;
+}
+
+double${ width } ${ name }_f64(
+        ${ counter_type } ctr,
+        ${ key_type } key,
+        ${ counter_type } *new_ctr)
+{
+    *new_ctr = ctr;
+    %if rng_variant.bits == 32:
+        return
+            convert_double${ width }(${ name }_gen(*new_ctr, key, new_ctr))
+            * ${ repr(1/2**32) }
+            +
+            convert_double${ width }(${ name }_gen(*new_ctr, key, new_ctr))
+            * ${ repr(1/2**64) };
+
+    %elif rng_variant.bits == 64:
+        *new_ctr = ctr;
+        return
+            convert_double${ width }(${ name }_gen(*new_ctr, key, new_ctr))
+            * ${ repr(1/2**64) };
+
+    %else:
+        #error Unrecognized bit width in RNG
+
+    %endif
+}
+
+""", strict_undefined=True)
+
+# }}}
+
+
+def random123_preamble_generator(preamble_info):
+    for f in preamble_info.seen_functions:
+        try:
+            rng_variant = FUNC_NAMES_TO_RNG[f.name]
+        except KeyError:
+            continue
+
+        from loopy.target.pyopencl import PyOpenCLTarget
+        yield ("90-random123-"+rng_variant.full_name,
+                PREAMBLE_TEMPLATE.render(
+                    is_pyopencl_target=isinstance(
+                        preamble_info.kernel.target,
+                        PyOpenCLTarget),
+                    rng_variant=rng_variant,
+                    ))
+
+
+def random123_function_mangler(kernel, name, arg_dtypes):
+    try:
+        rng_variant = FUNC_NAMES_TO_RNG[name]
+    except KeyError:
+        return None
+
+    from loopy.types import NumpyType
+    target = kernel.target
+    base_dtype = {32: np.uint32, 64: np.uint64}[rng_variant.bits]
+    ctr_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.width)
+    key_dtype = target.vector_dtype(NumpyType(base_dtype), rng_variant.key_width)
+
+    from loopy.kernel.data import CallMangleInfo
+    fn = rng_variant.full_name
+    if name == fn:
+        return CallMangleInfo(
+                target_name=fn+"_gen",
+                result_dtypes=(ctr_dtype, ctr_dtype),
+                arg_dtypes=(ctr_dtype, key_dtype))
+
+    elif name == fn + "_f32":
+        return CallMangleInfo(
+                target_name=name,
+                result_dtypes=(
+                    target.vector_dtype(NumpyType(np.float32), rng_variant.width),
+                    ctr_dtype),
+                arg_dtypes=(ctr_dtype, key_dtype))
+
+    elif name == fn + "_f64":
+        return CallMangleInfo(
+                target_name=name,
+                result_dtypes=(
+                    target.vector_dtype(NumpyType(np.float64), rng_variant.width),
+                    ctr_dtype),
+                arg_dtypes=(ctr_dtype, key_dtype))
+
+    else:
+        return None
+
+# vim: foldmethod=marker
diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py
index 6097cd7a235e6a1b41ed5a2ac75892314bbe4817..44b057f1a713a09bd2f493c8f3be67e790049e6e 100644
--- a/loopy/target/pyopencl.py
+++ b/loopy/target/pyopencl.py
@@ -276,14 +276,18 @@ class PyOpenCLTarget(OpenCLTarget):
     comparison_fields = ["device"]
 
     def function_manglers(self):
+        from loopy.library.random123 import random123_function_mangler
         return (
                 super(PyOpenCLTarget, self).function_manglers() + [
-                    pyopencl_function_mangler
+                    pyopencl_function_mangler,
+                    random123_function_mangler
                     ])
 
     def preamble_generators(self):
+        from loopy.library.random123 import random123_preamble_generator
         return ([
-            pyopencl_preamble_generator
+            pyopencl_preamble_generator,
+            random123_preamble_generator,
             ] + super(PyOpenCLTarget, self).preamble_generators())
 
     def preprocess(self, kernel):
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 7474b5128548b20950357dc5432ce674a82f5c8d..46273b52f61566a5d061dd078c955edcdbb2a66d 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2520,6 +2520,38 @@ def test_forced_iname_deps_and_reduction():
     print(k.stringify(with_dependencies=True))
 
 
+@pytest.mark.parametrize("tp", ["f32", "f64"])
+def test_random123(ctx_factory, tp):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    n = 150000
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            <> key2 = make_uint2(i, 324830944) {inames=i}
+            <> key4 = make_uint4(i, 324830944, 234181, 2233) {inames=i}
+            <> ctr = make_uint4(0, 1, 2, 3) {inames=i}
+            <> real, ctr = philox4x32_TYPE(ctr, key2)
+            <> imag, ctr = threefry4x32_TYPE(ctr, key4)
+
+            out[i, 0] = real.s0 + 1j * imag.s0
+            out[i, 1] = real.s1 + 1j * imag.s1
+            out[i, 2] = real.s2 + 1j * imag.s2
+            out[i, 3] = real.s3 + 1j * imag.s3
+            """.replace("TYPE", tp))
+
+    knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")
+    knl = lp.set_options(knl, write_cl=True)
+
+    evt, (out,) = knl(queue, n=n)
+
+    out = out.get()
+    assert (out < 1).all()
+    assert (0 <= out).all()
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])