From 7530148ad28283332614b4aa1e241814bcc3c91c Mon Sep 17 00:00:00 2001
From: Tim Warburton <timwar@caam.rice.edu>
Date: Mon, 5 Nov 2012 15:01:09 -0600
Subject: [PATCH] Add SEM example.

---
 examples/sem_reagan.py | 237 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 examples/sem_reagan.py

diff --git a/examples/sem_reagan.py b/examples/sem_reagan.py
new file mode 100644
index 000000000..88dde57a6
--- /dev/null
+++ b/examples/sem_reagan.py
@@ -0,0 +1,237 @@
+from __future__ import division
+
+__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+
+
+import numpy as np
+import pyopencl as cl
+import loopy as lp
+
+from pyopencl.tools import pytest_generate_tests_for_pyopencl \
+        as pytest_generate_tests
+
+
+
+
+def test_tim2d(ctx_factory):
+    """Build the 2D ``semlap2D`` kernel and auto-test its transformed variant.
+
+    :arg ctx_factory: zero-argument callable whose result provides
+        ``devices[0]`` for kernel creation and is passed to the auto-tester.
+    """
+    dtype = np.float32
+    ctx = ctx_factory()
+    order = "C"
+    n = 8
+
+    from pymbolic import var
+    K_sym = var("K")
+    field_shape = (K_sym, n, n)
+
+    # K (the extent of iname e) stays symbolic until run time.
+    knl = lp.make_kernel(ctx.devices[0],
+            "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n,
+           [
+            "ur(a,b) := sum(@o, D[a,o]*u[e,o,b])",
+            "us(a,b) := sum(@o, D[b,o]*u[e,a,o])",
+            #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)",
+            "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)",
+            "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)",
+            "lap[e,i,j]  = "
+            "  sum(m, D[m,i]*Gux(m,j))"
+            "+ sum(m, D[m,j]*Guy(i,m))"
+            ],
+            [
+            lp.GlobalArg("u", dtype, shape=field_shape, order=order),
+            lp.GlobalArg("lap", dtype, shape=field_shape, order=order),
+            lp.GlobalArg("G", dtype, shape=(3,)+field_shape, order=order),
+            # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
+            lp.GlobalArg("D", dtype, shape=(n, n), order=order),
+            # lp.ImageArg("D", dtype, shape=(n, n)),
+            lp.ValueArg("K", np.int32, approximately=1000),
+            ],
+            name="semlap2D", assumptions="K>=1")
+
+    seq_knl = knl  # untransformed kernel, used as reference by the auto-test
+
+    def variant_orig(knl):
+        """Return *knl* after tagging, prefetching and precomputation."""
+        knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0"))
+
+        knl = lp.add_prefetch(knl, "D[:,:]")
+        knl = lp.add_prefetch(knl, "u[e, :, :]")
+
+        knl = lp.precompute(knl, "ur(m,j)", np.float32, ["m", "j"])
+        knl = lp.precompute(knl, "us(i,m)", np.float32, ["i", "m"])
+
+        knl = lp.precompute(knl, "Gux(m,j)", np.float32, ["m", "j"])
+        knl = lp.precompute(knl, "Guy(i,m)", np.float32, ["i", "m"])
+
+        knl = lp.add_prefetch(knl, "G$x[:,e,:,:]")
+        knl = lp.add_prefetch(knl, "G$y[:,e,:,:]")
+
+        knl = lp.tag_inames(knl, dict(o="unr"))
+        knl = lp.tag_inames(knl, dict(m="unr"))
+
+        knl = lp.set_instruction_priority(knl, "D_fetch", 5)
+        print(knl)  # call form: same output on Python 2, parses on Python 3
+        return knl
+
+    for variant in [variant_orig]:
+        kernel_gen = lp.generate_loop_schedules(variant(knl))
+        kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000))
+
+        K = 1000
+        lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
+                op_count=[K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9],
+                op_label=["GFlops"],
+                parameters={"K": K})
+
+
+
+
+def make_me_a_test_test_tim3d_slab(ctx_factory):
+    """Set up the ``semdiff3D`` kernel and auto-test it against itself.
+
+    Not named ``test_*``: the name marks it as a test still to be finished.
+    """
+    dtype = np.float32
+    ctx = ctx_factory()
+    order = "C"
+    n = 8
+
+    # E - run-time symbolic
+    knl = lp.make_kernel(ctx.devices[0],
+            "[E] -> {[i,j,k, o, e]: 0<=i,j,k,o < n and 0<=e<E }",
+            """
+            <> ur[i,j,k] = sum(@o, D[i,o]*u[e,o,j,k])
+            <> us[i,j,k] = sum(@o, D[j,o]*u[e,i,o,k])
+            <> ut[i,j,k] = sum(@o, D[k,o]*u[e,i,j,o])
+            """,
+            [
+            lp.GlobalArg("u", dtype, shape="E,n,n,n", order=order),
+            # lp.GlobalArg("G", dtype, shape=(3,)+field_shape, order=order),
+            # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
+            lp.GlobalArg("D", dtype, shape=(n, n), order=order),
+            # lp.ImageArg("D", dtype, shape=(n, n)),
+            lp.ValueArg("E", np.int32, approximately=1000),
+            ],
+            name="semdiff3D", assumptions="E>=1",
+            defines={"n": n})
+
+    seq_knl = knl
+
+    def variant_orig(knl):
+        return knl
+
+    for variant in [variant_orig]:
+        kernel_gen = lp.generate_loop_schedules(variant(knl))
+        kernel_gen = lp.check_kernels(kernel_gen, dict(E=1000))
+        E = 1000
+        lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
+                op_count=[E*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9],
+                op_label=["GFlops"],
+                parameters={"E": E})
+
+
+
+
+def test_tim3d_slab(ctx_factory):
+    """Build the 3D ``semdiff3D`` kernel (output ``lap``) and auto-test it.
+
+    ``variant_orig`` returns its input as-is, so the kernel is checked vs itself.
+    """
+    dtype = np.float32
+    ctx = ctx_factory()
+    order = "C"
+    Nq = 8
+
+    # E - run-time symbolic
+    knl = lp.make_kernel(ctx.devices[0],
+            "[E] -> {[i,j, k, o,m, e]: 0<=i,j,k,o,m < Nq and 0<=e<E }",
+            """
+            ur(a,b,c) := sum(o, D[a,o]*u[e,o,b,c])
+            us(a,b,c) := sum(o, D[b,o]*u[e,a,o,c])
+            ut(a,b,c) := sum(o, D[c,o]*u[e,a,b,o])
+
+            Gur(a,b,c) := G[0,e,a,b,c]*ur(a,b,c)+G[1,e,a,b,c]*us(a,b,c)+G[2,e,a,b,c]*ut(a,b,c)
+            Gus(a,b,c) := G[1,e,a,b,c]*ur(a,b,c)+G[3,e,a,b,c]*us(a,b,c)+G[4,e,a,b,c]*ut(a,b,c)
+            Gut(a,b,c) := G[2,e,a,b,c]*ur(a,b,c)+G[4,e,a,b,c]*us(a,b,c)+G[5,e,a,b,c]*ut(a,b,c)
+
+            lapr(a,b,c):= sum(m, D[m,a]*Gur(m,b,c)) 
+            laps(a,b,c):= sum(m, D[m,b]*Gus(a,m,c)) 
+            lapt(a,b,c):= sum(m, D[m,c]*Gut(a,b,m)) 
+
+            lap[e,i,j,k] = lapr(i,j,k) + laps(i,j,k) + lapt(i,j,k)
+            """,
+            [
+            lp.GlobalArg("u,lap", dtype, shape="E,Nq,Nq,Nq", order=order),
+            lp.GlobalArg("G", dtype, shape="6,E,Nq,Nq,Nq", order=order),
+            # lp.ConstantArrayArg("D", dtype, shape="Nq,Nq", order=order),
+            lp.GlobalArg("D", dtype, shape="Nq, Nq", order=order),
+            # lp.ImageArg("D", dtype, shape="Nq, Nq"),
+            lp.ValueArg("E", np.int32, approximately=1000),
+            ],
+            name="semdiff3D", assumptions="E>=1",
+            defines={"Nq": Nq})
+
+    for derivative in "rst":
+        knl = lp.duplicate_inames(knl, "o", within="... < lap"+derivative, suffix="_"+derivative)
+
+    seq_knl = knl
+
+    def variant_orig(knl):
+        return knl  # NOTE: early exit; the steps below never run (WIP)
+        knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), )
+
+        for derivative, par_names in [
+            ("r", ["j", "k"]),
+            ("s", ["i", "k"]),
+            ("t", ["i", "j"])
+            ]:
+            knl = lp.precompute(knl, "ur", ["ir", "jr"], within="lapr")
+
+        print(knl)
+        return knl
+    #print lp.preprocess_kernel(knl)
+    #1/0
+
+    for variant in [variant_orig]:
+        kernel_gen = lp.generate_loop_schedules(variant(knl))
+        kernel_gen = lp.check_kernels(kernel_gen, dict(E=1000))
+
+        E = 1000
+        lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen,
+                op_count=[-666],  # NOTE(review): looks like a placeholder
+                op_label=["GFlops"],
+                parameters={"E": E})
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) <= 1:  # no argument: hand this file to py.test
+        from py.test.cmdline import main
+        main([__file__])
+    else:  # otherwise run the first argument as Python source
+        exec(sys.argv[1])
-- 
GitLab