diff --git a/MANIFEST.in b/MANIFEST.in index d5ad1fe80566494564a29c06d898becf575cf16d..6a79d3fc5198f5dfa27720643e23c90c264db6d6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include test/*.py -include examples/*.py +recursive-include examples *.py *.cl *.floopy *.sh include doc/*.rst include doc/Makefile
diff --git a/bin/loopy b/bin/loopy
new file mode 100644
index 0000000000000000000000000000000000000000..830673165b64e84e22940d3d988af13ff3d3ef8b
--- /dev/null
+++ b/bin/loopy
@@ -0,0 +1,235 @@
+#! /usr/bin/env python
+from __future__ import print_function
+
+import sys
+
+import loopy as lp
+import numpy as np
+
+
+def to_python_literal(value):
+    """Return Python source text for the value of a ``#define``.
+
+    :arg value: the replacement text of a ``#define``, as a string.
+    :returns: a decimal integer literal if *value* parses as an integer,
+        the ``repr`` of the parsed float if it parses as a float, and the
+        ``repr`` of the string itself otherwise.
+    """
+    try:
+        int_value = int(value)
+    except ValueError:
+        pass
+    else:
+        # Emit the parsed integer rather than the raw text: text such as
+        # "010" would be read as octal (Python 2) or rejected (Python 3)
+        # once the generated assignment is compiled.
+        return str(int_value)
+
+    try:
+        float_value = float(value)
+    except ValueError:
+        pass
+    else:
+        return repr(float_value)
+
+    return repr(value)
+
+
+def defines_to_python_code(defines_str):
+    """Turn C-style ``#define NAME VALUE`` lines into Python assignments.
+
+    :arg defines_str: text made up of ``#define`` lines; blank lines are
+        skipped.
+    :returns: a string with one ``NAME = <literal>`` line per define.
+    :raises RuntimeError: if a non-blank line does not match
+        ``#define NAME VALUE``.
+    """
+    import re
+    DEFINE_RE = re.compile(r"^\#define\s+([a-zA-Z0-9_]+)\s+(.*)$")
+    result = []
+    for line in defines_str.split("\n"):
+        if not line.strip():
+            continue
+
+        match = DEFINE_RE.match(line)
+        if match is None:
+            raise RuntimeError("#define not understood: '%s'" % line)
+
+        result.append(
+                "%s = %s" % (match.group(1), to_python_literal(match.group(2))))
+
+    return "\n".join(result)
+
+
+def main():
+    """Command-line entry point: generate code for the kernels in a file.
+
+    Reads loopy (Python) or floopy (Fortran) input from ``infile`` (stdin
+    if it is "-"), optionally applies ``--occa-defines`` and ``--transform``
+    code, and writes the code generated for each selected kernel to
+    ``outfile`` (stdout if it is "-").
+
+    :raises ValueError: if ``--target`` is missing or not of the form
+        ``cl:PLATFORM,DEVICE``.
+    :raises RuntimeError: if ``--lang`` is unknown, loopy input does not
+        define ``lp_knl``, or no floopy kernel is found (for example
+        because none matches ``--name``).
+    """
+    from argparse import ArgumentParser
+
+    parser = ArgumentParser(description="Stand-alone loopy frontend")
+
+    parser.add_argument("infile")
+    parser.add_argument("outfile")
+    parser.add_argument("--lang", default="loopy")
+    parser.add_argument("--target")
+    parser.add_argument("--name")
+    parser.add_argument("--transform")
+    parser.add_argument("--occa-defines")
+    parser.add_argument("--occa-add-dummy-arg", action="store_true")
+    parser.add_argument("--print-ir", action="store_true")
+    args = parser.parse_args()
+
+    # {{{ set up target
+
+    if args.target is None:
+        raise ValueError("must specify target")
+
+    import re
+    CL_TARGET_RE = re.compile(r"^cl:([0-9]+),([0-9]+)$")
+
+    cl_target_re_match = CL_TARGET_RE.match(args.target)
+    if cl_target_re_match is not None:
+        platform_ordinal = int(cl_target_re_match.group(1))
+        device_ordinal = int(cl_target_re_match.group(2))
+
+        import pyopencl as cl
+        plat = cl.get_platforms()[platform_ordinal]
+        target = plat.get_devices()[device_ordinal]
+    else:
+        raise ValueError("target '%s' not understood"
+                % args.target)
+
+    # }}}
+
+    if args.infile == "-":
+        infile_content = sys.stdin.read()
+    else:
+        with open(args.infile, "r") as infile_fd:
+            infile_content = infile_fd.read()
+
+    # {{{ path wrangling
+
+    from os.path import dirname, abspath
+    from os import getcwd
+
+    infile_dirname = dirname(args.infile)
+    if infile_dirname:
+        infile_dirname = abspath(infile_dirname)
+    else:
+        infile_dirname = getcwd()
+
+    # NOTE: 'sys' is the module-level import. Importing it again inside
+    # this function would make the name local to main() and turn the
+    # sys.stdin read above into an UnboundLocalError.
+    sys.path.append(infile_dirname)
+
+    # }}}
+
+    if args.lang == "loopy":
+        data_dic = {}
+        data_dic["lp"] = lp
+        data_dic["np"] = np
+        data_dic["lp_target"] = target
+
+        if args.occa_defines:
+            with open(args.occa_defines, "r") as defines_fd:
+                occa_define_code = defines_to_python_code(defines_fd.read())
+            exec(compile(occa_define_code, args.occa_defines, "exec"), data_dic)
+
+        # The input was already read above (possibly from stdin), so it is
+        # compiled from memory; args.infile only labels tracebacks.
+        exec(compile(infile_content, args.infile, "exec"), data_dic)
+
+        if args.transform:
+            with open(args.transform, "r") as xform_fd:
+                exec(compile(xform_fd.read(),
+                    args.transform, "exec"), data_dic)
+
+        try:
+            kernel = data_dic["lp_knl"]
+        except KeyError:
+            raise RuntimeError("loopy-lang requires 'lp_knl' "
+                    "to be defined on exit")
+
+        if args.name is not None:
+            kernel = kernel.copy(name=args.name)
+
+        kernels = [kernel]
+
+    elif args.lang == "floopy":
+        pre_transform_code = None
+        if args.transform:
+            with open(args.transform, "r") as xform_fd:
+                pre_transform_code = xform_fd.read()
+
+        if args.occa_defines:
+            if pre_transform_code is None:
+                pre_transform_code = ""
+
+            # defines_to_python_code() output has no trailing newline; add
+            # one so that the last define is not fused with the first line
+            # of the transform code.
+            with open(args.occa_defines, "r") as defines_fd:
+                pre_transform_code = (
+                        defines_to_python_code(defines_fd.read())
+                        + "\n"
+                        + pre_transform_code)
+
+        from floopy.fortran import f2loopy
+        kernels = f2loopy(target, infile_content,
+                pre_transform_code=pre_transform_code)
+
+        if args.name is not None:
+            kernels = [kernel for kernel in kernels
+                    if kernel.name == args.name]
+
+        if not kernels:
+            raise RuntimeError("no kernels found (name specified: %s)"
+                    % args.name)
+
+    else:
+        raise RuntimeError("unknown language: '%s'"
+                % args.lang)
+
+    if args.print_ir:
+        for kernel in kernels:
+            print(kernel, file=sys.stderr)
+
+    if args.occa_add_dummy_arg:
+        new_kernels = []
+        for kernel in kernels:
+            new_args = [
+                    lp.GlobalArg("occa_info", np.int32, shape=None)
+                    ] + kernel.args
+            new_kernels.append(kernel.copy(args=new_args))
+
+        kernels = new_kernels
+        del new_kernels
+
+    codes = []
+    from loopy.codegen import generate_code
+    for kernel in kernels:
+        kernel = lp.preprocess_kernel(kernel)
+        code, impl_arg_info = generate_code(kernel)
+        codes.append(code)
+
+    if args.outfile == "-":
+        sys.stdout.write("\n\n".join(codes))
+    else:
+        with open(args.outfile, "w") as outfile_fd:
+            outfile_fd.write("\n\n".join(codes))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/doc/index.rst b/doc/index.rst index 64c1e28c8ef91864ac43454d4fd8b5bbe1d206df..73fd85f52fff0068d5a91e54c8ea37d4524185ee 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,15 +5,15 @@ loopy is a code generator for array-based code in the OpenCL/CUDA execution model. Here's a very simple example of how to double the entries of a vector using loopy: -.. literalinclude:: ../examples/hello-loopy.py +.. literalinclude:: ../examples/python/hello-loopy.py :end-before: ENDEXAMPLE This example is included in the :mod:`loopy` distribution as -:download:`examples/hello-loopy.py <../examples/hello-loopy.py>`. +:download:`examples/python/hello-loopy.py <../examples/python/hello-loopy.py>`. When you run this script, the following kernel is generated, compiled, and executed: -..
literalinclude:: ../examples/hello-loopy.cl +.. literalinclude:: ../examples/python/hello-loopy.cl :language: c (See the full example for how to print the generated code.) diff --git a/examples/dg.py b/examples/dg.py deleted file mode 100644 index 95d6874c4995045ae52cfc2a6418d6e9a014bdf3..0000000000000000000000000000000000000000 --- a/examples/dg.py +++ /dev/null @@ -1,104 +0,0 @@ -raise NotImplementedError("NOT UPDATED YET FOR NEW-STYLE LOOPY!") # FIXME - - - - -def main_volume_d_dx(): - Np = 128 - K = 22000 - from pymbolic import var - - D, g, u, p, i, j, k = [var(s) for s in "Dgupijk"] - - dim = 3 - - ker = make_loop_kernel([ - LoopDimension("k", K), - LoopDimension("j", Np), - LoopDimension("i", Np), - ], [ - (p[j+Np*k], - sum(g[dim*(j+Np*k)+i] * D[dim*(j+Np*i)+i] for i in range(3)) - * u[i+Np*k]) - ]) - - gen_kwargs = { - "min_threads": 128, - "min_blocks": 32, - } - - if 0: - u = curandom.rand((Np, K)) - p = curandom.rand((Np, K)) - g = curandom.rand((Np*3, K)) - D = curandom.rand((Np*3, Np)) - - def launcher(grid, kernel, texref_lookup): - u.bind_to_texref_ext(texref_lookup["u"]) - g.bind_to_texref_ext(texref_lookup["g"]) - D.bind_to_texref_ext(texref_lookup["D"]) - kernel.prepared_call(grid, p.gpudata) - - drive_timing_run( - generate_all_kernels(ker, **gen_kwargs), - launcher, Np*Np*K) - else: - show_kernel_codes(generate_all_kernels(ker, **gen_kwargs)) - - - - -def main_hex_volume_d_dx(): - d = 3 - N = 4 - Np1 = N+1 - Np = Np1**d - - Np_padded = 128 - K = 20000 - from pymbolic import var - - D, Du, u, g, i0, i1, i2, j, k = [var(s) for s in [ - "D", "Du", "u", "g", "i0", "i1", "i2", "j", "k"]] - - axis_indices = [i0,i1,i2] - - ker = make_loop_kernel([ - LoopDimension("k", K), - LoopDimension("j", Np1), - ] + [LoopDimension(ai.name, Np1) for ai in axis_indices], - [ - (Du[k*Np_padded + sum(axis_indices[i_dim]*Np1**i_dim for i_dim in range(d))], - D[j*Np1+i0] * u[k*Np_padded + sum( - (axis_indices[i_dim] if i_dim != d_out else j)*Np1**i_dim - for i_dim in 
range(d))] - ) - for d_out in [0] - ] - ) - - gen_kwargs = { - "min_threads": 64, - "min_blocks": 32, - } - - if True and HAVE_CUDA: - if HAVE_CUDA: - u = curandom.rand((Np_padded, K)) - #g = curandom.rand((Np*3, K)) - D = curandom.rand((Np1, Np1)) - Du = curandom.rand((Np_padded, K)) - - def launcher(grid, kernel, texref_lookup): - u.bind_to_texref_ext(texref_lookup["u"]) - #g.bind_to_texref_ext(texref_lookup["g"]) - D.bind_to_texref_ext(texref_lookup["D"]) - kernel.prepared_call(grid, Du.gpudata) - - drive_timing_run( - generate_all_kernels(ker, **gen_kwargs), - launcher, 2*(Np1**3)*K) - else: - show_kernel_codes(generate_all_kernels(ker, **gen_kwargs)) - - diff --git a/examples/fortran/foo.floopy b/examples/fortran/foo.floopy new file mode 100644 index 0000000000000000000000000000000000000000..7d2d3eef0570e448ad8e9e7113e2470e1d4ef64d --- /dev/null +++ b/examples/fortran/foo.floopy @@ -0,0 +1,24 @@ +subroutine fill(out, a, n) + implicit none + + real*8 a, out(n) + integer n + + do i = 1, n + out(i) = a + end do + do i = 1, n + out(i) = out(i) * 2 + end do +end + +!$loopy begin transform +! +! fill = lp.split_iname(fill, "i", 128, +! outer_tag="g.0", inner_tag="l.0") +! fill = lp.split_iname(fill, "i_1", 128, +! outer_tag="g.0", inner_tag="l.0") +! +!$loopy end transform + +! 
vim:filetype=floopy diff --git a/examples/fortran/outerprod.py b/examples/fortran/outerprod.py new file mode 100644 index 0000000000000000000000000000000000000000..4122c84376fcc676023b3a4a01b0728c70b18b61 --- /dev/null +++ b/examples/fortran/outerprod.py @@ -0,0 +1,8 @@ +lp_knl = lp.make_kernel( + "{[i,j]: 0<=i,j<n}", + "c[i,j] = a[i]*b[j]") + +lp_knl = lp.add_dtypes(lp_knl, {"a": np.float64, "b": np.float64}) +lp_knl = lp.split_iname(lp_knl, "i", 16, outer_tag="g.0", inner_tag="l.0") +lp_knl = lp.split_iname(lp_knl, "j", 16, outer_tag="g.1", inner_tag="l.1") + diff --git a/examples/fortran/run-floopy.sh b/examples/fortran/run-floopy.sh new file mode 100755 index 0000000000000000000000000000000000000000..a56c4bdfffa37b28061dfe3a3e9e5796ccb2ad5f --- /dev/null +++ b/examples/fortran/run-floopy.sh @@ -0,0 +1,6 @@ +#! /bin/sh + +NAME="$1" +shift + +python $(which floopy) --target=cl:0,0 --lang=floopy "$NAME" - "$@" diff --git a/examples/fortran/run-loopy.sh b/examples/fortran/run-loopy.sh new file mode 100755 index 0000000000000000000000000000000000000000..55f4a8756dc8eac7a37e552a4c1a5100af08a6fd --- /dev/null +++ b/examples/fortran/run-loopy.sh @@ -0,0 +1,3 @@ +#! /bin/sh + +floopy --target=cl:0,0 --lang=loopy "$1" - diff --git a/examples/fortran/sparse.floopy b/examples/fortran/sparse.floopy new file mode 100644 index 0000000000000000000000000000000000000000..924e0aa4abe51c4dd84b01cad0cb83b56122c97d --- /dev/null +++ b/examples/fortran/sparse.floopy @@ -0,0 +1,30 @@ +subroutine sparse(rowstarts, colindices, values, m, n, nvals, x, y) + implicit none + + integer rowstarts(m+1), colindices(nvals) + real*8 values(nvals) + real*8 x(n), y(n), rowsum + + integer m, n, rowstart, rowend, length, nvals + + do i = 1, m + rowstart = rowstarts(i) + rowend = rowstarts(i+1) + length = rowend - rowstart + + rowsum = 0 + do j = 1, length + rowsum = rowsum + & + x(colindices(rowstart+j-1))*values(rowstart+j-1) + end do + y(i) = rowsum + end do +end + +!$loopy begin transform +! 
sparse = lp.split_iname(sparse, "i", 128) +! sparse = lp.tag_inames(sparse, {"i_outer": "g.0"}) +! sparse = lp.tag_inames(sparse, {"i_inner": "l.0"}) +! sparse = lp.split_iname(sparse, "j", 4) +! sparse = lp.tag_inames(sparse, {"j_inner": "unr"}) +!$loopy end transform diff --git a/examples/fortran/tagging.floopy b/examples/fortran/tagging.floopy new file mode 100644 index 0000000000000000000000000000000000000000..f4b4e28eab3ddd544c791a088279718ef5221bb9 --- /dev/null +++ b/examples/fortran/tagging.floopy @@ -0,0 +1,24 @@ +subroutine fill(out, a, n) + implicit none + + real*8 a, out(n) + integer n + +!$loopy begin tagged: init + do i = 1, n + out(i) = a + end do +!$loopy end tagged: init + do i = 1, n + out(i) = out(i) * 2 + end do +end + +!$loopy begin transform +! +! fill = lp.split_iname(fill, "i", 128, +! outer_tag="g.0", inner_tag="l.0") +! fill = lp.split_iname(fill, "i_1", 128, +! outer_tag="g.0", inner_tag="l.0") +!$loopy end transform +! vim:filetype=floopy diff --git a/examples/fortran/volumeKernel.floopy b/examples/fortran/volumeKernel.floopy new file mode 100644 index 0000000000000000000000000000000000000000..953432d2253e90e396385cd53f4c0f2d81d70e5a --- /dev/null +++ b/examples/fortran/volumeKernel.floopy @@ -0,0 +1,79 @@ +subroutine volumeKernel(elements, Nfields, Ngeo, Ndim, Dop, geo, Q, rhsQ ) + + implicit none + + integer elements, Nfields, Ngeo, Ndim + + real*4 Dop(Nq,Nq) + real*4 Q(Nq,Nq,Nq,Nfields,elements) + real*4 geo(Nq,Nq,Nq,Ngeo,elements) + real*4 rhsQ(Nq,Nq,Nq,Nfields,elements) + + integer e,i,j,k,d,n,cnt + + real*4 u,v,w,p, dFdr, dFds, dFdt, divF + real*4 F(Nq,Nq,Nq,Ndim) + + + do e=1,elements + do k=1,Nq + do j=1,Nq + do i=1,Nq + + u = Q(i,j,k,1,e) + v = Q(i,j,k,2,e) + w = Q(i,j,k,3,e) + p = Q(i,j,k,4,e) + + F(i,j,k,1) = -u + F(i,j,k,2) = -v + F(i,j,k,3) = -w + + end do + end do + end do + + do k=1,Nq + do j=1,Nq + do i=1,Nq + divF = 0 + cnt = 1 + do d=1,Ndim + dFdr = 0 + dFds = 0 + dFdt = 0 + + do n=1,Nq + dFdr = dFdr + 
Dop(i,n)*F(n,j,k,d) + dFds = dFds + Dop(j,n)*F(i,n,k,d) + dFdt = dFdt + Dop(k,n)*F(i,j,n,d) + end do + + divF = divF & + + geo(i,j,k,cnt,e)*dFdr & + + geo(i,j,k,cnt+1,e)*dFds & + + geo(i,j,k,cnt+2,e)*dFdt + cnt = cnt + Ndim + end do + + rhsQ(i,j,k,1,e) = divF + + end do + end do + end do + end do + +end subroutine volumeKernel + +!$loopy begin transform +! +! volumeKernel = lp.split_iname(volumeKernel, +! "e", 32, outer_tag="g.1", inner_tag="g.0") +! volumeKernel = lp.fix_parameters(volumeKernel, +! Nq=5, Ndim=3) +! volumeKernel = lp.tag_inames(volumeKernel, dict( +! i="l.0", j="l.1", k="l.2", +! i_1="l.0", j_1="l.1", k_1="l.2" +! )) +! +!$loopy end transform diff --git a/examples/fortran/volumeKernelSimple.floopy b/examples/fortran/volumeKernelSimple.floopy new file mode 100644 index 0000000000000000000000000000000000000000..67948020d6fce06e858718fa9cca366d78b33295 --- /dev/null +++ b/examples/fortran/volumeKernelSimple.floopy @@ -0,0 +1,36 @@ +subroutine volumeKernel(elements, Nfields, Ngeo, Ndim, Dop, geo, Q, rhsQ ) + + implicit none + + integer elements, Nfields, Ngeo, Ndim + + real*4 Dop(Nq,Nq) + real*4 Q(Nq,Nq,Nq,Nfields,elements) + real*4 geo(Nq,Nq,Nq,Ngeo,elements) + real*4 rhsQ(Nq,Nq,Nq,Nfields,elements) + + integer e,i,j,k,d,n,cnt + + real*4 u,v,w,p, dFdr, dFds, dFdt, divF + real*4 F(Nq,Ndim) + + + do e=1,elements + do i=1,Nq + + F(i,1) = 5 + F(i,2) = 7 + + end do + + end do + +end subroutine volumeKernel + +!$loopy begin transform +! +! volumeKernel = lp.fix_parameters(volumeKernel, +! Nq=5, Ndim=3) +! volumeKernel = lp.tag_inames(volumeKernel, dict(i="l.0")) +! 
+!$loopy end transform diff --git a/examples/hello-loopy.cl b/examples/python/hello-loopy.cl similarity index 100% rename from examples/hello-loopy.cl rename to examples/python/hello-loopy.cl diff --git a/examples/hello-loopy.py b/examples/python/hello-loopy.py similarity index 100% rename from examples/hello-loopy.py rename to examples/python/hello-loopy.py diff --git a/examples/rank-one.py b/examples/python/rank-one.py similarity index 100% rename from examples/rank-one.py rename to examples/python/rank-one.py diff --git a/examples/sem_reagan.py b/examples/sem_reagan.py deleted file mode 100644 index 39a9ea80960916568b65a7bd2949505cf85fe64a..0000000000000000000000000000000000000000 --- a/examples/sem_reagan.py +++ /dev/null @@ -1,221 +0,0 @@ -from __future__ import division - -__copyright__ = "Copyright (C) 2012 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - - - -import numpy as np -import pyopencl as cl -import loopy as lp - -from pyopencl.tools import pytest_generate_tests_for_pyopencl \ - as pytest_generate_tests - - - - -def cannot_schedule_test_tim3d_slab(ctx_factory): - dtype = np.float32 - ctx = ctx_factory() - order = "C" - - Nq = 8 - - knl = lp.make_kernel(ctx.devices[0], - "[E] -> {[i,j, k, o,m, e]: 0<=i,j,k,o,m < Nq and 0<=e<E }", - """ - ur(a,b,c) := sum(o, D[a,o]*u[e,o,b,c]) - us(a,b,c) := sum(o, D[b,o]*u[e,a,o,c]) - ut(a,b,c) := sum(o, D[c,o]*u[e,a,b,o]) - - Gur(a,b,c) := G[0,e,a,b,c]*ur(a,b,c)+G[1,e,a,b,c]*us(a,b,c)+G[2,e,a,b,c]*ut(a,b,c) - Gus(a,b,c) := G[1,e,a,b,c]*ur(a,b,c)+G[3,e,a,b,c]*us(a,b,c)+G[4,e,a,b,c]*ut(a,b,c) - Gut(a,b,c) := G[2,e,a,b,c]*ur(a,b,c)+G[4,e,a,b,c]*us(a,b,c)+G[5,e,a,b,c]*ut(a,b,c) - - lapr(a,b,c):= sum(m, D[m,a]*Gur(m,b,c)) - laps(a,b,c):= sum(m, D[m,b]*Gus(a,m,c)) - lapt(a,b,c):= sum(m, D[m,c]*Gut(a,b,m)) - - lap[e,i,j,k] = lapr(i,j,k) + laps(i,j,k) + lapt(i,j,k) - """, - [ - lp.GlobalArg("u,lap", dtype, shape="E,Nq,Nq,Nq", order=order), - lp.GlobalArg("G", dtype, shape="6,E,Nq,Nq,Nq", order=order), - # lp.ConstantArrayArg("D", dtype, shape="Nq,Nq", order=order), - lp.GlobalArg("D", dtype, shape="Nq, Nq", order=order), - # lp.ImageArg("D", dtype, shape="Nq, Nq"), - lp.ValueArg("E", np.int32, approximately=1000), - ], - name="semdiff3D", assumptions="E>=1", - defines={"Nq": Nq}) - - - - def duplicate_os(knl): - for derivative in "rst": - knl = lp.duplicate_inames( - knl, "o", - within="... < lap"+derivative, suffix="_"+derivative) - return knl - - def variant_orig(knl): - # NOTE: Removing this makes the thing unschedulable - #knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), ) - - knl = lp.precompute(knl, "ur", ["i", "j"], within="... < lapr") - knl = lp.precompute(knl, "us", ["i", "j"], within="... < lapr") - knl = lp.precompute(knl, "ut", ["i", "j"], within="... 
< lapr") - - # prefetch the derivative matrix - knl = lp.add_prefetch(knl, "D[:,:]") - - knl = duplicate_os(knl) - - print(knl) - - return knl - - seq_knl = duplicate_os(knl) - - #print lp.preprocess_kernel(knl) - #1/0 - - for variant in [variant_orig]: - kernel_gen = lp.generate_loop_schedules(variant(knl), loop_priority=["e", "i", "j"]) - kernel_gen = lp.check_kernels(kernel_gen, dict(E=1000)) - - E = 1000 - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, - op_count=[-666], - op_label=["GFlops"], - parameters={"E": E}) - - - - -def test_tim3d_slab(ctx_factory): - dtype = np.float32 - ctx = ctx_factory() - order = "C" - - Nq = 8 - - knl = lp.make_kernel(ctx.devices[0], - "[E] -> {[i,j, k, o,m, e]: 0<=i,j,k,o,m < Nq and 0<=e<E }", - """ - ur(a,b,c) := sum(o, D[a,o]*u[e,o,b,c]) - us(a,b,c) := sum(o, D[b,o]*u[e,a,o,c]) - ut(a,b,c) := sum(o, D[c,o]*u[e,a,b,o]) - - Gur(a,b,c) := G[0,e,a,b,c]*ur(a,b,c)+G[1,e,a,b,c]*us(a,b,c)+G[2,e,a,b,c]*ut(a,b,c) - Gus(a,b,c) := G[1,e,a,b,c]*ur(a,b,c)+G[3,e,a,b,c]*us(a,b,c)+G[4,e,a,b,c]*ut(a,b,c) - #Gut(a,b,c) := G[2,e,a,b,c]*ur(a,b,c)+G[4,e,a,b,c]*us(a,b,c)+G[5,e,a,b,c]*ut(a,b,c) - - Gut(a,b,c) := G[5,e,a,b,c]*ut(a,b,c) - - lapr(a,b,c):= sum(m, D[m,a]*Gur(m,b,c)) - laps(a,b,c):= sum(m, D[m,b]*Gus(a,m,c)) - lapt(a,b,c):= sum(m, D[m,c]*Gut(a,b,m)) - - part_r := lapr(i,j,k) - part_s := laps(i,j,k) - part_t := lapt(i,j,k) - - lap[e,i,j,k] = part_t #part_r + part_s # - """, - [ - lp.GlobalArg("u,lap", dtype, shape="E,Nq,Nq,Nq", order=order), - lp.GlobalArg("G", dtype, shape="6,E,Nq,Nq,Nq", order=order), - # lp.ConstantArrayArg("D", dtype, shape="Nq,Nq", order=order), - lp.GlobalArg("D", dtype, shape="Nq, Nq", order=order), - # lp.ImageArg("D", dtype, shape="Nq, Nq"), - lp.ValueArg("E", np.int32, approximately=1000), - ], - name="semdiff3D", assumptions="E>=1", - defines={"Nq": Nq}) - - - - def duplicate_os(knl): - for derivative in "rst": - knl = lp.duplicate_inames( - knl, "o", - within="... 
< lap"+derivative, suffix="_"+derivative) - return knl - - def variant_orig(knl): - # NOTE: Removing this makes the thing unschedulable - knl = lp.tag_inames(knl, dict(e="g.0", i="l.0", j="l.1"), ) - - if 0: - for derivative in "rst": - for iname in "ij": - knl = lp.duplicate_inames( - knl, iname, within="part_%s" % derivative, suffix="_"+derivative) - - knl = lp.duplicate_inames(knl, "k", within="part_t", suffix="_t", tags=dict(k="ilp")) - knl = lp.duplicate_inames(knl, "o", within="... < Gut", suffix="_t") - - knl = lp.link_inames(knl, "i", tag="l.0", new_iname="p0") - knl = lp.link_inames(knl, "j", tag="l.1", new_iname="p1") - - knl = lp.precompute(knl, "Gut", ["p0", "p1"]) - #knl = lp.precompute(knl, "Gur", ["m", "p1"]) - #knl = lp.precompute(knl, "Gus", ["p0", "m"]) - - #knl = lp.precompute(knl, "us", ["i", "j"], within="... < lapr") - #knl = lp.precompute(knl, "ut", ["i", "j"], within="... < lapr") - - knl = lp.precompute(knl, "lapt", ["", "j"]) - - # prefetch the derivative matrix - knl = lp.add_prefetch(knl, "D[:,:]") - - print(knl) - - return knl - - #seq_knl = duplicate_os(knl) - seq_knl = knl - - #print lp.preprocess_kernel(knl) - #1/0 - - for variant in [variant_orig]: - kernel_gen = lp.generate_loop_schedules(variant(knl)) - kernel_gen = lp.check_kernels(kernel_gen, dict(E=1000)) - - E = 1000 - lp.auto_test_vs_ref(seq_knl, ctx, kernel_gen, - op_count=[-666], - op_label=["GFlops"], - parameters={"E": E}) - -if __name__ == "__main__": - import sys - if len(sys.argv) > 1: - exec(sys.argv[1]) - else: - from py.test.cmdline import main - main([__file__]) diff --git a/loopy/frontend/__init__.py b/loopy/frontend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..20a6d19e69949bd6d318fefe9d83794f401da67a --- /dev/null +++ 
b/loopy/frontend/fortran/__init__.py
@@ -0,0 +1,48 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+def f2loopy(target, source, free_form=True, strict=True,
+        pre_transform_code=None):
+    """Translate Fortran source text into loopy kernels.
+
+    :arg target: not used by this function.
+    :arg source: the Fortran source code, as a string.
+    :arg free_form: passed to the Fortran parser as ``isfree``.
+    :arg strict: passed to the Fortran parser as ``isstrict``.
+    :arg pre_transform_code: forwarded to
+        ``F2LoopyTranslator.make_kernels``.
+    :returns: the result of ``F2LoopyTranslator.make_kernels``.
+    """
+    from fparser import api
+    parse_tree = api.parse(source, isfree=free_form, isstrict=strict,
+            analyze=False, ignore_comments=False)
+
+    from floopy.fortran.translator import F2LoopyTranslator
+    translator = F2LoopyTranslator()
+    translator(parse_tree)
+
+    return translator.make_kernels(pre_transform_code=pre_transform_code)
+
+# vim: foldmethod=marker
diff --git a/loopy/frontend/fortran/diagnostic.py b/loopy/frontend/fortran/diagnostic.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cb3c79cc646f0959f69614e5141441e8fc3261b
--- /dev/null
+++ b/loopy/frontend/fortran/diagnostic.py
@@ -0,0 +1,31 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+class TranslatorWarning(UserWarning):
+    """Warning category used by the Fortran frontend."""
+
+
+class TranslationError(RuntimeError):
+    """Raised when Fortran input cannot be translated."""
diff --git a/loopy/frontend/fortran/expression.py b/loopy/frontend/fortran/expression.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a67193daa1596a38dde8f8ff53680297cb7d6c
--- /dev/null
+++ b/loopy/frontend/fortran/expression.py
@@ -0,0 +1,247 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from pymbolic.parser import Parser as ExpressionParserBase
+from floopy.fortran.diagnostic import TranslationError
+
+import pymbolic.primitives as prim
+import numpy as np
+
+import pytools.lex
+import re
+
+
+_less_than = intern("less_than")
+_greater_than = intern("greater_than")
+_less_equal = intern("less_equal")
+_greater_equal = intern("greater_equal")
+_equal = intern("equal")
+_not_equal = intern("not_equal")
+
+_not = intern("not")
+_and = intern("and")
+_or = intern("or")
+
+
+class TypedLiteral(prim.Leaf):
+    """A literal *value* tagged with a numpy *dtype*.
+
+    *dtype* is normalized with :func:`numpy.dtype` on construction.
+    """
+
+    def __init__(self, value, dtype):
+        self.value = value
+        self.dtype = np.dtype(dtype)
+
+    def __getinitargs__(self):
+        return self.value, self.dtype
+
+    mapper_method = intern("map_literal")
+
+
+# {{{ expression parser
+
+class FortranExpressionParser(ExpressionParserBase):
+    """Expression parser with Fortran-specific extensions.
+
+    Adds the dotted comparison and logical operators (``.lt.``, ``.and.``,
+    ...), typed floating-point and complex literals, and the distinction
+    between array subscripts and function calls for ``name(...)``.
+    """
+
+    # FIXME double/single prec literals
+
+    lex_table = [
+        (_less_than, pytools.lex.RE(r"\.lt\.", re.I)),
+        (_greater_than, pytools.lex.RE(r"\.gt\.", re.I)),
+        (_less_equal, pytools.lex.RE(r"\.le\.", re.I)),
+        (_greater_equal, pytools.lex.RE(r"\.ge\.", re.I)),
+        (_equal, pytools.lex.RE(r"\.eq\.", re.I)),
+        (_not_equal, pytools.lex.RE(r"\.ne\.", re.I)),
+
+        (_not, pytools.lex.RE(r"\.not\.", re.I)),
+        (_and, pytools.lex.RE(r"\.and\.", re.I)),
+        (_or, pytools.lex.RE(r"\.or\.", re.I)),
+        ] + ExpressionParserBase.lex_table
+
+    def __init__(self, tree_walker):
+        self.tree_walker = tree_walker
+
+    _PREC_FUNC_ARGS = 1
+
+    def parse_terminal(self, pstate):
+        """Parse a float literal or a name, possibly with ``(...)`` arguments.
+
+        Float literals become :class:`TypedLiteral` instances: ``float64``
+        if written with a ``d`` exponent, ``float32`` otherwise. A name
+        followed by parentheses becomes a ``Subscript`` if the current
+        scope knows the name and a ``Call`` otherwise. Everything else is
+        left to the base class.
+        """
+        scope = self.tree_walker.scope_stack[-1]
+
+        from pymbolic.primitives import Subscript, Call, Variable
+        from pymbolic.parser import (
+                _identifier, _openpar, _closepar, _float)
+
+        next_tag = pstate.next_tag()
+        if next_tag is _float:
+            value = pstate.next_str_and_advance().lower()
+            if "d" in value:
+                dtype = np.float64
+            else:
+                dtype = np.float32
+
+            value = value.replace("d", "e")
+            # Give literals such as ".5" and "-.5" an explicit leading zero.
+            if value.startswith("."):
+                value = "0"+value
+            elif value.startswith("-."):
+                value = "-0"+value[1:]
+            return TypedLiteral(value, dtype)
+
+        elif next_tag is _identifier:
+            name = pstate.next_str_and_advance()
+
+            if pstate.is_at_end() or pstate.next_tag() is not _openpar:
+                # not a subscript
+                scope.use_name(name)
+
+                return Variable(name)
+
+            left_exp = Variable(name)
+
+            pstate.advance()
+            pstate.expect_not_end()
+
+            if scope.is_known(name):
+                cls = Subscript
+            else:
+                cls = Call
+
+            # next_tag is a method and must be called; comparing the bound
+            # method itself to a tag is never true.
+            if pstate.next_tag() is _closepar:
+                pstate.advance()
+                left_exp = cls(left_exp, ())
+            else:
+                args = self.parse_expression(pstate, self._PREC_FUNC_ARGS)
+                if not isinstance(args, tuple):
+                    args = (args,)
+                left_exp = cls(left_exp, args)
+                pstate.expect(_closepar)
+                pstate.advance()
+
+            return left_exp
+        else:
+            return ExpressionParserBase.parse_terminal(
+                    self, pstate)
+
+    COMP_MAP = {
+            _less_than: "<",
+            _less_equal: "<=",
+            _greater_than: ">",
+            _greater_equal: ">=",
+            _equal: "==",
+            _not_equal: "!=",
+            }
+
+    def parse_prefix(self, pstate, min_precedence=0):
+        """Parse ``.not.``; defer all other prefixes to the base class."""
+        from pymbolic.parser import _PREC_UNARY
+        import pymbolic.primitives as primitives
+
+        pstate.expect_not_end()
+
+        if pstate.is_next(_not):
+            pstate.advance()
+            return primitives.LogicalNot(
+                    self.parse_expression(pstate, _PREC_UNARY))
+        else:
+            return ExpressionParserBase.parse_prefix(self, pstate)
+
+    def parse_postfix(self, pstate, min_precedence, left_exp):
+        """Parse Fortran comparison/logical operators after *left_exp*.
+
+        Also turns a two-entry tuple produced by the base class into a
+        complex :class:`TypedLiteral` when *min_precedence* is below
+        ``_PREC_FUNC_ARGS``.
+
+        :returns: a tuple ``(left_exp, did_something)``.
+        :raises TranslationError: on a parenthesis after a non-name, or a
+            complex literal that does not have two entries.
+        """
+        from pymbolic.parser import (
+                _PREC_CALL, _PREC_COMPARISON, _openpar,
+                _PREC_LOGICAL_OR, _PREC_LOGICAL_AND)
+        from pymbolic.primitives import (
+                Comparison, LogicalAnd, LogicalOr)
+
+        next_tag = pstate.next_tag()
+        if next_tag is _openpar and _PREC_CALL > min_precedence:
+            raise TranslationError("parenthesis operator only works on names")
+
+        elif next_tag in self.COMP_MAP and _PREC_COMPARISON > min_precedence:
+            pstate.advance()
+            left_exp = Comparison(
+                    left_exp,
+                    self.COMP_MAP[next_tag],
+                    self.parse_expression(pstate, _PREC_COMPARISON))
+            did_something = True
+        elif next_tag is _and and _PREC_LOGICAL_AND > min_precedence:
+            pstate.advance()
+            left_exp = LogicalAnd((left_exp,
+                self.parse_expression(pstate, _PREC_LOGICAL_AND)))
+            did_something = True
+        elif next_tag is _or and _PREC_LOGICAL_OR > min_precedence:
+            pstate.advance()
+            left_exp = LogicalOr((left_exp,
+                self.parse_expression(pstate, _PREC_LOGICAL_OR)))
+            did_something = True
+        else:
+            left_exp, did_something = ExpressionParserBase.parse_postfix(
+                    self, pstate, min_precedence, left_exp)
+
+            if isinstance(left_exp, tuple) and min_precedence < self._PREC_FUNC_ARGS:
+                # this must be a complex literal
+                if len(left_exp) != 2:
+                    raise TranslationError("complex literals must have "
+                            "two entries")
+
+                r, i = left_exp
+
+                # Adding zeros of both component types yields the promoted
+                # real type; compare its dtype (not the scalar) to float32.
+                dtype = (r.dtype.type(0) + i.dtype.type(0)).dtype
+                if dtype == np.float32:
+                    dtype = np.complex64
+                else:
+                    dtype = np.complex128
+
+                left_exp = TypedLiteral(left_exp, dtype)
+
+        return left_exp, did_something
+
+# }}}
+
+# vim: foldmethod=marker
diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a55cf921e9c440bca13b786e128466cf9df41d0
--- /dev/null
+++ b/loopy/frontend/fortran/translator.py
@@ -0,0 +1,658 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import re
+
+import loopy as lp
+import numpy as np
+from warnings import warn
+# NOTE(review): this file is added as loopy/frontend/fortran/translator.py,
+# yet it imports from a 'floopy' package -- presumably a leftover from before
+# the code was moved into loopy; confirm the intended module path.
+from floopy.fortran.tree import FTreeWalkerBase
+from floopy.fortran.diagnostic import (
+        TranslationError, TranslatorWarning)
+import islpy as isl
+from islpy import dim_type
+from loopy.symbolic import IdentityMapper
+
+
+# {{{ subscript base shifter
+
+class SubscriptIndexBaseShifter(IdentityMapper):
+    """Rewrite subscripts of declared arrays to be zero-based.
+
+    For every array found in ``scope.dim_map``, the declared lower bound
+    (or the implicit bound 1) is subtracted from each index.
+    """
+
+    def __init__(self, scope):
+        self.scope = scope
+
+    def map_subscript(self, expr):
+        from pymbolic.primitives import Variable
+        assert isinstance(expr.aggregate, Variable)
+
+        name = expr.aggregate.name
+        dims = self.scope.dim_map.get(name)
+        if dims is None:
+            # no dimension information: leave the subscript alone
+            return IdentityMapper.map_subscript(self, expr)
+
+        subscript = expr.index
+
+        if not isinstance(subscript, tuple):
+            subscript = (subscript,)
+
+        subscript = list(subscript)
+
+        if len(dims) != len(subscript):
+            raise TranslationError("inconsistent number of indices "
+                    "to '%s'" % name)
+
+        for i, dim in enumerate(dims):
+            if len(dim) == 2:
+                # has a base index
+                subscript[i] -= dim[0]
+            elif len(dim) == 1:
+                # base index is 1 implicitly
+                subscript[i] -= 1
+
+        return expr.aggregate[self.rec(tuple(subscript))]
+
+# }}}
+
+
+# {{{ scope
+
+class Scope(object):
+    """Everything the translator records about one subprogram.
+
+    :arg subprogram_name: name of the subprogram (*None* for the
+        source-level scope)
+    :arg arg_names: collection of argument names; defaults to an empty set
+    """
+
+    def __init__(self, subprogram_name, arg_names=None):
+        self.subprogram_name = subprogram_name
+
+        # map name to data
+        self.data_statements = {}
+
+        # map first letter to type
+        # (set to None once 'implicit none' has been seen)
+        self.implicit_types = {}
+
+        # map name to dim tuple
+        self.dim_map = {}
+
+        # map name to type
+        self.type_map = {}
+
+        # map name to data
+        self.data = {}
+
+        # A fresh set per instance: a mutable default argument would be
+        # shared between all scopes created without explicit arguments.
+        if arg_names is None:
+            arg_names = set()
+        self.arg_names = arg_names
+
+        self.index_sets = []
+
+        # This dict has a key for every iname that is
+        # currently active. These keys map to the loopy-side
+        # expression for the iname, which may differ because
+        # of non-zero lower iteration bounds or because of
+        # duplicate inames need to be renamed for loopy.
+        self.active_iname_aliases = {}
+
+        self.active_loopy_inames = set()
+
+        self.instructions = []
+        self.temporary_variables = []
+
+        self.used_names = set()
+
+        self.previous_instruction_id = None
+
+    def known_names(self):
+        """Return the set of all used, dimensioned or typed names."""
+        return (self.used_names
+                | set(self.dim_map)
+                | set(self.type_map))
+
+    def is_known(self, name):
+        """Return whether *name* is used, dimensioned, typed or an argument."""
+        return (name in self.used_names
+                or name in self.dim_map
+                or name in self.type_map
+                or name in self.arg_names)
+
+    def all_inames(self):
+        """Return a frozenset of the set-dimension names of all index sets."""
+        result = set()
+        for iset in self.index_sets:
+            result.update(iset.get_var_dict(dim_type.set))
+
+        return frozenset(result)
+
+    def use_name(self, name):
+        self.used_names.add(name)
+
+    def get_type(self, name, none_ok=False):
+        """Return the dtype of *name*.
+
+        Explicitly declared types win. Otherwise the implicit type for the
+        first letter is used, falling back to 32-bit integer if that letter
+        has no implicit type.
+
+        :arg none_ok: under 'implicit none', return *None* for an undeclared
+            name instead of raising
+        :raises TranslationError: if *name* is undeclared under
+            'implicit none' and *none_ok* is false
+        """
+        try:
+            return self.type_map[name]
+        except KeyError:
+            if self.implicit_types is None:
+                if none_ok:
+                    return None
+
+                raise TranslationError(
+                        "no type for '%s' found in 'implicit none' routine"
+                        % name)
+
+            return self.implicit_types.get(name[0], np.dtype(np.int32))
+
+    def get_loopy_shape(self, name):
+        """Return the extent of each declared axis of *name* as a tuple.
+
+        A one-entry dimension ``(n,)`` has extent ``n``; a two-entry
+        dimension ``(lo, hi)`` has extent ``hi-lo+1``. Names without
+        dimension information yield ``()``.
+        """
+        dims = self.dim_map.get(name, ())
+
+        shape = []
+        for i, dim in enumerate(dims):
+            if len(dim) == 1:
+                shape.append(dim[0])
+            elif len(dim) == 2:
+                shape.append(dim[1]-dim[0]+1)
+            else:
+                raise TranslationError("dimension axis %d "
+                        "of '%s' not understood: %s"
+                        % (i, name, dim))
+
+        return tuple(shape)
+
+    def process_expression_for_loopy(self, expr):
+        """Substitute active loop-variable aliases in *expr*, then shift
+        subscripts to base zero."""
+        from pymbolic.mapper.substitutor import make_subst_func
+        from loopy.symbolic import SubstitutionMapper
+
+        submap = SubstitutionMapper(
+                make_subst_func(self.active_iname_aliases))
+
+        expr = submap(expr)
+
+        subshift = SubscriptIndexBaseShifter(self)
+        expr = subshift(expr)
+
+        return expr
+
+# }}}
+
+
+def remove_common_indentation(lines):
+    """Strip the indentation of the first non-blank line from all *lines*.
+
+    Leading and trailing blank lines are dropped. The argument itself is
+    not modified.
+
+    :arg lines: a list of strings
+    :returns: the dedented lines joined by newlines
+    :raises ValueError: if a later line has non-whitespace characters
+        within the indentation width of the first line
+    """
+    # work on a copy so the caller's list is left untouched
+    lines = list(lines)
+
+    while lines and lines[0].strip() == "":
+        lines.pop(0)
+    while lines and lines[-1].strip() == "":
+        lines.pop(-1)
+
+    base_indent = 0
+    if lines:
+        # lines[0] is non-blank here, so this loop terminates
+        while lines[0][base_indent] in " \t":
+            base_indent += 1
+
+        for line in lines[1:]:
+            if line[:base_indent].strip():
+                raise ValueError("inconsistent indentation")
+
+    return "\n".join(line[base_indent:] for line in lines)
+
+
+# {{{ translator
+
+class F2LoopyTranslator(FTreeWalkerBase):
+    """Tree walker that records one :class:`Scope` per Fortran subroutine.
+
+    The ``map_XXX`` methods are dispatched on the node class name by
+    :meth:`FTreeWalkerBase.rec`. :meth:`make_kernels` turns the recorded
+    scopes into loopy kernels.
+    """
+
+    def __init__(self):
+        FTreeWalkerBase.__init__(self)
+
+        self.scope_stack = []
+        self.isl_context = isl.Context()
+
+        self.insn_id_counter = 0
+
+        # finished subroutine scopes, in source order
+        self.kernels = []
+
+        # Flag to record whether 'loopy begin transform' comment
+        # has been seen.
+        self.in_transform_code = False
+
+        # tags currently opened by '$loopy begin tagged:' comments
+        self.instruction_tags = []
+
+        self.transform_code_lines = []
+
+    # {{{ map_XXX functions
+
+    def map_BeginSource(self, node):
+        scope = Scope(None)
+        self.scope_stack.append(scope)
+
+        for c in node.content:
+            self.rec(c)
+
+    def map_Subroutine(self, node):
+        assert not node.prefix
+        assert not hasattr(node, "suffix")
+
+        scope = Scope(node.name, list(node.args))
+        self.scope_stack.append(scope)
+
+        for c in node.content:
+            self.rec(c)
+
+        self.scope_stack.pop()
+
+        self.kernels.append(scope)
+
+    def map_EndSubroutine(self, node):
+        return []
+
+    def map_Implicit(self, node):
+        """Record implicit typing rules; an item-less statement is treated
+        as 'implicit none'."""
+        scope = self.scope_stack[-1]
+
+        if not node.items:
+            assert not scope.implicit_types
+            scope.implicit_types = None
+            return []
+
+        for stmt, specs in node.items:
+            if scope.implicit_types is None:
+                raise TranslationError("implicit decl not allowed after "
+                        "'implicit none'")
+            tp = self.dtype_from_stmt(stmt)
+            for start, end in specs:
+                for char_code in range(ord(start), ord(end)+1):
+                    scope.implicit_types[chr(char_code)] = tp
+
+        return []
+
+    # {{{ types, declarations
+
+    def map_Equivalence(self, node):
+        raise NotImplementedError("equivalence")
+
+    # maps (lower-cased statement class name, length selector) to numpy type
+    TYPE_MAP = {
+            ("real", "4"): np.float32,
+            ("real", "8"): np.float64,
+
+            ("complex", "8"): np.complex64,
+            ("complex", "16"): np.complex128,
+
+            ("integer", ""): np.int32,
+            ("integer", "4"): np.int32,
+            ("integer", "8"): np.int64,
+            }
+
+    # Extended-precision types do not exist in every numpy build, so
+    # only register them where available.
+    if hasattr(np, "float128"):
+        TYPE_MAP[("real", "16")] = np.float128
+    if hasattr(np, "complex256"):
+        TYPE_MAP[("complex", "32")] = np.complex256
+
+    def dtype_from_stmt(self, stmt):
+        """Return the numpy dtype for a type declaration statement.
+
+        :raises KeyError: if the (type, length) pair is not in
+            :attr:`TYPE_MAP`
+        """
+        length, kind = stmt.selector
+        assert not kind
+        return np.dtype(self.TYPE_MAP[(type(stmt).__name__.lower(), length)])
+
+    def map_type_decl(self, node):
+        """Record type and (if given) dimensions of each declared entity."""
+        scope = self.scope_stack[-1]
+
+        tp = self.dtype_from_stmt(node)
+
+        for name, shape in self.parse_dimension_specs(node.entity_decls):
+            if shape is not None:
+                assert name not in scope.dim_map
+                scope.dim_map[name] = shape
+                scope.use_name(name)
+
+            assert name not in scope.type_map
+            scope.type_map[name] = tp
+
+        return []
+
+    map_Logical = map_type_decl
+    map_Integer = map_type_decl
+    map_Real = map_type_decl
+    map_Complex = map_type_decl
+
+    def map_Dimension(self, node):
+        scope = self.scope_stack[-1]
+
+        for name, shape in self.parse_dimension_specs(node.items):
+            if shape is not None:
+                assert name not in scope.dim_map
+                scope.dim_map[name] = shape
+                scope.use_name(name)
+
+        return []
+
+    def map_External(self, node):
+        raise NotImplementedError("external")
+
+    # }}}
+
+    def map_Data(self, node):
+        scope = self.scope_stack[-1]
+
+        for name, data in node.stmts:
+            # exactly one name per data entry is supported
+            name, = name
+            assert name not in scope.data
+            scope.data[name] = [self.parse_expr(i) for i in data]
+
+        return []
+
+    def map_Parameter(self, node):
+        raise NotImplementedError("parameter")
+
+    # {{{ I/O
+
+    def map_Open(self, node):
+        raise NotImplementedError
+
+    def map_Format(self, node):
+        warn("'format' unsupported", TranslatorWarning)
+
+    def map_Write(self, node):
+        warn("'write' unsupported", TranslatorWarning)
+
+    def map_Print(self, node):
+        warn("'print' unsupported", TranslatorWarning)
+
+    def map_Read1(self, node):
+        warn("'read' unsupported", TranslatorWarning)
+
+    # }}}
+
+    def map_Assignment(self, node):
+        """Append an :class:`ExpressionInstruction` for an assignment.
+
+        The instruction depends on the previously generated instruction of
+        the same scope (if any) and is forced into all currently active
+        loopy inames.
+        """
+        scope = self.scope_stack[-1]
+
+        lhs = scope.process_expression_for_loopy(
+                self.parse_expr(node.variable))
+        from pymbolic.primitives import Subscript, Call
+        if isinstance(lhs, Call):
+            raise TranslationError("function call (to '%s') on left hand side of "
+                    "assignment--check for misspelled variable name" % lhs)
+        elif isinstance(lhs, Subscript):
+            lhs_name = lhs.aggregate.name
+        else:
+            lhs_name = lhs.name
+
+        scope.use_name(lhs_name)
+
+        from loopy.kernel.data import ExpressionInstruction
+
+        rhs = scope.process_expression_for_loopy(self.parse_expr(node.expr))
+
+        new_id = "insn%d" % self.insn_id_counter
+        self.insn_id_counter += 1
+
+        if scope.previous_instruction_id:
+            insn_deps = frozenset([scope.previous_instruction_id])
+        else:
+            insn_deps = frozenset()
+
+        insn = ExpressionInstruction(
+                lhs, rhs,
+                forced_iname_deps=frozenset(
+                    scope.active_loopy_inames),
+                insn_deps=insn_deps,
+                id=new_id,
+                tags=tuple(self.instruction_tags))
+
+        scope.previous_instruction_id = new_id
+        scope.instructions.append(insn)
+
+    def map_Allocate(self, node):
+        raise NotImplementedError("allocate")
+
+    def map_Deallocate(self, node):
+        raise NotImplementedError("deallocate")
+
+    def map_Save(self, node):
+        raise NotImplementedError("save")
+
+    def map_Line(self, node):
+        #from warnings import warn
+        #warn("Encountered a 'line': %s" % node)
+        raise NotImplementedError
+
+    def map_Program(self, node):
+        raise NotImplementedError
+
+    def map_Entry(self, node):
+        raise NotImplementedError
+
+    # {{{ control flow
+
+    def map_Goto(self, node):
+        raise NotImplementedError("goto")
+
+    def map_Call(self, node):
+        raise NotImplementedError("call")
+
+    def map_Return(self, node):
+        raise NotImplementedError("return")
+
+    def map_ArithmeticIf(self, node):
+        raise NotImplementedError("arithmetic-if")
+
+    def map_If(self, node):
+        raise NotImplementedError("if")
+        # node.expr
+        # node.content[0]
+
+    def map_IfThen(self, node):
+        raise NotImplementedError("if-then")
+
+    def map_EndIfThen(self, node):
+        return []
+
+    def map_Do(self, node):
+        """Translate a bounded, unit-stride ``do`` loop.
+
+        The loop becomes an index set ``0 <= iname <= stop-start``; while
+        the loop body is processed, the Fortran loop variable is aliased to
+        ``iname + start``.
+
+        :raises TranslationError: if the step is not an integer constant
+        :raises NotImplementedError: for non-unit strides and for loops
+            without loop control
+        """
+        scope = self.scope_stack[-1]
+
+        if node.loopcontrol:
+            loop_var, loop_bounds = node.loopcontrol.split("=")
+            loop_var = loop_var.strip()
+            scope.use_name(loop_var)
+            loop_bounds = [self.parse_expr(s) for s in loop_bounds.split(",")]
+
+            if len(loop_bounds) == 2:
+                start, stop = loop_bounds
+                step = 1
+            elif len(loop_bounds) == 3:
+                start, stop, step = loop_bounds
+            else:
+                raise RuntimeError("loop bounds not understood: %s"
+                        % node.loopcontrol)
+
+            # Check for a constant step first so that symbolic steps get
+            # the more specific error message.
+            if not isinstance(step, int):
+                raise TranslationError(
+                        "non-constant steps not supported: %s" % step)
+
+            if step != 1:
+                raise NotImplementedError(
+                        "do loops with non-unit stride")
+
+            from loopy.symbolic import get_dependencies
+            loop_bound_deps = (
+                    get_dependencies(start)
+                    | get_dependencies(stop)
+                    | get_dependencies(step))
+
+            # {{{ find a usable loopy-side loop name
+
+            loopy_loop_var = loop_var
+            loop_var_suffix = None
+            while True:
+                already_used = False
+                for iset in scope.index_sets:
+                    if loopy_loop_var in iset.get_var_dict(dim_type.set):
+                        already_used = True
+                        break
+
+                if not already_used:
+                    break
+
+                if loop_var_suffix is None:
+                    loop_var_suffix = 0
+
+                loop_var_suffix += 1
+                loopy_loop_var = loop_var + "_%d" % loop_var_suffix
+
+            # }}}
+
+            space = isl.Space.create_from_names(self.isl_context,
+                    set=[loopy_loop_var], params=list(loop_bound_deps))
+
+            from loopy.isl_helpers import iname_rel_aff
+            from loopy.symbolic import aff_from_expr
+            index_set = (
+                    isl.BasicSet.universe(space)
+                    .add_constraint(
+                        isl.Constraint.inequality_from_aff(
+                            iname_rel_aff(space,
+                                loopy_loop_var, ">=",
+                                aff_from_expr(space, 0))))
+                    .add_constraint(
+                        isl.Constraint.inequality_from_aff(
+                            iname_rel_aff(space,
+                                loopy_loop_var, "<=",
+                                aff_from_expr(space, stop-start)))))
+
+            from pymbolic import var
+            scope.active_iname_aliases[loop_var] = \
+                    var(loopy_loop_var) + start
+            scope.active_loopy_inames.add(loopy_loop_var)
+
+            scope.index_sets.append(index_set)
+
+            for c in node.content:
+                self.rec(c)
+
+            del scope.active_iname_aliases[loop_var]
+            scope.active_loopy_inames.remove(loopy_loop_var)
+
+        else:
+            raise NotImplementedError("unbounded do loop")
+
+    def map_EndDo(self, node):
+        pass
+
+    def map_Continue(self, node):
+        raise NotImplementedError("continue")
+
+    def map_Stop(self, node):
+        raise NotImplementedError("stop")
+
+    begin_tag_re = re.compile(r"\$loopy begin tagged:\s*(.*?)\s*$")
+    end_tag_re = re.compile(r"\$loopy end tagged:\s*(.*?)\s*$")
+
+    def map_Comment(self, node):
+        """Interpret '$loopy' directives embedded in comments.
+
+        Recognized are begin/end of transform code, begin/end of tagged
+        regions; any other comment inside a transform region is collected
+        as a line of transform code.
+        """
+        stripped_comment_line = node.content.strip()
+
+        begin_tag_match = self.begin_tag_re.match(stripped_comment_line)
+        end_tag_match = self.end_tag_re.match(stripped_comment_line)
+
+        if stripped_comment_line == "$loopy begin transform":
+            if self.in_transform_code:
+                raise TranslationError("can't enter transform code twice")
+            self.in_transform_code = True
+
+        elif stripped_comment_line == "$loopy end transform":
+            if not self.in_transform_code:
+                raise TranslationError("can't leave transform code twice")
+            self.in_transform_code = False
+
+        elif begin_tag_match:
+            tag = begin_tag_match.group(1)
+            if tag in self.instruction_tags:
+                raise TranslationError("nested begin tag for tag '%s'" % tag)
+            self.instruction_tags.append(tag)
+
+        elif end_tag_match:
+            tag = end_tag_match.group(1)
+            if tag not in self.instruction_tags:
+                raise TranslationError(
+                        "end tag without begin tag for tag '%s'" % tag)
+            self.instruction_tags.remove(tag)
+
+        elif self.in_transform_code:
+            self.transform_code_lines.append(node.content)
+
+    # }}}
+
+    # }}}
+
+    def make_kernels(self, pre_transform_code=None):
+        """Build one loopy kernel per recorded subroutine and transform them.
+
+        The kernels are placed, by subprogram name, into a namespace that
+        also provides ``lp`` and ``np``. *pre_transform_code* (if given) and
+        then the transform code collected from the source are executed in
+        that namespace.
+
+        :arg pre_transform_code: a string of Python code or *None*
+        :returns: a list with the namespace entry for each subprogram name,
+            in source order
+        """
+        kernel_names = [
+                sub.subprogram_name
+                for sub in self.kernels]
+
+        proc_dict = {}
+        proc_dict["lp"] = lp
+        proc_dict["np"] = np
+
+        for sub in self.kernels:
+            # {{{ figure out arguments
+
+            kernel_data = []
+            for arg_name in sub.arg_names:
+                dims = sub.dim_map.get(arg_name)
+
+                if dims is not None:
+                    # default order is set to "F" in kernel creation below
+                    kernel_data.append(
+                            lp.GlobalArg(
+                                arg_name,
+                                dtype=sub.get_type(arg_name),
+                                shape=sub.get_loopy_shape(arg_name),
+                                ))
+                else:
+                    kernel_data.append(
+                            lp.ValueArg(arg_name,
+                                dtype=sub.get_type(arg_name)))
+
+            # }}}
+
+            # {{{ figure out temporary variables
+
+            for var_name in (
+                    sub.known_names()
+                    - set(sub.arg_names)
+                    - sub.all_inames()):
+                dtype = sub.get_type(var_name, none_ok=True)
+                # untyped name in an 'implicit none' routine: skip it
+                if sub.implicit_types is None and dtype is None:
+                    continue
+
+                kernel_data.append(
+                        lp.TemporaryVariable(
+                            var_name, dtype=dtype,
+                            shape=sub.get_loopy_shape(var_name)))
+
+            # }}}
+
+            knl = lp.make_kernel(
+                    sub.index_sets,
+                    sub.instructions,
+                    kernel_data,
+                    name=sub.subprogram_name,
+                    default_order="F"
+                    )
+
+            proc_dict[sub.subprogram_name] = knl
+
+        transform_code = remove_common_indentation(
+                self.transform_code_lines)
+
+        # NOTE: the code below is exec'd as-is. It comes from the caller and
+        # from comments in the Fortran source, so only feed trusted input.
+        if pre_transform_code is not None:
+            proc_dict["_MODULE_SOURCE_CODE"] = pre_transform_code
+            exec(compile(pre_transform_code,
+                "<loopy transforms>", "exec"), proc_dict)
+
+        proc_dict["_MODULE_SOURCE_CODE"] = transform_code
+        exec(compile(transform_code,
+            "<loopy transforms>", "exec"), proc_dict)
+
+        return [proc_dict[knl_name]
+                for knl_name in kernel_names]
+
+# }}}
+
+# vim: foldmethod=marker
diff --git a/loopy/frontend/fortran/tree.py b/loopy/frontend/fortran/tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..102b9cc3ca853005f9848303cea944608f355094
--- /dev/null
+++ b/loopy/frontend/fortran/tree.py
@@ -0,0 +1,89 @@
+from __future__ import division, with_statement
+
+__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to
permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import re
+
+
+class FTreeWalkerBase(object):
+    """Base class for walkers over a Fortran parse tree.
+
+    Nodes are dispatched to ``map_<ClassName>`` methods by :meth:`rec`;
+    expression strings are parsed with a :class:`FortranExpressionParser`
+    bound to this walker.
+    """
+
+    def __init__(self):
+        # NOTE(review): imports from a 'floopy' package although this file
+        # is added under loopy/frontend/fortran -- confirm the module path.
+        from floopy.fortran.expression import FortranExpressionParser
+        self.expr_parser = FortranExpressionParser(self)
+
+    def rec(self, expr, *args, **kwargs):
+        """Dispatch *expr* to the first matching ``map_<ClassName>`` method.
+
+        The classes in the method resolution order of ``type(expr)`` are
+        tried in turn. The method is looked up on the class given by the
+        keyword argument *dispatch_class* (default: ``type(self)``) and
+        called with *self*, *expr* and the remaining arguments.
+
+        :raises NotImplementedError: if no matching method exists
+        """
+        dispatch_class = kwargs.pop("dispatch_class", type(self))
+
+        for cls in type(expr).__mro__:
+            try:
+                method = getattr(dispatch_class, "map_"+cls.__name__)
+            except AttributeError:
+                continue
+
+            return method(self, expr, *args, **kwargs)
+
+        raise NotImplementedError(
+                "%s does not know how to map type '%s'"
+                % (type(self).__name__,
+                    type(expr)))
+
+    # 'name' optionally followed by a parenthesized 'shape' specification;
+    # both fragments are raw strings so that '\(' reaches the regex engine
+    # without relying on Python passing unknown escapes through.
+    ENTITY_RE = re.compile(
+            r"^(?P<name>[_0-9a-zA-Z]+)"
+            r"(\((?P<shape>[-+*0-9:a-zA-Z, \t]+)\))?$")
+
+    def parse_dimension_specs(self, dim_decls):
+        """Yield ``(name, shape)`` for each declaration string in *dim_decls*.
+
+        *shape* is *None* if the declaration carries no dimensions.
+        Otherwise it is a list with one entry per comma-separated axis, each
+        a list of one or two parsed expressions (split at ``:``).
+        """
+        def parse_bounds(bounds_str):
+            start_end = bounds_str.split(":")
+
+            assert 1 <= len(start_end) <= 2
+
+            return [self.parse_expr(s) for s in start_end]
+
+        for decl in dim_decls:
+            entity_match = self.ENTITY_RE.match(decl)
+            assert entity_match
+
+            groups = entity_match.groupdict()
+            name = groups["name"]
+            assert name
+
+            if groups["shape"]:
+                shape = [parse_bounds(s) for s in groups["shape"].split(",")]
+            else:
+                shape = None
+
+            yield name, shape
+
+    def __call__(self, expr, *args, **kwargs):
+        return self.rec(expr, *args, **kwargs)
+
+    # {{{ expressions
+
+    def parse_expr(self, expr_str):
+        """Parse the expression string *expr_str* with this walker's
+        expression parser."""
+        return self.expr_parser(expr_str)
+
+    # }}}
+
+# vim: foldmethod=marker
diff --git a/requirements.txt b/requirements.txt
index 16cb4e70ff8b7306ec64a7e6b65506775ce8a009..33aca183d68a80aa33697efc77863a95d93dde12 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,5 @@ git+git://github.com/inducer/islpy
 cgen
 git+git://github.com/pyopencl/pyopencl
 git+git://github.com/inducer/pymbolic
+
+hg+https://code.google.com/p/f2py/
diff --git a/setup.py b/setup.py
index 7651315bfe2178cc81bfb71dac61bab4053a0985..84dd33b3041e7544bfef0109c1d7771c03927441 100644
--- a/setup.py
+++ b/setup.py
@@ -45,6 +45,8 @@ setup(name="loo.py",
           "six",
           ],
 
+      scripts=["bin/loopy"],
+
       author="Andreas Kloeckner",
       url="http://mathema.tician.de/software/loopy",
       author_email="inform@tiker.net",