Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Showing
with 6366 additions and 4797 deletions
This diff is collapsed.
/* Shorthands for the OpenCL local and group indices, cast to int. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/*
 * Integer division of a by b that rounds toward negative infinity.
 * For negative a, (b)-1 is subtracted first so that C's truncating division
 * yields the floor; as the name says, this is only valid for positive b.
 */
#define int_floor_div_pos_b(a,b) ( ( (a) - ( ((a)<0) ? ((b)-1) : 0 ) ) / (b) )
/*
 * Writes out = 2 * a, processing the data as float4 vectors.
 * The required work-group size is fixed at (1, 1, 1).
 * NOTE(review): n appears to be the number of scalar float entries (it is
 * compared against 4 * i_outer below) -- confirm against the caller.
 */
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float4 const *__restrict__ a, int const n, __global float4 *__restrict__ out)
{
/* bulk slab for 'i_outer' */
/* Every vector except the last one is handled with an unguarded float4 operation. */
for (int i_outer = 0; i_outer <= -2 + int_floor_div_pos_b(3 + n, 4); ++i_outer)
out[i_outer] = 2.0f * a[i_outer];
/* final slab for 'i_outer' */
{
/* Index of the last vector: n - 1 - floor(3n/4), i.e. ceil(n/4) - 1. */
int const i_outer = -1 + n + -1 * int_floor_div_pos_b(3 * n, 4);
/* Nothing to do when n <= 0. */
if (-1 + n >= 0)
{
/* Each component k is written only if 4 * i_outer + k < n. */
if (-1 + -4 * i_outer + n >= 0)
out[i_outer].s0 = 2.0f * a[i_outer].s0;
if (-1 + -4 * i_outer + -1 + n >= 0)
out[i_outer].s1 = 2.0f * a[i_outer].s1;
if (-1 + -4 * i_outer + -1 * 2 + n >= 0)
out[i_outer].s2 = 2.0f * a[i_outer].s2;
if (-1 + -4 * i_outer + -1 * 3 + n >= 0)
out[i_outer].s3 = 2.0f * a[i_outer].s3;
}
}
}
import numpy as np
import pyopencl as cl
import pyopencl.array
import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa: F401
# Set up an OpenCL context and a command queue to run on.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
# Input: the float32 values 0 .. n-1, created through the queue.
n = 15 * 10**6
a = cl.array.arange(queue, n, dtype=np.float32)
# Kernel definition: out[i] = 2*a[i] for every i with 0 <= i < n.
knl = lp.make_kernel(
"{ [i]: 0<=i<n }",
"out[i] = 2*a[i]")
# NOTE(review): write_code=True presumably makes loopy emit the generated
# code -- confirm against the loopy options documentation.
knl = lp.set_options(knl, write_code=True)
# Split 'i' into chunks of 4 and tag the inner part "vec"; slabs=(0, 1)
# is passed through to split_iname unchanged.
knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="vec")
# Split axis 0 of both arrays by 4 and tag the resulting axes "C,vec".
knl = lp.split_array_axis(knl, "a,out", axis_nr=0, count=4)
knl = lp.tag_array_axes(knl, "a,out", "C,vec")
# Run the kernel; 'a' is passed as a 2D view with rows of 4 entries.
knl(queue, a=a.reshape(-1, 4), n=n)
This diff is collapsed.
from __future__ import annotations
import loopy.cli
# Hand control to the command-line entry point provided by loopy.cli.
loopy.cli.main()
This diff is collapsed.
This diff is collapsed.
from __future__ import annotations
import sys
import numpy as np
import loopy as lp
def to_python_literal(value):
    """Convert the textual value of a ``#define`` into Python literal source.

    The conversions are tried in this order:

    1. Anything :func:`int` accepts becomes a normalized decimal integer
       literal. Normalizing matters: text such as ``007`` is accepted by
       ``int()`` but is not a valid Python 3 integer literal when emitted
       verbatim.
    2. Anything :func:`float` accepts becomes the ``repr`` of that float.
    3. The same as (2) after dropping one trailing ``f`` (C-style
       single-precision suffix, e.g. ``2.0f``).
    4. Everything else becomes a quoted string literal.

    Non-finite floats (``inf``, ``-inf``, ``nan``) are emitted as
    ``float('inf')`` etc., because their bare ``repr`` is an undefined name
    rather than a literal.

    :arg value: the value text, as a :class:`str`.
    :returns: a :class:`str` holding a Python expression.
    """
    try:
        int_value = int(value)
    except ValueError:
        pass
    else:
        # It's an integer
        return repr(int_value)

    float_candidates = [value]
    if value.endswith("f"):
        float_candidates.append(value[:-1])

    for text in float_candidates:
        try:
            float_value = float(text)
        except ValueError:
            continue

        # It's a float
        float_repr = repr(float_value)
        is_nan = float_value != float_value
        if is_nan or float_value in (float("inf"), float("-inf")):
            # A bare 'inf'/'nan' would be looked up as a variable name.
            return f"float({float_repr!r})"
        return float_repr

    return repr(value)
def defines_to_python_code(defines_str):
    """Turn C-preprocessor-style ``#define NAME VALUE`` lines into Python
    assignment statements.

    Whitespace-only lines are ignored. Every other line must consist of
    ``#define``, a name made of letters, digits and underscores, and a value;
    the value is converted with :func:`to_python_literal`.

    :arg defines_str: the text containing the ``#define`` lines.
    :returns: the statements ``NAME = <literal>``, joined by newlines.
    :raises RuntimeError: if a non-blank line is not a recognizable
        ``#define``.
    """
    import re

    pattern = re.compile(r"^\#define\s+([a-zA-Z0-9_]+)\s+(.*)$")

    def convert(text):
        # Translate a single '#define' line into one assignment statement.
        parsed = pattern.match(text)
        if parsed is None:
            raise RuntimeError("#define not understood: '%s'" % text)

        name, raw_value = parsed.groups()
        return "{} = {}".format(name, to_python_literal(raw_value))

    return "\n".join(
        convert(text)
        for text in defines_str.split("\n")
        if text.strip())
def main():
    """Command-line entry point of the stand-alone loopy frontend.

    Reads a kernel description from ``INPUT_FILE`` (or stdin when it is
    ``-``), optionally applies a transform script and OCCA-style ``#define``
    files, generates device code for the selected target, and writes it to
    ``OUTPUT_FILE`` (or stdout when it is ``-``).

    The input language is taken from ``--lang`` if given, otherwise it is
    deduced from the input file extension.

    In ``loopy`` mode the input (and the optional transform file) is executed
    as Python code and must leave the kernel in a variable named ``lp_knl``.

    :raises ValueError: for an unknown ``--target``.
    :raises RuntimeError: if the language cannot be deduced or is unknown, if
        a ``#define`` line is not understood, or if ``lp_knl`` is missing.
    :raises NotImplementedError: if the result is a list of kernels instead
        of a ``TranslationUnit``.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
            help="Defaults to stdout ('-').", nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target", choices=(
        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
        default="opencl")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--print-ir", action="store_true")

    args = parser.parse_args()

    # {{{ target selection

    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = lambda: ISPCTarget()  # noqa: E731
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = lambda: CTarget(fortran_abi=True)  # noqa: E731
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget
    else:
        raise ValueError(f"unknown target: {args.target}")

    lp.set_default_target(target)

    # }}}

    # {{{ read input, deduce language

    lang = None
    if args.infile == "-":
        # No file extension to go by: the language must come from --lang.
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
                ".py": "loopy",
                ".loopy": "loopy",
                ".floopy": "fortran",
                ".f90": "fortran",
                ".F90": "fortran",
                ".fpp": "fortran",
                ".f": "fortran",
                ".f77": "fortran",
                ".F77": "fortran",
                }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always wins over the extension-based guess.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        # Make modules next to the input file importable from within it.
        from os import getcwd
        from os.path import abspath, dirname

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        sys.path.append(infile_dirname)

        # }}}

        # Namespace shared by all executed snippets.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        # NOTE: the files below are executed as Python code. This is the
        # purpose of this mode, but it means they must be trusted.
        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"), data_dic)

        # The input was already read above; re-opening args.infile here would
        # fail when the input comes from stdin ('-').
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                exec(compile(xform_fd.read(),
                    args.transform, "exec"), data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError as err:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                    "to be defined on exit") from err

        # Use a ready-made TranslationUnit as is; anything else is treated
        # as a bare kernel and reported as unsupported further down.
        if isinstance(kernel, lp.TranslationUnit):
            t_unit = kernel
        else:
            t_unit = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None

        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            with open(args.occa_defines) as defines_fd:
                # The generated defines do not end in a newline, so add one
                # to keep the last define and the first line of the transform
                # code from being fused into a single (invalid) statement.
                pre_transform_code = (
                        defines_to_python_code(defines_fd.read())
                        + "\n"
                        + pre_transform_code)

        t_unit = lp.parse_transformed_fortran(
                infile_content, pre_transform_code=pre_transform_code,
                filename=args.infile)

    else:
        raise RuntimeError("unknown language: '%s'"
                % lang)

    if not isinstance(t_unit, lp.TranslationUnit):
        # FIXME
        assert isinstance(t_unit, list)  # of kernels
        raise NotImplementedError("convert list of kernels to TranslationUnit")

    if args.print_ir:
        print(t_unit, file=sys.stderr)

    t_unit = lp.preprocess_kernel(t_unit)
    cgr = lp.generate_code_v2(t_unit)

    outfile = args.outfile if args.outfile is not None else "-"

    code = cgr.device_code()

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.