Skip to content
Snippets Groups Projects
Commit 426c0319 authored by Andreas Klöckner's avatar Andreas Klöckner
Browse files

Implement reduction iname uniquification.

parent 020203ae
No related branches found
No related tags found
No related merge requests found
...@@ -34,6 +34,8 @@ Things to consider ...@@ -34,6 +34,8 @@ Things to consider
- Measure efficiency of corner cases - Measure efficiency of corner cases
- Loopy as a data model for implementing custom rewritings
To-do To-do
^^^^^ ^^^^^
...@@ -45,12 +47,18 @@ To-do ...@@ -45,12 +47,18 @@ To-do
- user interface for dim length prescription - user interface for dim length prescription
- Way too many barriers in SEM test.
- Deal with equality constraints. - Deal with equality constraints.
(These arise, e.g., when partitioning a loop of length 16 into 16s.) (These arise, e.g., when partitioning a loop of length 16 into 16s.)
Future ideas Future ideas
^^^^^^^^^^^^ ^^^^^^^^^^^^
- Float4 joining on fetch/store?
- How can one automatically generate something like microblocks?
- Better for loop bound generation - Better for loop bound generation
-> Try a triangular loop -> Try a triangular loop
......
...@@ -699,7 +699,8 @@ class LoopKernel(Record): ...@@ -699,7 +699,8 @@ class LoopKernel(Record):
if all_inames_by_insns != self.all_inames(): if all_inames_by_insns != self.all_inames():
raise RuntimeError("inames collected from instructions (%s) " raise RuntimeError("inames collected from instructions (%s) "
"do not match domain inames (%s)" "do not match domain inames (%s)"
% (", ".join(all_inames_by_insns), ", ".join(self.all_inames()))) % (", ".join(sorted(all_inames_by_insns)),
", ".join(sorted(self.all_inames()))))
global_sizes = {} global_sizes = {}
local_sizes = {} local_sizes = {}
...@@ -780,11 +781,13 @@ class LoopKernel(Record): ...@@ -780,11 +781,13 @@ class LoopKernel(Record):
def __str__(self): def __str__(self):
lines = [] lines = []
for insn in self.instructions:
lines.append(str(insn))
lines.append("")
for iname in sorted(self.all_inames()): for iname in sorted(self.all_inames()):
lines.append("%s: %s" % (iname, self.iname_to_tag.get(iname))) lines.append("%s: %s" % (iname, self.iname_to_tag.get(iname)))
lines.append("")
lines.append(str(self.domain))
lines.append("")
for insn in self.instructions:
lines.append(str(insn))
return "\n".join(lines) return "\n".join(lines)
......
...@@ -6,6 +6,86 @@ import pyopencl.characterize as cl_char ...@@ -6,6 +6,86 @@ import pyopencl.characterize as cl_char
# {{{ make reduction variables unique
def make_reduction_variables_unique(kernel):
    """Return a copy of *kernel* in which reductions no longer share inames.

    Every reduction iname that is used by more than one reduction expression
    is replaced, in all but the last-visited use, by a freshly named iname.
    The reduction body is rewritten to refer to the new name, and the kernel
    domain is extended with one new axis per created iname.

    :arg kernel: the kernel to process. This function reads its
        ``instructions`` and ``domain`` attributes and calls its
        ``make_unique_var_name`` and ``copy`` methods.
    :returns: ``kernel.copy(instructions=..., domain=...)`` with the rewritten
        instructions and the extended domain.
    """
    from loopy.symbolic import ReductionCallbackMapper

    # {{{ count number of uses of each reduction iname

    reduction_iname_uses = {}

    def count_reduction_iname_uses(expr, rec):
        # Descend first so that nested reductions are counted as well.
        rec(expr.expr)
        for iname in expr.inames:
            reduction_iname_uses[iname] = (
                    reduction_iname_uses.get(iname, 0)
                    + 1)

    cb_mapper = ReductionCallbackMapper(count_reduction_iname_uses)

    for insn in kernel.instructions:
        cb_mapper(insn.expression)

    # }}}

    # {{{ make iname uses in reduction unique

    # Parallel lists: old_inames[i] was duplicated into new_inames[i].
    old_inames = []
    new_inames = []

    def ensure_reduction_iname_uniqueness(expr, rec):
        child = rec(expr.expr)

        # Maps each iname renamed *in this reduction* to its replacement.
        # The pairing is recorded at creation time: zipping expr.inames with
        # the list of created names would mis-pair them whenever an earlier
        # iname of this reduction is left unrenamed.
        renamed = {}
        new_red_inames = []

        for iname in expr.inames:
            if reduction_iname_uses[iname] > 1:
                new_iname = kernel.make_unique_var_name(iname, set(new_inames))

                old_inames.append(iname)
                new_inames.append(new_iname)
                renamed[iname] = new_iname
                new_red_inames.append(new_iname)

                # One fewer reduction shares the original name now; the last
                # remaining user gets to keep it.
                reduction_iname_uses[iname] -= 1
            else:
                new_red_inames.append(iname)

        if renamed:
            from loopy.symbolic import SubstitutionMapper
            from pymbolic.mapper.substitutor import make_subst_func
            from pymbolic import var

            subst_dict = dict(
                    (old_iname, var(new_iname))
                    for old_iname, new_iname in renamed.items())
            subst_map = SubstitutionMapper(make_subst_func(subst_dict))

            child = subst_map(child)

        from loopy.symbolic import Reduction
        return Reduction(
                operation=expr.operation,
                inames=tuple(new_red_inames),
                expr=child)

    cb_mapper = ReductionCallbackMapper(ensure_reduction_iname_uniqueness)

    new_insns = [
            insn.copy(expression=cb_mapper(insn.expression))
            for insn in kernel.instructions]

    # Add one domain axis per created iname.
    # NOTE(review): presumably duplicate_axes gives the new axis the same
    # constraints as the old one -- confirm in loopy.isl_helpers.
    domain = kernel.domain
    from loopy.isl_helpers import duplicate_axes
    for old, new in zip(old_inames, new_inames):
        domain = duplicate_axes(domain, [old], [new])

    return kernel.copy(instructions=new_insns, domain=domain)

# }}}
# }}}
# {{{ rewrite reduction to imperative form # {{{ rewrite reduction to imperative form
def realize_reduction(kernel): def realize_reduction(kernel):
...@@ -466,6 +546,7 @@ def adjust_local_temp_var_storage(kernel): ...@@ -466,6 +546,7 @@ def adjust_local_temp_var_storage(kernel):
def preprocess_kernel(kernel): def preprocess_kernel(kernel):
kernel = make_reduction_variables_unique(kernel)
kernel = realize_reduction(kernel) kernel = realize_reduction(kernel)
# {{{ check that all CSEs have been realized # {{{ check that all CSEs have been realized
......
...@@ -514,7 +514,7 @@ class IndexVariableFinder(CombineMapper): ...@@ -514,7 +514,7 @@ class IndexVariableFinder(CombineMapper):
result = self.rec(expr.expr) result = self.rec(expr.expr)
if not (set(expr.inames) & result): if not (set(expr.inames) & result):
raise RuntimeError("reduction '%s' does not depend on " raise RuntimeError("reduction '%s' does not depend on "
"reduction inames" % expr) "reduction inames (%s)" % (expr, ",".join(expr.inames)))
if self.include_reduction_inames: if self.include_reduction_inames:
return result return result
else: else:
......
...@@ -4,7 +4,6 @@ import numpy as np ...@@ -4,7 +4,6 @@ import numpy as np
import numpy.linalg as la import numpy.linalg as la
import pyopencl as cl import pyopencl as cl
import pyopencl.array as cl_array import pyopencl.array as cl_array
import pyopencl.clrandom as cl_random
import loopy as lp import loopy as lp
from pyopencl.tools import pytest_generate_tests_for_pyopencl \ from pyopencl.tools import pytest_generate_tests_for_pyopencl \
...@@ -287,16 +286,16 @@ def test_sem_3d(ctx_factory): ...@@ -287,16 +286,16 @@ def test_sem_3d(ctx_factory):
# K - run-time symbolic # K - run-time symbolic
n = 8 n = 8
knl = lp.make_kernel(ctx.devices[0], knl = lp.make_kernel(ctx.devices[0],
"[K] -> {[i,j,k,e,m,mp]: 0<=i,j,k,m<%d and 0<=e<K}" % n, "[K] -> {[i,j,k,e,m]: 0<=i,j,k,m<%d and 0<=e<K}" % n,
[ [
"[|i,j,k] <float32> ur[i,j,k] = sum_float32(m, D[i,m]*u[m,j,k,e])", "[|i,j,k] <float32> ur[i,j,k] = sum_float32(m, D[i,m]*u[m,j,k,e])",
"[|i,j,k] <float32> us[i,j,k] = sum_float32(m, D[j,m]*u[i,m,k,e])", "[|i,j,k] <float32> us[i,j,k] = sum_float32(m, D[j,m]*u[i,m,k,e])",
"[|i,j,k] <float32> ut[i,j,k] = sum_float32(m, D[k,m]*u[i,j,m,e])", "[|i,j,k] <float32> ut[i,j,k] = sum_float32(m, D[k,m]*u[i,j,m,e])",
"lap[i,j,k,e] = " "lap[i,j,k,e] = "
" sum_float32(m, D[m,i]*(G[0,m,j,k,e]*ur[m,j,k,e] + G[1,m,j,k,e]*us[m,j,k,e] + G[2,m,j,k,e]*ut[m,j,k,e]))" " sum_float32(m, D[m,i]*(G[0,m,j,k,e]*ur[m,j,k] + G[1,m,j,k,e]*us[m,j,k] + G[2,m,j,k,e]*ut[m,j,k]))"
"+ sum_float32(m, D[m,j]*(G[1,i,m,k,e]*ur[i,m,k,e] + G[3,i,m,k,e]*us[i,m,k,e] + G[4,i,m,k,e]*ut[i,m,k,e]))" "+ sum_float32(m, D[m,j]*(G[1,i,m,k,e]*ur[i,m,k] + G[3,i,m,k,e]*us[i,m,k] + G[4,i,m,k,e]*ut[i,m,k]))"
"+ sum_float32(m, D[m,k]*(G[2,i,j,m,e]*ur[i,j,m,e] + G[4,i,j,m,e]*us[i,j,m,e] + G[5,i,j,m,e]*ut[i,j,m,e]))" "+ sum_float32(m, D[m,k]*(G[2,i,j,m,e]*ur[i,j,m] + G[4,i,j,m,e]*us[i,j,m] + G[5,i,j,m,e]*ut[i,j,m]))"
], ],
[ [
lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("u", dtype, shape=field_shape, order=order),
...@@ -307,17 +306,18 @@ def test_sem_3d(ctx_factory): ...@@ -307,17 +306,18 @@ def test_sem_3d(ctx_factory):
], ],
name="semlap", assumptions="K>=1") name="semlap", assumptions="K>=1")
print knl #print knl
#for tv in knl.temporary_variables.iteritems(): #for tv in knl.temporary_variables.iteritems():
#print tv #print tv
1/0 #1/0
knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1))
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp") #knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1")) knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1"))
#knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k"]) #knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k"])
knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k", "mp"]) #knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k", "mp"])
print knl knl = lp.preprocess_kernel(knl)
#print knl
#1/0 #1/0
kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.generate_loop_schedules(knl)
...@@ -343,10 +343,6 @@ def test_sem_3d(ctx_factory): ...@@ -343,10 +343,6 @@ def test_sem_3d(ctx_factory):
if __name__ == "__main__": if __name__ == "__main__":
# make sure that import failures get reported, instead of skipping the
# tests.
import pyopencl as cl
import sys import sys
if len(sys.argv) > 1: if len(sys.argv) > 1:
exec(sys.argv[1]) exec(sys.argv[1])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment