Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
loopy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ben Sepanski
loopy
Commits
426c0319
Commit
426c0319
authored
13 years ago
by
Andreas Klöckner
Browse files
Options
Downloads
Patches
Plain Diff
Implement reduction iname uniquification.
parent
020203ae
No related branches found
No related tags found
No related merge requests found
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
MEMO
+8
-0
8 additions, 0 deletions
MEMO
loopy/kernel.py
+7
-4
7 additions, 4 deletions
loopy/kernel.py
loopy/preprocess.py
+81
-0
81 additions, 0 deletions
loopy/preprocess.py
loopy/symbolic.py
+1
-1
1 addition, 1 deletion
loopy/symbolic.py
test/test_sem.py
+9
-13
9 additions, 13 deletions
test/test_sem.py
with
106 additions
and
18 deletions
MEMO
+
8
−
0
View file @
426c0319
...
@@ -34,6 +34,8 @@ Things to consider
...
@@ -34,6 +34,8 @@ Things to consider
- Measure efficiency of corner cases
- Measure efficiency of corner cases
- Loopy as a data model for implementing custom rewritings
To-do
To-do
^^^^^
^^^^^
...
@@ -45,12 +47,18 @@ To-do
...
@@ -45,12 +47,18 @@ To-do
- user interface for dim length prescription
- user interface for dim length prescription
- Way too many barriers in SEM test.
- Deal with equality constraints.
- Deal with equality constraints.
(These arise, e.g., when partitioning a loop of length 16 into 16s.)
(These arise, e.g., when partitioning a loop of length 16 into 16s.)
Future ideas
Future ideas
^^^^^^^^^^^^
^^^^^^^^^^^^
- Float4 joining on fetch/store?
- How can one automatically generate something like microblocks?
- Better for loop bound generation
- Better for loop bound generation
-> Try a triangular loop
-> Try a triangular loop
...
...
This diff is collapsed.
Click to expand it.
loopy/kernel.py
+
7
−
4
View file @
426c0319
...
@@ -699,7 +699,8 @@ class LoopKernel(Record):
...
@@ -699,7 +699,8 @@ class LoopKernel(Record):
if
all_inames_by_insns
!=
self
.
all_inames
():
if
all_inames_by_insns
!=
self
.
all_inames
():
raise
RuntimeError
(
"
inames collected from instructions (%s)
"
raise
RuntimeError
(
"
inames collected from instructions (%s)
"
"
do not match domain inames (%s)
"
"
do not match domain inames (%s)
"
%
(
"
,
"
.
join
(
all_inames_by_insns
),
"
,
"
.
join
(
self
.
all_inames
())))
%
(
"
,
"
.
join
(
sorted
(
all_inames_by_insns
)),
"
,
"
.
join
(
sorted
(
self
.
all_inames
()))))
global_sizes
=
{}
global_sizes
=
{}
local_sizes
=
{}
local_sizes
=
{}
...
@@ -780,11 +781,13 @@ class LoopKernel(Record):
...
@@ -780,11 +781,13 @@ class LoopKernel(Record):
def
__str__
(
self
):
def
__str__
(
self
):
lines
=
[]
lines
=
[]
for
insn
in
self
.
instructions
:
lines
.
append
(
str
(
insn
))
lines
.
append
(
""
)
for
iname
in
sorted
(
self
.
all_inames
()):
for
iname
in
sorted
(
self
.
all_inames
()):
lines
.
append
(
"
%s: %s
"
%
(
iname
,
self
.
iname_to_tag
.
get
(
iname
)))
lines
.
append
(
"
%s: %s
"
%
(
iname
,
self
.
iname_to_tag
.
get
(
iname
)))
lines
.
append
(
""
)
lines
.
append
(
str
(
self
.
domain
))
lines
.
append
(
""
)
for
insn
in
self
.
instructions
:
lines
.
append
(
str
(
insn
))
return
"
\n
"
.
join
(
lines
)
return
"
\n
"
.
join
(
lines
)
...
...
This diff is collapsed.
Click to expand it.
loopy/preprocess.py
+
81
−
0
View file @
426c0319
...
@@ -6,6 +6,86 @@ import pyopencl.characterize as cl_char
...
@@ -6,6 +6,86 @@ import pyopencl.characterize as cl_char
# {{{ make reduction variables unique
def make_reduction_variables_unique(kernel):
    """Return a copy of *kernel* in which no reduction iname is shared
    between two reductions.

    The instructions are traversed twice. The first pass counts, for each
    iname, how many reduction nodes list it among their ``inames``. The
    second pass rewrites every reduction that uses an iname whose remaining
    count is greater than one: a fresh name is obtained from
    ``kernel.make_unique_var_name``, substituted for the old iname in the
    reduction's (already processed) operand, and used in the rebuilt
    ``Reduction`` node. The last remaining use of each iname keeps the
    original name.

    For every renamed iname, the kernel's domain is extended via
    ``duplicate_axes(domain, [old], [new])``.

    :arg kernel: the kernel to process. Only ``instructions``, ``domain``,
        ``make_unique_var_name`` and ``copy`` are used here.
    :returns: ``kernel.copy(instructions=..., domain=...)`` with the
        rewritten instructions and the extended domain.
    """
    from loopy.symbolic import ReductionCallbackMapper

    # {{{ count number of uses of each reduction iname

    reduction_iname_uses = {}

    def count_reduction_iname_uses(expr, rec):
        # Recurse first so that reductions nested in the operand are
        # counted as well.
        rec(expr.expr)
        for iname in expr.inames:
            reduction_iname_uses[iname] = (
                    reduction_iname_uses.get(iname, 0)
                    + 1)

    cb_mapper = ReductionCallbackMapper(count_reduction_iname_uses)

    for insn in kernel.instructions:
        cb_mapper(insn.expression)

    # }}}

    # {{{ make iname uses in reduction unique

    # Parallel lists of (old iname, replacement iname), accumulated across
    # all instructions and used below to duplicate the domain axes.
    old_inames = []
    new_inames = []

    def ensure_reduction_iname_uniqueness(expr, rec):
        child = rec(expr.expr)

        # Renamings performed for *this* reduction only: old name -> new
        # name. Each pair is recorded at the point of renaming so that it
        # stays correct when only some of expr.inames get renamed (pairing
        # expr.inames positionally with the list of created names would
        # misalign in that case).
        my_renames = {}
        new_red_inames = []

        for iname in expr.inames:
            if reduction_iname_uses[iname] > 1:
                new_iname = kernel.make_unique_var_name(
                        iname, set(new_inames))

                old_inames.append(iname)
                new_inames.append(new_iname)
                my_renames[iname] = new_iname
                new_red_inames.append(new_iname)

                # One fewer reduction still needs a name of its own; the
                # final user keeps the original iname.
                reduction_iname_uses[iname] -= 1
            else:
                new_red_inames.append(iname)

        if my_renames:
            from loopy.symbolic import SubstitutionMapper
            from pymbolic.mapper.substitutor import make_subst_func
            from pymbolic import var

            subst_dict = dict(
                    (old_iname, var(new_iname))
                    for old_iname, new_iname in my_renames.items())
            subst_map = SubstitutionMapper(make_subst_func(subst_dict))

            child = subst_map(child)

        from loopy.symbolic import Reduction
        return Reduction(
                operation=expr.operation,
                inames=tuple(new_red_inames),
                expr=child)

    cb_mapper = ReductionCallbackMapper(ensure_reduction_iname_uniqueness)

    new_insns = [
            insn.copy(expression=cb_mapper(insn.expression))
            for insn in kernel.instructions]

    domain = kernel.domain

    from loopy.isl_helpers import duplicate_axes
    for old, new in zip(old_inames, new_inames):
        domain = duplicate_axes(domain, [old], [new])

    return kernel.copy(instructions=new_insns, domain=domain)

    # }}}
# }}}
# {{{ rewrite reduction to imperative form
# {{{ rewrite reduction to imperative form
def
realize_reduction
(
kernel
):
def
realize_reduction
(
kernel
):
...
@@ -466,6 +546,7 @@ def adjust_local_temp_var_storage(kernel):
...
@@ -466,6 +546,7 @@ def adjust_local_temp_var_storage(kernel):
def
preprocess_kernel
(
kernel
):
def
preprocess_kernel
(
kernel
):
kernel
=
make_reduction_variables_unique
(
kernel
)
kernel
=
realize_reduction
(
kernel
)
kernel
=
realize_reduction
(
kernel
)
# {{{ check that all CSEs have been realized
# {{{ check that all CSEs have been realized
...
...
This diff is collapsed.
Click to expand it.
loopy/symbolic.py
+
1
−
1
View file @
426c0319
...
@@ -514,7 +514,7 @@ class IndexVariableFinder(CombineMapper):
...
@@ -514,7 +514,7 @@ class IndexVariableFinder(CombineMapper):
result
=
self
.
rec
(
expr
.
expr
)
result
=
self
.
rec
(
expr
.
expr
)
if
not
(
set
(
expr
.
inames
)
&
result
):
if
not
(
set
(
expr
.
inames
)
&
result
):
raise
RuntimeError
(
"
reduction
'
%s
'
does not depend on
"
raise
RuntimeError
(
"
reduction
'
%s
'
does not depend on
"
"
reduction inames
"
%
expr
)
"
reduction inames
(%s)
"
%
(
expr
,
"
,
"
.
join
(
expr
.
inames
))
)
if
self
.
include_reduction_inames
:
if
self
.
include_reduction_inames
:
return
result
return
result
else
:
else
:
...
...
This diff is collapsed.
Click to expand it.
test/test_sem.py
+
9
−
13
View file @
426c0319
...
@@ -4,7 +4,6 @@ import numpy as np
...
@@ -4,7 +4,6 @@ import numpy as np
import
numpy.linalg
as
la
import
numpy.linalg
as
la
import
pyopencl
as
cl
import
pyopencl
as
cl
import
pyopencl.array
as
cl_array
import
pyopencl.array
as
cl_array
import
pyopencl.clrandom
as
cl_random
import
loopy
as
lp
import
loopy
as
lp
from
pyopencl.tools
import
pytest_generate_tests_for_pyopencl
\
from
pyopencl.tools
import
pytest_generate_tests_for_pyopencl
\
...
@@ -287,16 +286,16 @@ def test_sem_3d(ctx_factory):
...
@@ -287,16 +286,16 @@ def test_sem_3d(ctx_factory):
# K - run-time symbolic
# K - run-time symbolic
n
=
8
n
=
8
knl
=
lp
.
make_kernel
(
ctx
.
devices
[
0
],
knl
=
lp
.
make_kernel
(
ctx
.
devices
[
0
],
"
[K] -> {[i,j,k,e,m
,mp
]: 0<=i,j,k,m<%d and 0<=e<K}
"
%
n
,
"
[K] -> {[i,j,k,e,m]: 0<=i,j,k,m<%d and 0<=e<K}
"
%
n
,
[
[
"
[|i,j,k] <float32> ur[i,j,k] = sum_float32(m, D[i,m]*u[m,j,k,e])
"
,
"
[|i,j,k] <float32> ur[i,j,k] = sum_float32(m, D[i,m]*u[m,j,k,e])
"
,
"
[|i,j,k] <float32> us[i,j,k] = sum_float32(m, D[j,m]*u[i,m,k,e])
"
,
"
[|i,j,k] <float32> us[i,j,k] = sum_float32(m, D[j,m]*u[i,m,k,e])
"
,
"
[|i,j,k] <float32> ut[i,j,k] = sum_float32(m, D[k,m]*u[i,j,m,e])
"
,
"
[|i,j,k] <float32> ut[i,j,k] = sum_float32(m, D[k,m]*u[i,j,m,e])
"
,
"
lap[i,j,k,e] =
"
"
lap[i,j,k,e] =
"
"
sum_float32(m, D[m,i]*(G[0,m,j,k,e]*ur[m,j,k
,e
] + G[1,m,j,k,e]*us[m,j,k
,e
] + G[2,m,j,k,e]*ut[m,j,k
,e
]))
"
"
sum_float32(m, D[m,i]*(G[0,m,j,k,e]*ur[m,j,k] + G[1,m,j,k,e]*us[m,j,k] + G[2,m,j,k,e]*ut[m,j,k]))
"
"
+ sum_float32(m, D[m,j]*(G[1,i,m,k,e]*ur[i,m,k
,e
] + G[3,i,m,k,e]*us[i,m,k
,e
] + G[4,i,m,k,e]*ut[i,m,k
,e
]))
"
"
+ sum_float32(m, D[m,j]*(G[1,i,m,k,e]*ur[i,m,k] + G[3,i,m,k,e]*us[i,m,k] + G[4,i,m,k,e]*ut[i,m,k]))
"
"
+ sum_float32(m, D[m,k]*(G[2,i,j,m,e]*ur[i,j,m
,e
] + G[4,i,j,m,e]*us[i,j,m
,e
] + G[5,i,j,m,e]*ut[i,j,m
,e
]))
"
"
+ sum_float32(m, D[m,k]*(G[2,i,j,m,e]*ur[i,j,m] + G[4,i,j,m,e]*us[i,j,m] + G[5,i,j,m,e]*ut[i,j,m]))
"
],
],
[
[
lp
.
ArrayArg
(
"
u
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
u
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
...
@@ -307,17 +306,18 @@ def test_sem_3d(ctx_factory):
...
@@ -307,17 +306,18 @@ def test_sem_3d(ctx_factory):
],
],
name
=
"
semlap
"
,
assumptions
=
"
K>=1
"
)
name
=
"
semlap
"
,
assumptions
=
"
K>=1
"
)
print
knl
#
print knl
#for tv in knl.temporary_variables.iteritems():
#for tv in knl.temporary_variables.iteritems():
#print tv
#print tv
1
/
0
#
1/0
knl
=
lp
.
split_dimension
(
knl
,
"
e
"
,
16
,
outer_tag
=
"
g.0
"
)
#, slabs=(0, 1))
knl
=
lp
.
split_dimension
(
knl
,
"
e
"
,
16
,
outer_tag
=
"
g.0
"
)
#, slabs=(0, 1))
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
knl
=
lp
.
tag_dimensions
(
knl
,
dict
(
i
=
"
l.0
"
,
j
=
"
l.1
"
))
knl
=
lp
.
tag_dimensions
(
knl
,
dict
(
i
=
"
l.0
"
,
j
=
"
l.1
"
))
#knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k"])
#knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k"])
knl
=
lp
.
realize_cse
(
knl
,
"
build_ur
"
,
np
.
float32
,
[
"
j
"
,
"
k
"
,
"
mp
"
])
#knl = lp.realize_cse(knl, "build_ur", np.float32, ["j", "k", "mp"])
print
knl
knl
=
lp
.
preprocess_kernel
(
knl
)
#print knl
#1/0
#1/0
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
)
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
)
...
@@ -343,10 +343,6 @@ def test_sem_3d(ctx_factory):
...
@@ -343,10 +343,6 @@ def test_sem_3d(ctx_factory):
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
# make sure that import failures get reported, instead of skipping the
# tests.
import
pyopencl
as
cl
import
sys
import
sys
if
len
(
sys
.
argv
)
>
1
:
if
len
(
sys
.
argv
)
>
1
:
exec
(
sys
.
argv
[
1
])
exec
(
sys
.
argv
[
1
])
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment