Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
loopy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Ben Sepanski
loopy
Commits
be92e8ba
Commit
be92e8ba
authored
13 years ago
by
Tim Warburton
Browse files
Options
Downloads
Patches
Plain Diff
Test hackery.
parent
7c8dcfa0
No related branches found
Branches containing commit
Tags
v2016.1.1
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
test/test_advect.py
+0
-116
0 additions, 116 deletions
test/test_advect.py
test/test_loopy.py
+1
-1
1 addition, 1 deletion
test/test_loopy.py
test/test_sem.py
+126
-15
126 additions, 15 deletions
test/test_sem.py
with
127 additions
and
132 deletions
test/test_advect.py
deleted
100644 → 0
+
0
−
116
View file @
7c8dcfa0
def
test_advect
(
ctx_factory
):
dtype
=
np
.
float32
ctx
=
ctx_factory
()
order
=
"
C
"
queue
=
cl
.
CommandQueue
(
ctx
,
properties
=
cl
.
command_queue_properties
.
PROFILING_ENABLE
)
N
=
8
from
pymbolic
import
var
K_sym
=
var
(
"
K
"
)
field_shape
=
(
K_sym
,
N
,
N
,
N
)
# 1. direction-by-direction similarity transform on u
# 2. invert diagonal
# 3. transform back (direction-by-direction)
# K - run-time symbolic
# A. updated for CSE: notation.
# B. fixed temp indexing and C ordering
# load: 3+9 fields + 1/N D entry
# store: 3 fields
# perform: N*2*6 + 3*5 + 3*5 flops
# ratio: (12*N+30)/15 flops per 4 bytes on bus
# ~ 8.4 FLOPS per 4 bytes at N=8
# ~ 300 GFLOPS max on a 150GB/s device at N=8 if done perfectly
knl
=
lp
.
make_kernel
(
ctx
.
devices
[
0
],
"
[K] -> {[i,j,k,m,e]: 0<=i,j,k,m<%d AND 0<=e<K}
"
%
N
[
# differentiate u
"
CSE: ur[i,j,k] = sum_float32(@m, D[i,m]*u[e,m,j,k])
"
,
"
CSE: us[i,j,k] = sum_float32(@m, D[j,m]*u[e,i,m,k])
"
,
"
CSE: ut[i,j,k] = sum_float32(@m, D[k,m]*u[e,i,j,m])
"
,
# differentiate v
"
CSE: vr[i,j,k] = sum_float32(@m, D[i,m]*v[e,m,j,k])
"
,
"
CSE: vs[i,j,k] = sum_float32(@m, D[j,m]*v[e,i,m,k])
"
,
"
CSE: vt[i,j,k] = sum_float32(@m, D[k,m]*v[e,i,j,m])
"
,
# differentiate w
"
CSE: wr[i,j,k] = sum_float32(@m, D[i,m]*w[e,m,j,k])
"
,
"
CSE: ws[i,j,k] = sum_float32(@m, D[j,m]*w[e,i,m,k])
"
,
"
CSE: wt[i,j,k] = sum_float32(@m, D[k,m]*w[e,i,j,m])
"
,
# find velocity in (r,s,t) coordinates
# CSE?
"
Vr[i,j,k] = G[0,e,i,j,k]*u[e,i,j,k] + G[1,e,i,j,k]*v[e,i,j,k] + G[2,e,i,j,k]*w[e,i,j,k]
"
,
"
Vs[i,j,k] = G[3,e,i,j,k]*u[e,i,j,k] + G[4,e,i,j,k]*v[e,i,j,k] + G[5,e,i,j,k]*w[e,i,j,k]
"
,
"
Vt[i,j,k] = G[6,e,i,j,k]*u[e,i,j,k] + G[7,e,i,j,k]*v[e,i,j,k] + G[8,e,i,j,k]*w[e,i,j,k]
"
,
# form nonlinear term on integration nodes
"
Nu[e,i,j,k] = Vr[i,j,k]*ur[i,j,k]+Vs[i,j,k]*us[i,j,k]+Vt[i,j,k]*ut[i,j,k]
"
,
"
Nv[e,i,j,k] = Vr[i,j,k]*vr[i,j,k]+Vs[i,j,k]*vs[i,j,k]+Vt[i,j,k]*vt[i,j,k]
"
,
"
Nw[e,i,j,k] = Vr[i,j,k]*wr[i,j,k]+Vs[i,j,k]*ws[i,j,k]+Vt[i,j,k]*wt[i,j,k]
"
,
],
[
lp
.
ArrayArg
(
"
u
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
v
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
w
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nu
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nv
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nw
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
G
"
,
dtype
,
shape
=
(
6
,)
+
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
D
"
,
dtype
,
shape
=
(
N
,
N
),
order
=
order
),
lp
.
ScalarArg
(
"
K
"
,
np
.
int32
,
approximately
=
1000
),
],
name
=
"
sem_advect
"
,
assumptions
=
"
K>=1
"
)
print
knl
1
/
0
knl
=
lp
.
split_dimension
(
knl
,
"
e
"
,
16
,
outer_tag
=
"
g.0
"
)
#, slabs=(0, 1))
knl
=
lp
.
tag_dimensions
(
knl
,
dict
(
i
=
"
l.0
"
,
j
=
"
l.1
"
))
print
knl
#1/0
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
)
kernel_gen
=
lp
.
check_kernels
(
kernel_gen
,
dict
(
K
=
1000
),
kill_level_min
=
5
)
a
=
make_well_conditioned_dev_matrix
(
queue
,
n
,
dtype
=
dtype
,
order
=
order
)
b
=
make_well_conditioned_dev_matrix
(
queue
,
n
,
dtype
=
dtype
,
order
=
order
)
c
=
cl_array
.
empty_like
(
a
)
refsol
=
np
.
dot
(
a
.
get
(),
b
.
get
())
def
launcher
(
kernel
,
gsize
,
lsize
,
check
):
evt
=
kernel
(
queue
,
gsize
(),
lsize
(),
a
.
data
,
b
.
data
,
c
.
data
,
g_times_l
=
True
)
if
check
:
check_error
(
refsol
,
c
.
get
())
return
evt
lp
.
drive_timing_run
(
kernel_gen
,
queue
,
launcher
,
2
*
n
**
3
)
if
__name__
==
"
__main__
"
:
# make sure that import failures get reported, instead of skipping the
# tests.
import
pyopencl
as
cl
import
sys
if
len
(
sys
.
argv
)
>
1
:
exec
(
sys
.
argv
[
1
])
else
:
from
py.test.cmdline
import
main
main
([
__file__
])
This diff is collapsed.
Click to expand it.
test/test_loopy.py
+
1
−
1
View file @
be92e8ba
...
...
@@ -128,7 +128,7 @@ def test_stencil(ctx_factory):
"
+ cse(a[i,j-1])
"
"
+ cse(a[i,j+1])
"
"
+ cse(a[i-1,j])
"
"
+ cse(a[i+1,j])
"
# watch out: i!
"
+ cse(a[i+1,j])
"
],
[
lp
.
ArrayArg
(
"
a
"
,
np
.
float32
,
shape
=
(
32
,
32
,))
...
...
This diff is collapsed.
Click to expand it.
test/test_sem.py
+
126
−
15
View file @
be92e8ba
from
__future__
import
division
import
numpy
as
np
import
numpy.linalg
as
la
import
pyopencl
as
cl
import
pyopencl.array
as
cl_array
import
loopy
as
lp
from
pyopencl.tools
import
pytest_generate_tests_for_pyopencl
\
...
...
@@ -12,7 +10,7 @@ from pyopencl.tools import pytest_generate_tests_for_pyopencl \
def
test_
sem_3d
(
ctx_factory
):
def
test_
laplacian
(
ctx_factory
):
dtype
=
np
.
float32
ctx
=
ctx_factory
()
order
=
"
C
"
...
...
@@ -33,16 +31,21 @@ def test_sem_3d(ctx_factory):
# K - run-time symbolic
knl
=
lp
.
make_kernel
(
ctx
.
devices
[
0
],
"
[K] -> {[i,j,k,e,m,o,gi]: 0<=i,j,k,m,o<%d and 0<=e<K and 0<=gi<6}
"
%
n
,
"
[K] -> {[i,j,k,e,m,o
1,o2,o3
,gi]: 0<=i,j,k,m,o
1,o2,o3
<%d and 0<=e<K and 0<=gi<6}
"
%
n
,
[
"
CSE: ur(i,j,k) = sum_float32(@o, D[i,o]*u[e,o,j,k])
"
,
"
CSE: us(i,j,k) = sum_float32(@o, D[j,o]*u[e,i,o,k])
"
,
"
CSE: ut(i,j,k) = sum_float32(@o, D[k,o]*u[e,i,j,o])
"
,
"
CSE: ur(i,j,k) = sum_float32(o1, D[i,o1]*cse(u[e,o1,j,k], urf))
"
,
"
CSE: us(i,j,k) = sum_float32(o2, D[j,o2]*cse(u[e,i,o2,k], usf))
"
,
"
CSE: ut(i,j,k) = sum_float32(o3, D[k,o3]*cse(u[e,i,j,o3], utf))
"
,
# define function
"
CSE: Gu(i,j,k) = G[0,e,i,j,k]*ur(i,j,k) + G[1,e,i,j,k]*us(i,j,k) + G[2,e,i,j,k]*ut(i,j,k)
"
,
"
CSE: Gv(i,j,k) = G[1,e,i,j,k]*ur(i,j,k) + G[3,e,i,j,k]*us(i,j,k) + G[4,e,i,j,k]*ut(i,j,k)
"
,
"
CSE: Gw(i,j,k) = G[2,e,i,j,k]*ur(i,j,k) + G[4,e,i,j,k]*us(i,j,k) + G[5,e,i,j,k]*ut(i,j,k)
"
,
"
lap[e,i,j,k] =
"
"
sum_float32(m, D[m,i]*
(G[0,e,m,j,k]*ur(m,j,k) + G[1,e,m,j,k]*us(m,j,k) + G[2,e,m,j,k]*ut(m,j,k)
))
"
"
+ sum_float32(m, D[m,j]*
(G[1,e,i,m,k]*ur(i,m,k) + G[3,e,i,m,k]*us(i,m,k) + G[4,e,i,m,k]*ut(i,m,k)
))
"
"
+ sum_float32(m, D[m,k]*
(G[2,e,i,j,m]*ur(i,j,m) + G[4,e,i,j,m]*us(i,j,m) + G[5,e,i,j,m]*ut(i,j,m)
))
"
"
sum_float32(m, D[m,i]*
Gu(m,j,k
))
"
"
+ sum_float32(m, D[m,j]*
Gv(i,m,k
))
"
"
+ sum_float32(m, D[m,k]*
Gw(i,j,m
))
"
],
[
lp
.
ArrayArg
(
"
u
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
...
...
@@ -53,15 +56,28 @@ def test_sem_3d(ctx_factory):
],
name
=
"
semlap
"
,
assumptions
=
"
K>=1
"
)
#print lp.preprocess_kernel(knl, cse_ok=True)
#1/0
#
#print knl
#1/0
knl
=
lp
.
realize_cse
(
knl
,
"
urf
"
,
np
.
float32
,
[
"
o1
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
usf
"
,
np
.
float32
,
[
"
o2
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
utf
"
,
np
.
float32
,
[
"
o3
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
Gu
"
,
np
.
float32
,
[
"
m
"
,
"
j
"
,
"
k
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
Gv
"
,
np
.
float32
,
[
"
i
"
,
"
m
"
,
"
k
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
Gw
"
,
np
.
float32
,
[
"
i
"
,
"
j
"
,
"
m
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
ur
"
,
np
.
float32
,
[
"
k
"
,
"
j
"
,
"
m
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
us
"
,
np
.
float32
,
[
"
i
"
,
"
m
"
,
"
k
"
])
knl
=
lp
.
realize_cse
(
knl
,
"
ut
"
,
np
.
float32
,
[
"
i
"
,
"
j
"
,
"
m
"
])
if
0
:
seq_knl
=
lp
.
add_prefetch
(
knl
,
"
G
"
,
[
"
gi
"
,
"
m
"
,
"
j
"
,
"
k
"
],
"
G[gi,e,m,j,k]
"
)
seq_knl
=
lp
.
add_prefetch
(
seq_knl
,
"
D
"
,
[
"
m
"
,
"
j
"
])
seq_knl
=
lp
.
add_prefetch
(
seq_knl
,
"
u
"
,
[
"
i
"
,
"
j
"
,
"
k
"
],
"
u[*,i,j,k]
"
)
pass
#seq_knl = lp.add_prefetch(knl, "G", ["gi", "m", "j", "k"], "G[gi,e,m,j,k]")
#seq_knl = lp.add_prefetch(seq_knl, "D", ["m", "j"])
#seq_knl = lp.add_prefetch(seq_knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
else
:
seq_knl
=
knl
...
...
@@ -69,7 +85,8 @@ def test_sem_3d(ctx_factory):
knl
=
lp
.
add_prefetch
(
knl
,
"
G
"
,
[
"
gi
"
,
"
m
"
,
"
j
"
,
"
k
"
],
"
G[gi,e,m,j,k]
"
)
knl
=
lp
.
add_prefetch
(
knl
,
"
D
"
,
[
"
m
"
,
"
j
"
])
knl
=
lp
.
add_prefetch
(
knl
,
"
u
"
,
[
"
i
"
,
"
j
"
,
"
k
"
],
"
u[*,i,j,k]
"
)
#knl = lp.add_prefetch(knl, "u", ["i", "j", "k"], "u[*,i,j,k]")
#knl = lp.split_dimension(knl, "e_inner", 4, inner_tag="ilp")
#print seq_knl
...
...
@@ -78,7 +95,8 @@ def test_sem_3d(ctx_factory):
knl
=
lp
.
tag_dimensions
(
knl
,
dict
(
i
=
"
l.0
"
,
j
=
"
l.1
"
))
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
)
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
,
loop_priority
=
[
"
m_fetch_G
"
,
"
i_fetch_u
"
])
kernel_gen
=
lp
.
check_kernels
(
kernel_gen
,
dict
(
K
=
1000
))
K
=
1000
...
...
@@ -89,6 +107,99 @@ def test_sem_3d(ctx_factory):
def
test_advect
(
ctx_factory
):
dtype
=
np
.
float32
ctx
=
ctx_factory
()
order
=
"
C
"
N
=
8
from
pymbolic
import
var
K_sym
=
var
(
"
K
"
)
field_shape
=
(
K_sym
,
N
,
N
,
N
)
# 1. direction-by-direction similarity transform on u
# 2. invert diagonal
# 3. transform back (direction-by-direction)
# K - run-time symbolic
# A. updated for CSE: notation.
# B. fixed temp indexing and C ordering
# load: 3+9 fields + 1/N D entry
# store: 3 fields
# perform: N*2*6 + 3*5 + 3*5 flops
# ratio: (12*N+30)/15 flops per 4 bytes on bus
# ~ 8.4 FLOPS per 4 bytes at N=8
# ~ 300 GFLOPS max on a 150GB/s device at N=8 if done perfectly
knl
=
lp
.
make_kernel
(
ctx
.
devices
[
0
],
"
[K] -> {[i,j,k,m,e]: 0<=i,j,k,m<%d AND 0<=e<K}
"
%
N
,
[
# differentiate u
"
CSE: ur(i,j,k) = sum_float32(@m, D[i,m]*u[e,m,j,k])
"
,
"
CSE: us(i,j,k) = sum_float32(@m, D[j,m]*u[e,i,m,k])
"
,
"
CSE: ut(i,j,k) = sum_float32(@m, D[k,m]*u[e,i,j,m])
"
,
# differentiate v
"
CSE: vr(i,j,k) = sum_float32(@m, D[i,m]*v[e,m,j,k])
"
,
"
CSE: vs(i,j,k) = sum_float32(@m, D[j,m]*v[e,i,m,k])
"
,
"
CSE: vt(i,j,k) = sum_float32(@m, D[k,m]*v[e,i,j,m])
"
,
# differentiate w
"
CSE: wr(i,j,k) = sum_float32(@m, D[i,m]*w[e,m,j,k])
"
,
"
CSE: ws(i,j,k) = sum_float32(@m, D[j,m]*w[e,i,m,k])
"
,
"
CSE: wt(i,j,k) = sum_float32(@m, D[k,m]*w[e,i,j,m])
"
,
# find velocity in (r,s,t) coordinates
# CSE?
"
CSE: Vr(i,j,k) = G[0,e,i,j,k]*u[e,i,j,k] + G[1,e,i,j,k]*v[e,i,j,k] + G[2,e,i,j,k]*w[e,i,j,k]
"
,
"
CSE: Vs(i,j,k) = G[3,e,i,j,k]*u[e,i,j,k] + G[4,e,i,j,k]*v[e,i,j,k] + G[5,e,i,j,k]*w[e,i,j,k]
"
,
"
CSE: Vt(i,j,k) = G[6,e,i,j,k]*u[e,i,j,k] + G[7,e,i,j,k]*v[e,i,j,k] + G[8,e,i,j,k]*w[e,i,j,k]
"
,
# form nonlinear term on integration nodes
"
Nu[e,i,j,k] = Vr(i,j,k)*ur(i,j,k)+Vs(i,j,k)*us(i,j,k)+Vt(i,j,k)*ut(i,j,k)
"
,
"
Nv[e,i,j,k] = Vr(i,j,k)*vr(i,j,k)+Vs(i,j,k)*vs(i,j,k)+Vt(i,j,k)*vt(i,j,k)
"
,
"
Nw[e,i,j,k] = Vr(i,j,k)*wr(i,j,k)+Vs(i,j,k)*ws(i,j,k)+Vt(i,j,k)*wt(i,j,k)
"
,
],
[
lp
.
ArrayArg
(
"
u
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
v
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
w
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nu
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nv
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
Nw
"
,
dtype
,
shape
=
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
G
"
,
dtype
,
shape
=
(
9
,)
+
field_shape
,
order
=
order
),
lp
.
ArrayArg
(
"
D
"
,
dtype
,
shape
=
(
N
,
N
),
order
=
order
),
lp
.
ScalarArg
(
"
K
"
,
np
.
int32
,
approximately
=
1000
),
],
name
=
"
sem_advect
"
,
assumptions
=
"
K>=1
"
)
print
knl
1
/
0
seq_knl
=
knl
knl
=
lp
.
split_dimension
(
knl
,
"
e
"
,
16
,
outer_tag
=
"
g.0
"
)
#, slabs=(0, 1))
knl
=
lp
.
tag_dimensions
(
knl
,
dict
(
i
=
"
l.0
"
,
j
=
"
l.1
"
))
kernel_gen
=
lp
.
generate_loop_schedules
(
knl
)
kernel_gen
=
lp
.
check_kernels
(
kernel_gen
,
dict
(
K
=
1000
),
kill_level_min
=
5
)
K
=
1000
lp
.
auto_test_vs_seq
(
seq_knl
,
ctx
,
kernel_gen
,
op_count
=
0
,
op_label
=
"
GFlops
"
,
parameters
=
{
"
K
"
:
K
},
print_seq_code
=
True
,)
if
__name__
==
"
__main__
"
:
import
sys
if
len
(
sys
.
argv
)
>
1
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment