Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
sumpy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Andreas Klöckner
sumpy
Commits
2df8cd3b
Commit
2df8cd3b
authored
7 years ago
by
Alexandru Fikl
Browse files
Options
Downloads
Patches
Plain Diff
add block matrix generators
parent
d06c01d5
No related branches found
No related tags found
1 merge request
!73
Add block matrix generators
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
sumpy/p2p.py
+112
-9
112 additions, 9 deletions
sumpy/p2p.py
sumpy/qbx.py
+106
-7
106 additions, 7 deletions
sumpy/qbx.py
test/test_matrixgen.py
+54
-5
54 additions, 5 deletions
test/test_matrixgen.py
with
272 additions
and
21 deletions
sumpy/p2p.py
+
112
−
9
View file @
2df8cd3b
...
...
@@ -120,10 +120,26 @@ class SingleSrcTgtListP2PBase(P2PComputationBase):
shape
=
(
self
.
dim
,
"
nsources
"
)),
lp
.
GlobalArg
(
"
targets
"
,
None
,
shape
=
(
self
.
dim
,
"
ntargets
"
)),
lp
.
ValueArg
(
"
nsources
"
,
None
),
lp
.
ValueArg
(
"
ntargets
"
,
None
)
lp
.
ValueArg
(
"
nsources
"
,
np
.
int32
),
lp
.
ValueArg
(
"
ntargets
"
,
np
.
int32
)
]
def
get_domains
(
self
):
return
[
"
{[itgt]: 0 <= itgt < ntargets}
"
,
"
{[isrc]: 0 <= isrc < nsources}
"
,
"
{[idim]: 0 <= idim < dim}
"
]
def
get_loop_begin
(
self
):
return
[
"
for itgt, isrc
"
]
def
get_loop_end
(
self
):
return
[
"
end
"
]
def
get_assumptions
(
self
):
return
"
nsources>=1 and ntargets>=1
"
def
get_kernel
(
self
):
loopy_insns
,
result_names
=
self
.
get_loopy_insns_and_result_names
()
...
...
@@ -146,25 +162,24 @@ class SingleSrcTgtListP2PBase(P2PComputationBase):
+
gather_loopy_source_arguments
(
self
.
kernels
))
loopy_knl
=
lp
.
make_kernel
(
"
{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources
\
and 0<=idim<dim}
"
,
self
.
get_domains
(),
self
.
get_kernel_scaling_assignments
()
+
[
"
for itgt, isrc
"
]
+
loopy_insns
+
self
.
get_loop_begin
()
+
[
"
<> d[idim] = targets[idim,itgt] - sources[idim,isrc]
"
]
+
[
"
<> is_self = (isrc == target_to_source[itgt])
"
if
self
.
exclude_self
else
""
]
+
loopy_insns
+
[
lp
.
Assignment
(
id
=
None
,
assignee
=
"
pair_result_%d
"
%
i
,
expression
=
expr
,
temp_var_type
=
lp
.
auto
)
for
i
,
expr
in
enumerate
(
exprs
)
]
+
[
"
end
"
]
+
self
.
get_loop_
end
()
+
self
.
get_result_store_instructions
(),
arguments
,
assumptions
=
self
.
get_assumptions
(),
name
=
self
.
name
,
assumptions
=
"
nsources>=1 and ntargets>=1
"
,
fixed_parameters
=
dict
(
dim
=
self
.
dim
,
nstrengths
=
self
.
strength_count
,
...
...
@@ -186,7 +201,8 @@ class SingleSrcTgtListP2PBase(P2PComputationBase):
if
targets_is_obj_array
:
knl
=
lp
.
tag_array_axes
(
knl
,
"
targets
"
,
"
sep,C
"
)
knl
=
lp
.
split_iname
(
knl
,
"
itgt
"
,
1024
,
outer_tag
=
"
g.0
"
)
# FIXME: how to split when using blocks?
# knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0")
return
knl
...
...
@@ -266,6 +282,93 @@ class P2PMatrixGenerator(SingleSrcTgtListP2PBase):
# }}}
# {{{
class
P2PMatrixBlockGenerator
(
SingleSrcTgtListP2PBase
):
default_name
=
"
p2p_block
"
def
get_src_tgt_arguments
(
self
):
return
super
(
P2PMatrixBlockGenerator
,
self
).
get_src_tgt_arguments
()
\
+
[
lp
.
GlobalArg
(
"
srcindices
"
,
None
,
shape
=
"
nsrcindices
"
),
lp
.
GlobalArg
(
"
tgtindices
"
,
None
,
shape
=
"
ntgtindices
"
),
lp
.
GlobalArg
(
"
srcranges
"
,
None
,
shape
=
"
nranges
"
),
lp
.
GlobalArg
(
"
tgtranges
"
,
None
,
shape
=
"
nranges
"
),
lp
.
ValueArg
(
"
nsrcindices
"
,
np
.
int32
),
lp
.
ValueArg
(
"
ntgtindices
"
,
np
.
int32
),
lp
.
ValueArg
(
"
nranges
"
,
None
)
]
def
get_domains
(
self
):
# FIXME: this doesn't work when separating j and k
return
[
"
{[i]: 0 <= i < nranges - 1}
"
,
"
{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}
"
,
"
{[idim]: 0 <= idim < dim}
"
]
def
get_loop_begin
(
self
):
return
[
"""
for i
<> tgtstart = tgtranges[i]
<> tgtend = tgtranges[i + 1]
<> tgt_length = tgtend - tgtstart
<> srcstart = srcranges[i]
<> srcend = srcranges[i + 1]
<> src_length = srcend - srcstart
for j, k
<> itgt = tgtindices[tgtstart + j]
<> isrc = srcindices[srcstart + k]
"""
]
def
get_loop_end
(
self
):
return
[
"""
end
end
"""
]
def
get_strength_or_not
(
self
,
isrc
,
kernel_idx
):
return
1
def
get_input_and_output_arguments
(
self
):
return
[
lp
.
GlobalArg
(
"
result_%d
"
%
i
,
dtype
,
shape
=
"
ntgtindices,nsrcindices
"
)
for
i
,
dtype
in
enumerate
(
self
.
value_dtypes
)
]
def
get_result_store_instructions
(
self
):
# FIXME: doesn't work without inames=i. check how the loops are nested!
return
[
"""
result_{i}[tgtstart + j, srcstart + k] =
\
knl_{i}_scaling * pair_result_{i} {{inames=i}}
"""
.
format
(
i
=
iknl
)
for
iknl
in
range
(
len
(
self
.
kernels
))
]
def
get_assumptions
(
self
):
return
"
nranges>=2
"
def
__call__
(
self
,
queue
,
targets
,
sources
,
tgtindices
,
srcindices
,
tgtranges
,
srcranges
,
**
kwargs
):
from
pytools.obj_array
import
is_obj_array
knl
=
self
.
get_optimized_kernel
(
targets_is_obj_array
=
(
is_obj_array
(
targets
)
or
isinstance
(
targets
,
(
tuple
,
list
))),
sources_is_obj_array
=
(
is_obj_array
(
sources
)
or
isinstance
(
sources
,
(
tuple
,
list
))))
return
knl
(
queue
,
targets
=
targets
,
sources
=
sources
,
tgtindices
=
tgtindices
,
srcindices
=
srcindices
,
tgtranges
=
tgtranges
,
srcranges
=
srcranges
,
**
kwargs
)
# }}}
# {{{ P2P from CSR-like interaction list
class
P2PFromCSR
(
P2PComputationBase
):
...
...
This diff is collapsed.
Click to expand it.
sumpy/qbx.py
+
106
−
7
View file @
2df8cd3b
...
...
@@ -116,10 +116,26 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
lp
.
GlobalArg
(
"
center
"
,
None
,
shape
=
(
self
.
dim
,
"
ntargets
"
),
order
=
"
C
"
),
lp
.
GlobalArg
(
"
expansion_radii
"
,
None
,
shape
=
"
ntargets
"
),
lp
.
ValueArg
(
"
nsources
"
,
None
),
lp
.
ValueArg
(
"
ntargets
"
,
None
),
lp
.
ValueArg
(
"
nsources
"
,
np
.
int32
),
lp
.
ValueArg
(
"
ntargets
"
,
np
.
int32
),
]
def
get_domains
(
self
):
return
[
"
{[itgt]: 0 <= itgt < ntargets}
"
,
"
{[isrc]: 0 <= isrc < nsources}
"
,
"
{[idim]: 0 <= idim < dim}
"
]
def
get_loop_begin
(
self
):
return
[
"
for itgt, isrc
"
]
def
get_loop_end
(
self
):
return
[
"
end
"
]
def
get_assumptions
(
self
):
return
"
nsources>=1 and ntargets>=1
"
@memoize_method
def
get_kernel
(
self
):
from
sumpy.symbolic
import
make_sym_vector
...
...
@@ -162,10 +178,9 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
+
gather_loopy_source_arguments
(
self
.
kernels
))
loopy_knl
=
lp
.
make_kernel
(
"
{[isrc,itgt,idim]: 0<=itgt<ntargets and 0<=isrc<nsources
"
"
and 0<=idim<dim}
"
,
self
.
get_domains
(),
self
.
get_kernel_scaling_assignments
()
+
[
"
for itgt, isrc
"
]
+
self
.
get_loop_begin
()
+
[
self
.
get_compute_a_and_b_vecs
()]
+
loopy_insns
+
[
...
...
@@ -174,11 +189,11 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
temp_var_type
=
lp
.
auto
)
for
i
,
(
expr
,
dtype
)
in
enumerate
(
zip
(
exprs
,
self
.
value_dtypes
))
]
+
[
"
end
"
]
+
self
.
get_loop_
end
()
+
self
.
get_result_store_instructions
(),
arguments
,
name
=
self
.
name
,
assumptions
=
"
nsources>=1 and ntargets>=1
"
,
assumptions
=
self
.
get_assumptions
()
,
default_offset
=
lp
.
auto
,
silenced_warnings
=
"
write_race(write_lpot*)
"
,
fixed_parameters
=
dict
(
dim
=
self
.
dim
),
...
...
@@ -197,7 +212,9 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
def
get_optimized_kernel
(
self
):
# FIXME specialize/tune for GPU/CPU
loopy_knl
=
self
.
get_kernel
()
return
loopy_knl
# FIXME: how to tune for blocks?
import
pyopencl
as
cl
dev
=
self
.
context
.
devices
[
0
]
if
dev
.
type
&
cl
.
device_type
.
CPU
:
...
...
@@ -297,6 +314,88 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
# }}}
# {{{
class
LayerPotentialMatrixBlockGenerator
(
LayerPotentialBase
):
default_name
=
"
qbx_block
"
def
get_src_tgt_arguments
(
self
):
return
LayerPotentialBase
.
get_src_tgt_arguments
(
self
)
\
+
[
lp
.
GlobalArg
(
"
srcindices
"
,
None
,
shape
=
"
nsrcindices
"
),
lp
.
GlobalArg
(
"
tgtindices
"
,
None
,
shape
=
"
ntgtindices
"
),
lp
.
GlobalArg
(
"
srcranges
"
,
None
,
shape
=
"
nranges
"
),
lp
.
GlobalArg
(
"
tgtranges
"
,
None
,
shape
=
"
nranges
"
),
lp
.
ValueArg
(
"
nsrcindices
"
,
np
.
int32
),
lp
.
ValueArg
(
"
ntgtindices
"
,
np
.
int32
),
lp
.
ValueArg
(
"
nranges
"
,
None
)
]
def
get_domains
(
self
):
# FIXME: this doesn't work when separating j and k
return
[
"
{[i]: 0 <= i < nranges - 1}
"
,
"
{[j, k]: 0 <= j < tgt_length and 0 <= k < src_length}
"
,
"
{[idim]: 0 <= idim < dim}
"
]
def
get_loop_begin
(
self
):
return
[
"""
for i
<> tgtstart = tgtranges[i]
<> tgtend = tgtranges[i + 1]
<> tgt_length = tgtend - tgtstart
<> srcstart = srcranges[i]
<> srcend = srcranges[i + 1]
<> src_length = srcend - srcstart
for j, k
<> itgt = tgtindices[tgtstart + j]
<> isrc = srcindices[srcstart + k]
"""
]
def
get_loop_end
(
self
):
return
[
"""
end
end
"""
]
def
get_strength_or_not
(
self
,
isrc
,
kernel_idx
):
return
1
def
get_input_and_output_arguments
(
self
):
return
[
lp
.
GlobalArg
(
"
result_%d
"
%
i
,
dtype
,
shape
=
"
ntgtindices,nsrcindices
"
)
for
i
,
dtype
in
enumerate
(
self
.
value_dtypes
)
]
def
get_result_store_instructions
(
self
):
return
[
"""
result_KNLIDX[tgtstart + j, srcstart + k] =
\
knl_KNLIDX_scaling*pair_result_KNLIDX {inames=i}
"""
.
replace
(
"
KNLIDX
"
,
str
(
iknl
))
for
iknl
in
range
(
len
(
self
.
expansions
))
]
def
get_assumptions
(
self
):
return
"
nranges>=2
"
def
__call__
(
self
,
queue
,
targets
,
sources
,
centers
,
expansion_radii
,
tgtindices
,
srcindices
,
tgtranges
,
srcranges
,
**
kwargs
):
knl
=
self
.
get_optimized_kernel
()
return
knl
(
queue
,
src
=
sources
,
tgt
=
targets
,
center
=
centers
,
expansion_radii
=
expansion_radii
,
tgtindices
=
tgtindices
,
srcindices
=
srcindices
,
tgtranges
=
tgtranges
,
srcranges
=
srcranges
,
**
kwargs
)
# }}}
# }}}
...
...
This diff is collapsed.
Click to expand it.
test/test_matrixgen.py
+
54
−
5
View file @
2df8cd3b
...
...
@@ -73,15 +73,19 @@ def test_qbx_direct(ctx_getter):
from
sumpy.kernel
import
LaplaceKernel
lknl
=
LaplaceKernel
(
2
)
nblks
=
10
order
=
12
mode_nr
=
25
from
sumpy.qbx
import
LayerPotential
from
sumpy.qbx
import
LayerPotentialMatrixGenerator
from
sumpy.qbx
import
LayerPotentialMatrixBlockGenerator
from
sumpy.expansion.local
import
LineTaylorLocalExpansion
lpot
=
LayerPotential
(
ctx
,
[
LineTaylorLocalExpansion
(
lknl
,
order
)])
mat_gen
=
LayerPotentialMatrixGenerator
(
ctx
,
[
LineTaylorLocalExpansion
(
lknl
,
order
)])
lpot
=
LayerPotential
(
ctx
,
[
LineTaylorLocalExpansion
(
lknl
,
order
)])
blk_gen
=
LayerPotentialMatrixBlockGenerator
(
ctx
,
[
LineTaylorLocalExpansion
(
lknl
,
order
)])
for
n
in
[
200
,
300
,
400
]:
targets
,
sources
,
centers
,
sigma
,
expansion_radii
=
\
...
...
@@ -90,16 +94,37 @@ def test_qbx_direct(ctx_getter):
h
=
2
*
np
.
pi
/
n
strengths
=
(
sigma
*
h
,)
tgtindices
=
np
.
arange
(
0
,
n
)
tgtindices
=
np
.
random
.
choice
(
tgtindices
,
size
=
int
(
0.8
*
n
))
tgtranges
=
np
.
arange
(
0
,
tgtindices
.
shape
[
0
]
+
1
,
tgtindices
.
shape
[
0
]
//
nblks
)
srcindices
=
np
.
arange
(
0
,
n
)
srcindices
=
np
.
random
.
choice
(
srcindices
,
size
=
int
(
0.8
*
n
))
srcranges
=
np
.
arange
(
0
,
srcindices
.
shape
[
0
]
+
1
,
srcindices
.
shape
[
0
]
//
nblks
)
assert
tgtranges
.
shape
==
srcranges
.
shape
_
,
(
mat
,)
=
mat_gen
(
queue
,
targets
,
sources
,
centers
,
expansion_radii
=
expansion_radii
)
expansion_radii
)
result_mat
=
mat
.
dot
(
strengths
[
0
])
_
,
(
result_lpot
,)
=
lpot
(
queue
,
targets
,
sources
,
centers
,
strengths
,
expansion_radii
=
expansion_radii
)
expansion_radii
)
eps
=
1.0e-10
*
la
.
norm
(
result_lpot
)
assert
la
.
norm
(
result_mat
-
result_lpot
)
<
eps
_
,
(
blk
,)
=
blk_gen
(
queue
,
targets
,
sources
,
centers
,
expansion_radii
,
tgtindices
,
srcindices
,
tgtranges
,
srcranges
)
for
i
in
range
(
srcranges
.
shape
[
0
]
-
1
):
itgt
=
np
.
s_
[
tgtranges
[
i
]:
tgtranges
[
i
+
1
]]
isrc
=
np
.
s_
[
srcranges
[
i
]:
srcranges
[
i
+
1
]]
block
=
np
.
ix_
(
tgtindices
[
itgt
],
srcindices
[
isrc
])
eps
=
1.0e-10
*
la
.
norm
(
mat
[
block
])
assert
la
.
norm
(
blk
[
itgt
,
isrc
]
-
mat
[
block
])
<
eps
@pytest.mark.parametrize
(
"
exclude_self
"
,
[
True
,
False
])
def
test_p2p_direct
(
ctx_getter
,
exclude_self
):
...
...
@@ -112,12 +137,14 @@ def test_p2p_direct(ctx_getter, exclude_self):
from
sumpy.kernel
import
LaplaceKernel
lknl
=
LaplaceKernel
(
2
)
nblks
=
10
mode_nr
=
25
from
sumpy.p2p
import
P2P
from
sumpy.p2p
import
P2PMatrixGenerator
mat_gen
=
P2PMatrixGenerator
(
ctx
,
[
lknl
],
exclude_self
=
exclude_self
)
from
sumpy.p2p
import
P2PMatrixGenerator
,
P2PMatrixBlockGenerator
lpot
=
P2P
(
ctx
,
[
lknl
],
exclude_self
=
exclude_self
)
mat_gen
=
P2PMatrixGenerator
(
ctx
,
[
lknl
],
exclude_self
=
exclude_self
)
blk_gen
=
P2PMatrixBlockGenerator
(
ctx
,
[
lknl
],
exclude_self
=
exclude_self
)
for
n
in
[
200
,
300
,
400
]:
targets
,
sources
,
_
,
sigma
,
_
=
\
...
...
@@ -126,6 +153,16 @@ def test_p2p_direct(ctx_getter, exclude_self):
h
=
2
*
np
.
pi
/
n
strengths
=
(
sigma
*
h
,)
tgtindices
=
np
.
arange
(
0
,
n
)
tgtindices
=
np
.
random
.
choice
(
tgtindices
,
size
=
int
(
0.8
*
n
))
tgtranges
=
np
.
arange
(
0
,
tgtindices
.
shape
[
0
]
+
1
,
tgtindices
.
shape
[
0
]
//
nblks
)
srcindices
=
np
.
arange
(
0
,
n
)
srcindices
=
np
.
random
.
choice
(
srcindices
,
size
=
int
(
0.8
*
n
))
srcranges
=
np
.
arange
(
0
,
srcindices
.
shape
[
0
]
+
1
,
srcindices
.
shape
[
0
]
//
nblks
)
assert
tgtranges
.
shape
==
srcranges
.
shape
extra_kwargs
=
{}
if
exclude_self
:
extra_kwargs
[
"
target_to_source
"
]
=
np
.
arange
(
n
,
dtype
=
np
.
int32
)
...
...
@@ -139,6 +176,18 @@ def test_p2p_direct(ctx_getter, exclude_self):
eps
=
1.0e-10
*
la
.
norm
(
result_lpot
)
assert
la
.
norm
(
result_mat
-
result_lpot
)
<
eps
_
,
(
blk
,)
=
blk_gen
(
queue
,
targets
,
sources
,
tgtindices
,
srcindices
,
tgtranges
,
srcranges
,
**
extra_kwargs
)
for
i
in
range
(
srcranges
.
shape
[
0
]
-
1
):
itgt
=
np
.
s_
[
tgtranges
[
i
]:
tgtranges
[
i
+
1
]]
isrc
=
np
.
s_
[
srcranges
[
i
]:
srcranges
[
i
+
1
]]
block
=
np
.
ix_
(
tgtindices
[
itgt
],
srcindices
[
isrc
])
eps
=
1.0e-10
*
la
.
norm
(
mat
[
block
])
assert
la
.
norm
(
blk
[
itgt
,
isrc
]
-
mat
[
block
])
<
eps
# You can test individual routines by typing
# $ python test_kernels.py 'test_p2p(cl.create_some_context)'
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment