Compare revisions

sumpy/p2p.py
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = """
Copyright (C) 2012 Andreas Kloeckner
@@ -25,15 +26,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import six
from six.moves import range
import numpy as np
import loopy as lp
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
from pymbolic import var
from sumpy.tools import KernelComputation, KernelCacheWrapper
from sumpy.codegen import register_optimization_preambles
from sumpy.tools import KernelCacheMixin, KernelComputation, is_obj_array_like
__doc__ = """
@@ -44,7 +43,7 @@ Particle-to-particle
.. autoclass:: P2PBase
.. autoclass:: P2P
.. autoclass:: P2PMatrixGenerator
.. autoclass:: P2PMatrixBlockGenerator
.. autoclass:: P2PMatrixSubsetGenerator
.. autoclass:: P2PFromCSR
"""
@@ -55,78 +54,118 @@ Particle-to-particle
# {{{ p2p base class
class P2PBase(KernelComputation, KernelCacheWrapper):
def __init__(self, ctx, kernels, exclude_self, strength_usage=None,
value_dtypes=None,
options=[], name=None, device=None):
class P2PBase(KernelCacheMixin, KernelComputation):
def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None,
value_dtypes=None, name=None, device=None, source_kernels=None):
"""
:arg kernels: list of :class:`sumpy.kernel.Kernel` instances
:arg target_kernels: list of :class:`sumpy.kernel.Kernel` instances
with only target derivatives.
:arg source_kernels: list of :class:`sumpy.kernel.Kernel` instances
with only source derivatives.
:arg strength_usage: A list of integers indicating which expression
uses which source strength indicator. This implicitly specifies the
number of strength arrays that need to be passed.
Default: all kernels use the same strength.
"""
KernelComputation.__init__(self, ctx, kernels, strength_usage,
value_dtypes,
name, options, device)
from pytools import single_valued
from sumpy.kernel import (
SourceTransformationRemover,
TargetTransformationRemover,
)
txr = TargetTransformationRemover()
sxr = SourceTransformationRemover()
if source_kernels is None:
source_kernels = [single_valued(txr(knl) for knl in target_kernels)]
target_kernels = [sxr(knl) for knl in target_kernels]
else:
for knl in source_kernels:
assert txr(knl) == knl
for knl in target_kernels:
assert sxr(knl) == knl
base_source_kernel = single_valued(sxr(knl) for knl in source_kernels)
base_target_kernel = single_valued(txr(knl) for knl in target_kernels)
assert base_source_kernel == base_target_kernel
KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels,
source_kernels=source_kernels, strength_usage=strength_usage,
value_dtypes=value_dtypes, name=name, device=device)
import pyopencl as cl
self.is_gpu = not (self.device.type & cl.device_type.CPU)
self.exclude_self = exclude_self
from pytools import single_valued
self.dim = single_valued(knl.dim for knl in self.kernels)
self.dim = single_valued(knl.dim for knl in
list(self.target_kernels) + list(self.source_kernels))
def get_cache_key(self):
return (type(self).__name__, tuple(self.kernels), self.exclude_self,
tuple(self.strength_usage), tuple(self.value_dtypes))
return (type(self).__name__, tuple(self.target_kernels), self.exclude_self,
tuple(self.strength_usage), tuple(self.value_dtypes),
tuple(self.source_kernels),
self.device.hashable_model_and_version_identifier)
def get_loopy_insns_and_result_names(self):
from pymbolic import var
from sumpy.symbolic import make_sym_vector
dvec = make_sym_vector("d", self.dim)
from sumpy.assignment_collection import SymbolicAssignmentCollection
sac = SymbolicAssignmentCollection()
isrc_sym = var("isrc")
exprs = []
for out_knl in self.target_kernels:
expr_sum = 0
for j, in_knl in enumerate(self.source_kernels):
expr = in_knl.postprocess_at_source(
in_knl.get_expression(dvec),
dvec)
expr *= self.get_strength_or_not(isrc_sym, j)
expr_sum += expr
expr_sum = out_knl.postprocess_at_target(expr_sum, dvec)
exprs.append(expr_sum)
result_name_prefix = "pair_result_tmp" if self.exclude_self else "pair_result"
result_names = [
sac.assign_unique("knl%d" % i,
knl.postprocess_at_target(
knl.postprocess_at_source(
knl.get_expression(dvec),
dvec),
dvec)
)
for i, knl in enumerate(self.kernels)]
sac.add_assignment(f"{result_name_prefix}_{i}", expr)
for i, expr in enumerate(exprs)
]
sac.run_global_cse()
from sumpy.codegen import to_loopy_insns
loopy_insns = to_loopy_insns(six.iteritems(sac.assignments),
vector_names=set(["d"]),
pymbolic_expr_maps=[
knl.get_code_transformer() for knl in self.kernels],
loopy_insns = to_loopy_insns(sac.assignments.items(),
vector_names={"d"},
pymbolic_expr_maps=(
[knl.get_code_transformer() for knl in self.source_kernels]
+ [knl.get_code_transformer() for knl in self.target_kernels]),
retain_names=result_names,
complex_dtype=np.complex128 # FIXME
)
return loopy_insns, result_names
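# Sketch of the generated instructions (hypothetical setup): with
# source_kernels = [K, dK/dx_src], strength_usage = [0, 1] and one target
# kernel T, the retained result is
#     pair_result_0 = T.postprocess_at_target(
#         strength_0*K(d) + strength_1*dK/dx_src(d), d)
# where d is the target-source vector assembled by the caller.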
def get_strength_or_not(self, isrc, kernel_idx):
return var("strength").index((self.strength_usage[kernel_idx], isrc))
def get_kernel_exprs(self, result_names):
from pymbolic import var
isrc_sym = var("isrc")
exprs = [var(name) * self.get_strength_or_not(isrc_sym, i)
for i, name in enumerate(result_names)]
if self.exclude_self:
assignments = [lp.Assignment(id=None,
assignee=f"pair_result_{i}", expression=var(name),
temp_var_type=lp.Optional(None))
for i, name in enumerate(result_names)]
from pymbolic.primitives import If, Variable
exprs = [If(Variable("is_self"), 0, expr) for expr in exprs]
assignments = [assign.copy(expression=If(Variable("is_self"), 0,
assign.expression)) for assign in assignments]
else:
assignments = []
return [lp.Assignment(id=None,
assignee="pair_result_%d" % i, expression=expr,
temp_var_type=lp.Optional(None))
for i, expr in enumerate(exprs)]
return assignments + loopy_insns, result_names
def get_strength_or_not(self, isrc, kernel_idx):
from sumpy.symbolic import Symbol
return Symbol(f"strength_{self.strength_usage[kernel_idx]}")
def get_default_src_tgt_arguments(self):
from sumpy.tools import gather_loopy_source_arguments
@@ -139,10 +178,7 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
lp.ValueArg("ntargets", None)]
+ ([lp.GlobalArg("target_to_source", None, shape=("ntargets",))]
if self.exclude_self else [])
+ gather_loopy_source_arguments(self.kernels))
def get_kernel(self):
raise NotImplementedError
+ gather_loopy_source_arguments(self.source_kernels))
def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
# FIXME
@@ -154,6 +190,12 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
knl = lp.tag_array_axes(knl, "targets", "sep,C")
knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0")
knl = self._allow_redundant_execution_of_knl_scaling(knl)
knl = lp.set_options(knl,
enforce_variable_access_ordered="no_check")
knl = register_optimization_preambles(knl, self.device)
return knl
@@ -165,19 +207,19 @@ class P2PBase(KernelComputation, KernelCacheWrapper):
class P2P(P2PBase):
"""Direct applier for P2P interactions."""
default_name = "p2p_apply"
@property
def default_name(self):
return "p2p_apply"
def get_kernel(self):
loopy_insns, result_names = self.get_loopy_insns_and_result_names()
kernel_exprs = self.get_kernel_exprs(result_names)
arguments = (
self.get_default_src_tgt_arguments()
+ [
loopy_insns, _result_names = self.get_loopy_insns_and_result_names()
arguments = [
*self.get_default_src_tgt_arguments(),
lp.GlobalArg("strength", None,
shape="nstrengths, nsources", dim_tags="sep,C"),
lp.GlobalArg("result", None,
shape="nresults, ntargets", dim_tags="sep,C")
])
]
loopy_knl = lp.make_kernel(["""
{[itgt, isrc, idim]: \
@@ -190,36 +232,35 @@ class P2P(P2PBase):
+ ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"]
+ ["<> is_self = (isrc == target_to_source[itgt])"
if self.exclude_self else ""]
+ loopy_insns + kernel_exprs
+ ["""
result[{i}, itgt] = knl_{i}_scaling * \
simul_reduce(sum, isrc, pair_result_{i}) {{inames=itgt}}
""".format(i=iknl)
for iknl in range(len(self.kernels))]
+ [f"<> strength_{i} = strength[{i}, isrc]" for
i in set(self.strength_usage)]
+ loopy_insns
+ [f"""
result[{iknl}, itgt] = knl_{iknl}_scaling * \
simul_reduce(sum, isrc, pair_result_{iknl}) {{inames=itgt}}
""" for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
assumptions="nsources>=1 and ntargets>=1",
name=self.name,
fixed_parameters=dict(
dim=self.dim,
nstrengths=self.strength_count,
nresults=len(self.kernels)),
default_offset=lp.auto,
fixed_parameters={
"dim": self.dim,
"nstrengths": self.strength_count,
"nresults": len(self.target_kernels)},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
for knl in self.kernels:
for knl in self.target_kernels + self.source_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
def __call__(self, queue, targets, sources, strength, **kwargs):
from pytools.obj_array import is_obj_array
knl = self.get_cached_optimized_kernel(
targets_is_obj_array=(
is_obj_array(targets) or isinstance(targets, (tuple, list))),
sources_is_obj_array=(
is_obj_array(sources) or isinstance(sources, (tuple, list))))
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources))
return knl(queue, sources=sources, targets=targets, strength=strength,
**kwargs)
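# Illustrative usage sketch (not from this diff; shapes follow the loopy
# arguments above, LaplaceKernel is sumpy's standard Laplace kernel):
import numpy as np
import pyopencl as cl
from sumpy.kernel import LaplaceKernel
from sumpy.p2p import P2P

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
sources = np.random.rand(2, 500)  # (dim, nsources)
targets = np.random.rand(2, 300)  # (dim, ntargets)
strength = np.random.rand(500)    # one array per strength slot

p2p = P2P(ctx, [LaplaceKernel(2)], exclude_self=False)
evt, (pot,) = p2p(queue, targets, sources, [strength])  # pot: (ntargets,)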
@@ -232,19 +273,21 @@ class P2P(P2PBase):
class P2PMatrixGenerator(P2PBase):
"""Generator for P2P interaction matrix entries."""
default_name = "p2p_matrix"
@property
def default_name(self):
return "p2p_matrix"
def get_strength_or_not(self, isrc, kernel_idx):
return 1
def get_kernel(self):
loopy_insns, result_names = self.get_loopy_insns_and_result_names()
kernel_exprs = self.get_kernel_exprs(result_names)
loopy_insns, _result_names = self.get_loopy_insns_and_result_names()
arguments = (
self.get_default_src_tgt_arguments()
+ [lp.GlobalArg("result_%d" % i, dtype,
shape="ntargets,nsources")
for i, dtype in enumerate(self.value_dtypes)])
+ [
lp.GlobalArg(f"result_{i}", dtype, shape="ntargets,nsources")
for i, dtype in enumerate(self.value_dtypes)
])
loopy_knl = lp.make_kernel(["""
{[itgt, isrc, idim]: \
@@ -257,55 +300,55 @@ class P2PMatrixGenerator(P2PBase):
+ ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"]
+ ["<> is_self = (isrc == target_to_source[itgt])"
if self.exclude_self else ""]
+ loopy_insns + kernel_exprs
+ ["""
result_{i}[itgt, isrc] = \
knl_{i}_scaling * pair_result_{i} {{inames=isrc:itgt}}
""".format(i=iknl)
for iknl in range(len(self.kernels))]
+ loopy_insns
+ [f"""
result_{iknl}[itgt, isrc] = knl_{iknl}_scaling \
* pair_result_{iknl} {{inames=isrc:itgt}}
""" for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
assumptions="nsources>=1 and ntargets>=1",
name=self.name,
fixed_parameters=dict(dim=self.dim),
fixed_parameters={"dim": self.dim},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
for knl in self.kernels:
for knl in self.target_kernels + self.source_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
def __call__(self, queue, targets, sources, **kwargs):
from pytools.obj_array import is_obj_array
knl = self.get_cached_optimized_kernel(
targets_is_obj_array=(
is_obj_array(targets) or isinstance(targets, (tuple, list))),
sources_is_obj_array=(
is_obj_array(sources) or isinstance(sources, (tuple, list))))
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources))
return knl(queue, sources=sources, targets=targets, **kwargs)
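# Sketch: the dense generator fills one (ntargets, nsources) matrix per
# target kernel; applying it to a strength vector reproduces the P2P apply:
#     gen = P2PMatrixGenerator(ctx, [LaplaceKernel(2)], exclude_self=False)
#     evt, (mat,) = gen(queue, targets, sources)
#     pot = mat @ strength  # matches P2P for strength_usage=[0]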
# }}}
# {{{ P2P matrix block writer
# {{{ P2P matrix subset generator
class P2PMatrixBlockGenerator(P2PBase):
class P2PMatrixSubsetGenerator(P2PBase):
"""Generator for a subset of P2P interaction matrix entries.
This generator evaluates a generic set of entries in the matrix. See
:class:`P2PFromCSR` for when a compressed row storage format is available.
.. automethod:: __call__
"""
default_name = "p2p_block"
@property
def default_name(self):
return "p2p_subset"
def get_strength_or_not(self, isrc, kernel_idx):
return 1
def get_kernel(self):
loopy_insns, result_names = self.get_loopy_insns_and_result_names()
kernel_exprs = self.get_kernel_exprs(result_names)
loopy_insns, _result_names = self.get_loopy_insns_and_result_names()
arguments = (
self.get_default_src_tgt_arguments()
+ [
@@ -313,8 +356,10 @@ class P2PMatrixBlockGenerator(P2PBase):
lp.GlobalArg("tgtindices", None, shape="nresult"),
lp.ValueArg("nresult", None)
]
+ [lp.GlobalArg("result_%d" % i, dtype, shape="nresult")
for i, dtype in enumerate(self.value_dtypes)])
+ [
lp.GlobalArg(f"result_{i}", dtype, shape="nresult")
for i, dtype in enumerate(self.value_dtypes)
])
loopy_knl = lp.make_kernel(
"{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}",
@@ -332,26 +377,25 @@ class P2PMatrixBlockGenerator(P2PBase):
+ ["""
<> is_self = (isrc == target_to_source[itgt])
""" if self.exclude_self else ""]
+ loopy_insns + kernel_exprs
+ ["""
result_{i}[imat] = \
knl_{i}_scaling * pair_result_{i} \
+ loopy_insns
+ [f"""
result_{iknl}[imat] = \
knl_{iknl}_scaling * pair_result_{iknl} \
{{id_prefix=write_p2p}}
""".format(i=iknl)
for iknl in range(len(self.kernels))]
""" for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
assumptions="nresult>=1",
silenced_warnings="write_race(write_p2p*)",
name=self.name,
fixed_parameters=dict(dim=self.dim),
fixed_parameters={"dim": self.dim},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.add_dtypes(loopy_knl,
dict(nsources=np.int32, ntargets=np.int32))
loopy_knl = lp.add_dtypes(
loopy_knl, {"nsources": np.int32, "ntargets": np.int32})
for knl in self.kernels:
for knl in self.target_kernels + self.source_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
@@ -366,42 +410,38 @@ class P2PMatrixBlockGenerator(P2PBase):
knl = lp.tag_array_axes(knl, "targets", "sep,C")
knl = lp.split_iname(knl, "imat", 1024, outer_tag="g.0")
return knl
knl = self._allow_redundant_execution_of_knl_scaling(knl)
knl = lp.set_options(knl,
enforce_variable_access_ordered="no_check")
knl = register_optimization_preambles(knl, self.device)
def __call__(self, queue, targets, sources, index_set, **kwargs):
"""Construct a set of blocks of the full P2P interaction matrix.
The blocks are returned as one-dimensional arrays, for performance
and storage reasons. If the two-dimensional form is desired, it can
be obtained using the information in the `index_set` for a block
:math:`i` in the following way:
return knl
.. code-block:: python
def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs):
"""Evaluate a subset of the P2P matrix interactions.
blkranges = index_set.linear_ranges()
blkshape = index_set.block_shape(i)
:arg targets: target point coordinates, which can be an object
:class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of
coordinates or a single stacked array.
:arg sources: source point coordinates, which can also be in any of the
formats of the *targets*.
block2d = result[blkranges[i]:blkranges[i + 1]].reshape(*blkshape)
:arg srcindices: an array of indices into *sources*.
:arg tgtindices: an array of indices into *targets*, of the same size
as *srcindices*.
:arg targets: target point coordinates.
:arg sources: source point coordinates.
:arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
to define the blocks.
:return: a tuple of one-dimensional arrays of kernel evaluations at
target-source pairs described by `index_set`.
:returns: a one-dimensional array of interactions, one for each index
pair in (*srcindices*, *tgtindices*).
"""
from pytools.obj_array import is_obj_array
knl = self.get_cached_optimized_kernel(
targets_is_obj_array=(
is_obj_array(targets) or isinstance(targets, (tuple, list))),
sources_is_obj_array=(
is_obj_array(sources) or isinstance(sources, (tuple, list))))
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources))
return knl(queue,
targets=targets,
sources=sources,
tgtindices=index_set.linear_row_indices,
srcindices=index_set.linear_col_indices, **kwargs)
tgtindices=tgtindices,
srcindices=srcindices, **kwargs)
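# Sketch: entry i of each result array is the dense-matrix entry at
# (tgtindices[i], srcindices[i]), so a rectangular block can be gathered as:
#     tgt_idx, src_idx = np.meshgrid(tgt_block, src_block, indexing="ij")
#     evt, (vals,) = gen(queue, targets, sources,
#             tgtindices=tgt_idx.ravel(), srcindices=src_idx.ravel())
#     block = vals.reshape(len(tgt_block), len(src_block))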
# }}}
@@ -409,14 +449,15 @@ class P2PMatrixBlockGenerator(P2PBase):
# {{{ P2P from CSR-like interaction list
class P2PFromCSR(P2PBase):
default_name = "p2p_from_csr"
def get_kernel(self):
loopy_insns, result_names = self.get_loopy_insns_and_result_names()
kernel_exprs = self.get_kernel_exprs(result_names)
arguments = (
self.get_default_src_tgt_arguments()
+ [
@property
def default_name(self):
return "p2p_from_csr"
def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box,
work_items_per_group=32):
loopy_insns, _result_names = self.get_loopy_insns_and_result_names()
arguments = [
*self.get_default_src_tgt_arguments(),
lp.GlobalArg("box_target_starts",
None, shape=None),
lp.GlobalArg("box_target_counts_nonchild",
@@ -432,20 +473,121 @@ class P2PFromCSR(P2PBase):
lp.GlobalArg("strength", None,
shape="nstrengths, nsources", dim_tags="sep,C"),
lp.GlobalArg("result", None,
shape="nkernels, ntargets", dim_tags="sep,C"),
shape="noutputs, ntargets", dim_tags="sep,C"),
lp.TemporaryVariable("tgt_center", shape=(self.dim,)),
"..."
])
]
loopy_knl = lp.make_kernel([
domains = [
"{[itgt_box]: 0 <= itgt_box < ntgt_boxes}",
"{[iknl]: 0 <= iknl < noutputs}",
"{[isrc_box]: isrc_box_start <= isrc_box < isrc_box_end}",
"{[itgt, isrc, idim]: \
itgt_start <= itgt < itgt_end and \
isrc_start <= isrc < isrc_end and \
0 <= idim < dim}",
],
self.get_kernel_scaling_assignments()
+ ["""
"{[idim]: 0 <= idim < dim}",
]
tgt_outer_limit = (max_ntargets_in_one_box - 1) // work_items_per_group
if self.is_gpu:
arguments += [
lp.TemporaryVariable("local_isrc",
shape=(self.dim, max_nsources_in_one_box)),
lp.TemporaryVariable("local_isrc_strength",
shape=(self.strength_count, max_nsources_in_one_box)),
]
domains += [
"{[istrength]: 0 <= istrength < nstrengths}",
"{[inner]: 0 <= inner < work_items_per_group}",
"{[itgt_offset_outer]: 0 <= itgt_offset_outer <= tgt_outer_limit}",
"{[isrc_prefetch]: 0 <= isrc_prefetch < max_nsources_in_one_box}",
"{[isrc_offset]: 0 <= isrc_offset < max_nsources_in_one_box"
" and isrc_offset < isrc_end - isrc_start}",
]
else:
domains += [
"{[itgt]: itgt_start <= itgt < itgt_end}",
"{[isrc]: isrc_start <= isrc < isrc_end}"
]
# There are two algorithms here because pocl-pthread 1.9 miscompiles
# the "gpu" kernel with prefetching.
if self.is_gpu:
instructions = (self.get_kernel_scaling_assignments()
+ ["""
for itgt_box
<> tgt_ibox = target_boxes[itgt_box] {id=init_0}
<> itgt_start = box_target_starts[tgt_ibox] {id=init_1}
<> itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] \
{id=init_2}
<> isrc_box_start = source_box_starts[itgt_box] {id=init_3}
<> isrc_box_end = source_box_starts[itgt_box+1] {id=init_4}
for itgt_offset_outer
for inner
<> itgt_offset = itgt_offset_outer * work_items_per_group + inner
<> itgt = itgt_offset + itgt_start
<> cond_itgt = itgt < itgt_end
<> acc[iknl] = 0 {id=init_acc}
if cond_itgt
tgt_center[idim] = targets[idim, itgt] {id=set_tgt,dup=idim}
end
end
for isrc_box
<> src_ibox = source_box_lists[isrc_box] {id=src_box_insn_0}
<> isrc_start = box_source_starts[src_ibox] {id=src_box_insn_1}
<> isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] \
{id=src_box_insn_2}
for isrc_prefetch
<> cond_isrc_prefetch = isrc_prefetch < isrc_end - isrc_start \
{id=cond_isrc_prefetch}
if cond_isrc_prefetch
local_isrc[idim, isrc_prefetch] = sources[idim,
isrc_prefetch + isrc_start] {id=prefetch_src, dup=idim}
local_isrc_strength[istrength, isrc_prefetch] = strength[
istrength, isrc_prefetch + isrc_start] {id=prefetch_charge}
end
end
for inner
if cond_itgt
for isrc_offset
<> isrc = isrc_offset + isrc_start
<> d[idim] = (tgt_center[idim] - local_isrc[idim,
isrc_offset]) \
{id=set_d,dep=prefetch_src:set_tgt}
"""] + ["""
<> is_self = (isrc == target_to_source[itgt])
""" if self.exclude_self else ""]
+ [f"""
<> strength_{i} = local_isrc_strength[{i}, isrc_offset] \
{{id=set_strength{i},dep=prefetch_charge}}
""" for
i in set(self.strength_usage)]
+ loopy_insns
+ [f"""
acc[{iknl}] = acc[{iknl}] + \
pair_result_{iknl} \
{{id=update_acc_{iknl}, dep=init_acc}}
""" for iknl in range(len(self.target_kernels))]
+ ["""
end
end
end
end
"""]
+ [f"""
for inner
if cond_itgt
result[{iknl}, itgt] = knl_{iknl}_scaling * acc[{iknl}] \
{{id_prefix=write_csr,dep=update_acc_{iknl} }}
end
end
""" for iknl in range(len(self.target_kernels))]
+ ["""
end
end
"""])
else:
instructions = (self.get_kernel_scaling_assignments()
+ ["""
for itgt_box
<> tgt_ibox = target_boxes[itgt_box]
<> itgt_start = box_target_starts[tgt_ibox]
@@ -454,68 +596,178 @@ class P2PFromCSR(P2PBase):
<> isrc_box_start = source_box_starts[itgt_box]
<> isrc_box_end = source_box_starts[itgt_box+1]
for isrc_box
<> src_ibox = source_box_lists[isrc_box]
<> isrc_start = box_source_starts[src_ibox]
<> isrc_end = isrc_start + box_source_counts_nonchild[src_ibox]
for itgt
for itgt
<> acc[iknl] = 0 {id=init_acc}
tgt_center[idim] = targets[idim, itgt] {id=prefetch_tgt,dup=idim}
for isrc_box
<> src_ibox = source_box_lists[isrc_box] {id=src_box_insn_0}
<> isrc_start = box_source_starts[src_ibox] {id=src_box_insn_1}
<> isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] \
{id=src_box_insn_2}
for isrc
<> d[idim] = \
targets[idim, itgt] - sources[idim, isrc] {dup=idim}
"""] + ["""
<> d[idim] = (tgt_center[idim] - sources[idim,
isrc]) {dep=prefetch_tgt}
"""] + ["""
<> is_self = (isrc == target_to_source[itgt])
""" if self.exclude_self else ""]
+ loopy_insns + kernel_exprs
+ [" end"]
+ ["""
result[{i}, itgt] = result[{i}, itgt] + \
knl_{i}_scaling * simul_reduce(sum, isrc, pair_result_{i}) \
{{id_prefix=write_csr}}
""".format(i=iknl)
for iknl in range(len(self.kernels))]
+ ["""
+ [f"<> strength_{i} = strength[{i}, isrc]" for
i in set(self.strength_usage)]
+ loopy_insns
+ [f"""
acc[{iknl}] = acc[{iknl}] + \
pair_result_{iknl} \
{{id=update_acc_{iknl}, dep=init_acc}}
""" for iknl in range(len(self.target_kernels))]
+ ["""
end
end
"""]
+ [f"""
result[{iknl}, itgt] = knl_{iknl}_scaling * acc[{iknl}] \
{{id_prefix=write_csr,dep=update_acc_{iknl} }}
""" for iknl in range(len(self.target_kernels))]
+ ["""
end
end
"""],
"""])
loopy_knl = lp.make_kernel(
domains,
instructions,
arguments,
assumptions="ntgt_boxes>=1",
name=self.name,
silenced_warnings="write_race(write_csr*)",
fixed_parameters=dict(
dim=self.dim,
nstrengths=self.strength_count,
nkernels=len(self.kernels)),
silenced_warnings=["write_race(write_csr*)", "write_race(prefetch_src)",
"write_race(prefetch_charge)"],
fixed_parameters={
"dim": self.dim,
"nstrengths": self.strength_count,
"max_nsources_in_one_box": max_nsources_in_one_box,
"max_ntargets_in_one_box": max_ntargets_in_one_box,
"work_items_per_group": work_items_per_group,
"tgt_outer_limit": tgt_outer_limit,
"noutputs": len(self.target_kernels)},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.add_dtypes(loopy_knl,
dict(nsources=np.int32, ntargets=np.int32))
loopy_knl = lp.add_dtypes(
loopy_knl, {"nsources": np.int32, "ntargets": np.int32})
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.tag_inames(loopy_knl, "istrength*:unr")
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C")
for knl in self.kernels:
for knl in self.target_kernels + self.source_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
def get_optimized_kernel(self):
# FIXME
knl = self.get_kernel()
import pyopencl as cl
dev = self.context.devices[0]
if dev.type & cl.device_type.CPU:
def get_optimized_kernel(self, max_nsources_in_one_box,
max_ntargets_in_one_box, source_dtype, strength_dtype):
if not self.is_gpu:
knl = self.get_kernel(max_nsources_in_one_box,
max_ntargets_in_one_box)
knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0")
knl = self._allow_redundant_execution_of_knl_scaling(knl)
else:
knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0")
dtype_size = np.dtype(strength_dtype).alignment
work_items_per_group = min(256, max_ntargets_in_one_box)
total_local_mem = max_nsources_in_one_box * \
(self.dim + self.strength_count) * dtype_size
# multiplying by 2 here to make sure at least 2 work groups
# can be scheduled at the same time for latency hiding
nprefetch = (2 * total_local_mem - 1) // self.device.local_mem_size + 1
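# e.g. (illustrative numbers): dim=3, strength_count=1,
# max_nsources_in_one_box=512 and float64 strengths (dtype_size=8) give
# total_local_mem = 512*(3+1)*8 = 16384 bytes; with 48 KiB of local
# memory, nprefetch = (2*16384 - 1)//49152 + 1 = 1, i.e. each box's
# sources fit in local memory without chunking the prefetch loop.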
knl = self.get_kernel(max_nsources_in_one_box,
max_ntargets_in_one_box,
work_items_per_group=work_items_per_group)
knl = lp.tag_inames(knl, {"itgt_box": "g.0", "inner": "l.0"})
knl = lp.set_temporary_address_space(knl,
["local_isrc", "local_isrc_strength"], lp.AddressSpace.LOCAL)
local_arrays = ["local_isrc", "local_isrc_strength"]
local_array_isrc_axis = [1, 1]
local_array_sizes = [self.dim, self.strength_count]
local_array_dtypes = [source_dtype, strength_dtype]
# By concatenating the temporaries into one memory layout
# and marking the first axis as vec, we transpose the
# arrays and make the access of the source
# coordinates and the strength for each source a coalesced
# 256-bit access (assuming double precision), which is
# optimized for NVIDIA GPUs. On an NVIDIA Titan V, this
# optimization led to an 8% speedup.
if strength_dtype == source_dtype:
knl = lp.concatenate_arrays(knl, local_arrays, "local_isrc")
local_arrays = ["local_isrc"]
local_array_sizes = [self.dim + self.strength_count]
local_array_dtypes = [source_dtype]
# We try to mark the local arrays (sources, strengths)
# as vec for the first dimension
for i, (array_name, array_size, array_dtype) in \
enumerate(zip(local_arrays, local_array_sizes,
local_array_dtypes, strict=True)):
if issubclass(array_dtype.type, np.complexfloating):
# pyopencl does not support complex data type vectors
continue
if array_size in [2, 3, 4, 8, 16]:
knl = lp.tag_array_axes(knl, array_name, "vec,C")
else:
# FIXME: check if CUDA
n = 16 // dtype_size
if n in [1, 2, 4, 8]:
knl = lp.split_array_axis(knl, array_name, 0, n)
knl = lp.tag_array_axes(knl, array_name, "C,vec,C")
local_array_isrc_axis[i] = 2
# We need to split isrc_prefetch and isrc_offset into chunks.
nsources = (max_nsources_in_one_box + nprefetch - 1) // nprefetch
for local_array, axis in zip(local_arrays, local_array_isrc_axis,
strict=True):
knl = lp.split_array_axis(knl, local_array, axis, nsources)
knl = lp.split_iname(knl, "isrc_prefetch", nsources,
outer_iname="iprefetch")
knl = lp.split_iname(knl, "isrc_prefetch_inner", work_items_per_group)
knl = lp.tag_inames(knl, {"isrc_prefetch_inner_inner": "l.0"})
knl = lp.split_iname(knl, "isrc_offset", nsources,
outer_iname="iprefetch")
# After splitting, the temporary array local_isrc need not
# be as large as before; simplify the indices before unprivatizing.
knl = lp.simplify_indices(knl)
knl = lp.unprivatize_temporaries_with_inames(knl,
"iprefetch", only_var_names=local_arrays)
knl = lp.add_inames_to_insn(knl,
"inner", "id:init_* or id:*_scaling or id:src_box_insn_*")
knl = lp.add_inames_to_insn(knl, "itgt_box", "id:*_scaling")
# knl = lp.set_options(knl, write_code=True)
knl = lp.set_options(knl,
enforce_variable_access_ordered="no_check")
knl = register_optimization_preambles(knl, self.device)
return knl
def __call__(self, queue, **kwargs):
knl = self.get_cached_optimized_kernel()
max_nsources_in_one_box = kwargs.pop("max_nsources_in_one_box")
max_ntargets_in_one_box = kwargs.pop("max_ntargets_in_one_box")
if self.is_gpu:
source_dtype = kwargs.get("sources")[0].dtype
strength_dtype = kwargs.get("strength").dtype
else:
# these are unused on non-GPU devices and would defeat the caching;
# set them to None to keep the caching across dtypes
source_dtype = None
strength_dtype = None
knl = self.get_cached_kernel_executor(
max_nsources_in_one_box=max_nsources_in_one_box,
max_ntargets_in_one_box=max_ntargets_in_one_box,
source_dtype=source_dtype,
strength_dtype=strength_dtype,
)
return knl(queue, **kwargs)
sumpy/point_calculus.py
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = "Copyright (C) 2017 Andreas Kloeckner"
@@ -24,8 +25,10 @@ THE SOFTWARE.
import numpy as np
import numpy.linalg as la
from pytools import memoize_method
__doc__ = """
.. autoclass:: CalculusPatch
@@ -33,13 +36,13 @@ __doc__ = """
"""
class CalculusPatch(object):
class CalculusPatch:
"""Sets up a grid of points on which derivatives can be calculated. Useful
to verify that an evaluated potential actually solves a PDE.
.. attribute: dim
.. attribute:: dim
.. attribute: points
.. attribute:: points
shape: ``(dim, npoints_total)``
@@ -48,7 +51,7 @@ class CalculusPatch(object):
.. automethod:: diff
.. automethod:: dx
.. automethod:: dy
.. automethod:: dy
.. automethod:: dz
.. automethod:: laplace
.. automethod:: div
.. automethod:: curl
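# Illustrative sketch (constructor signature assumed to be
# CalculusPatch(center, h=...) as suggested by the attributes above):
# verify that a point potential is harmonic away from its source.
import numpy as np
from sumpy.point_calculus import CalculusPatch

patch = CalculusPatch(np.array([0., 0., 0.]), h=0.1)
x0 = np.array([5., 0., 0.])  # source point, well away from the patch
dist = np.sqrt(np.sum((patch.points - x0[:, np.newaxis])**2, axis=0))
u = 1.0/dist                 # 3D Laplace point potential
assert patch.norm(patch.laplace(u), np.inf) < 1e-3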
@@ -80,7 +83,7 @@ class CalculusPatch(object):
weights_1d = weights_1d * (h/2)
else:
raise ValueError("invalid node set: %s" % nodes)
raise ValueError(f"invalid node set: {nodes}")
self.h = h
self.npoints = npoints
@@ -116,13 +119,14 @@ class CalculusPatch(object):
return self._vandermonde_1d()[0]
def basis(self):
""""
"""
:returns: a :class:`list` containing functions that realize
a high-order interpolation basis on the :attr:`points`.
a high-order interpolation basis on the :py:attr:`points`.
"""
from scipy.special import eval_chebyt # pylint: disable=no-name-in-module
from pytools import indices_in_shape
from scipy.special import eval_chebyt
def eval_basis(ind, x):
result = 1
@@ -161,7 +165,7 @@ class CalculusPatch(object):
coeff_diff_mat = np.diag(np.arange(1, npoints), 1)
n_diff_mat = np.eye(npoints)
for i in range(nderivs):
for _ in range(nderivs):
n_diff_mat = n_diff_mat.dot(coeff_diff_mat)
deriv_coeffs_mat = la.solve(vandermonde.T, n_diff_mat.T).T
@@ -175,7 +179,7 @@ class CalculusPatch(object):
"""
from numbers import Number
if isinstance(f_values, (np.number, Number)):
if isinstance(f_values, np.number | Number):
# constants differentiate to 0
return 0
@@ -188,7 +192,7 @@ class CalculusPatch(object):
tgt_axes = (axes[:axis] + "i" + axes[axis:])[:dim]
return np.einsum(
"ij,%s->%s" % (src_axes, tgt_axes),
f"ij,{src_axes}->{tgt_axes}",
self._diff_mat_1d(nderivs),
f_values.reshape(*self._pshape)).reshape(-1)
@@ -211,9 +215,9 @@ class CalculusPatch(object):
return sum(self.diff(iaxis, f_values, 2) for iaxis in range(self.dim))
def div(self, arg):
"""
r"""
:arg arg: an object array containing
:class:`numpy.ndarrays` with shape ``(npoints_total,)``.
:class:`numpy.ndarray`\ s with shape ``(npoints_total,)``.
"""
result = 0
for i, arg_i in enumerate(arg):
@@ -222,18 +226,18 @@ class CalculusPatch(object):
return result
def curl(self, arg):
"""Take the curl of the vector quantity *arg*.
r"""Take the curl of the vector quantity *arg*.
:arg arg: an object array of shape ``(3,)`` containing
:class:`numpy.ndarrays` with shape ``(npoints_total,)``.
:class:`numpy.ndarray`\ s with shape ``(npoints_total,)``.
"""
from pytools import levi_civita
from pytools.obj_array import make_obj_array
return make_obj_array([
sum(
levi_civita((l, m, n)) * self.diff(m, arg[n])
levi_civita((k, m, n)) * self.diff(m, arg[n])
for m in range(3) for n in range(3))
for l in range(3)])
for k in range(3)])
def eval_at_center(self, f_values):
"""Interpolate *f_values* to the center point.
@@ -242,8 +246,10 @@ class CalculusPatch(object):
:returns: a scalar.
"""
f_values = f_values.reshape(*self._pshape)
for i in range(self.dim):
f_values = self._zero_eval_vec_1d.dot(f_values)
zero_eval_vec_1d = self._zero_eval_vec_1d
for _ in range(self.dim):
f_values = zero_eval_vec_1d @ f_values
return f_values
@@ -261,7 +267,7 @@ class CalculusPatch(object):
def norm(self, arg, p):
if p == np.inf:
if arg.dtype == np.object:
if arg.dtype == object:
return max(
la.norm(x_i, p)
for x_i in arg)
@@ -297,7 +303,7 @@ def frequency_domain_maxwell(cpatch, e, h, k):
# https://en.wikipedia.org/w/index.php?title=Maxwell%27s_equations&oldid=798940325#Macroscopic_formulation
# assumed time dependence exp(-1j*omega*t)
# Agrees with Jackson, Third Ed., (8.16)
# This agrees with Jackson, Third Ed., (8.16)
resid_faraday = cpatch.curl(e) - 1j * omega * b
resid_ampere = cpatch.curl(h) + 1j * omega * d
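# In this convention the residuals correspond to
#     curl E - i*omega*B = 0   (Faraday's law)
#     curl H + i*omega*D = 0   (Ampere's law, source-free)
# so both vanish for a genuine frequency-domain Maxwell solution.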
sumpy/qbx.py
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = """
Copyright (C) 2012 Andreas Kloeckner
@@ -26,18 +27,19 @@ THE SOFTWARE.
"""
import six
from six.moves import range
import logging
import numpy as np
import loopy as lp
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
import sumpy.symbolic as sym
from pytools import memoize_method
from pymbolic import parse, var
from pytools import memoize_method
import sumpy.symbolic as sym
from sumpy.tools import KernelCacheMixin, KernelComputation, is_obj_array_like
from sumpy.tools import KernelComputation, KernelCacheWrapper
import logging
logger = logging.getLogger(__name__)
@@ -49,7 +51,7 @@ QBX for Layer Potentials
.. autoclass:: LayerPotentialBase
.. autoclass:: LayerPotential
.. autoclass:: LayerPotentialMatrixGenerator
.. autoclass:: LayerPotentialMatrixBlockGenerator
.. autoclass:: LayerPotentialMatrixSubsetGenerator
"""
@@ -60,49 +62,67 @@ def stringify_expn_index(i):
else:
assert isinstance(i, int)
if i < 0:
return "m%d" % (-i)
return f"m{-i}"
else:
return str(i)
def expand(expansion_nr, sac, expansion, avec, bvec):
rscale = sym.Symbol("rscale")
coefficients = expansion.coefficients_from_source(avec, bvec, rscale)
assigned_coeffs = [
sym.Symbol(
sac.assign_unique("expn%dcoeff%s" % (
expansion_nr, stringify_expn_index(i)),
coefficients[expansion.get_storage_index(i)]))
for i in expansion.get_coefficient_identifiers()]
return sac.assign_unique("expn%d_result" % expansion_nr,
expansion.evaluate(assigned_coeffs, bvec, rscale))
# {{{ layer potential computation
# {{{ base class
class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
def __init__(self, ctx, expansions, strength_usage=None,
value_dtypes=None,
options=[], name=None, device=None):
KernelComputation.__init__(self, ctx, expansions, strength_usage,
value_dtypes,
name, options, device)
class LayerPotentialBase(KernelCacheMixin, KernelComputation):
def __init__(self, ctx, expansion, strength_usage=None,
value_dtypes=None, name=None, device=None,
source_kernels=None, target_kernels=None):
from pytools import single_valued
self.dim = single_valued(knl.dim for knl in self.expansions)
KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels,
strength_usage=strength_usage, source_kernels=source_kernels,
value_dtypes=value_dtypes, name=name, device=device)
self.dim = single_valued(knl.dim for knl in self.target_kernels)
self.expansion = expansion
def get_cache_key(self):
return (type(self).__name__, tuple(self.kernels),
tuple(self.strength_usage), tuple(self.value_dtypes))
return (type(self).__name__, self.expansion, tuple(self.target_kernels),
tuple(self.source_kernels), tuple(self.strength_usage),
tuple(self.value_dtypes),
self.device.hashable_model_and_version_identifier)
def _expand(self, sac, avec, bvec, rscale, isrc):
from sumpy.symbolic import PymbolicToSympyMapper
conv = PymbolicToSympyMapper()
strengths = [conv(self.get_strength_or_not(isrc, idx))
for idx in range(len(self.source_kernels))]
coefficients = self.expansion.coefficients_from_source_vec(
self.source_kernels, avec, bvec, rscale=rscale, weights=strengths,
sac=sac)
return coefficients
def _evaluate(self, sac, avec, bvec, rscale, expansion_nr, coefficients):
from sumpy.expansion.local import LineTaylorLocalExpansion
tgt_knl = self.target_kernels[expansion_nr]
if isinstance(self.expansion, LineTaylorLocalExpansion):
# In LineTaylorLocalExpansion.evaluate, we can't run
# postprocess_at_target because the coefficients are assigned
# symbols and postprocess with a derivative will make them zero.
# Instead run postprocess here before the coefficients are assigned.
coefficients = [tgt_knl.postprocess_at_target(coeff, bvec) for
coeff in coefficients]
assigned_coeffs = [
sym.Symbol(
sac.assign_unique(
f"expn{expansion_nr}coeff{stringify_expn_index(i)}",
coefficients[self.expansion.get_storage_index(i)])
)
for i in self.expansion.get_coefficient_identifiers()]
@property
def expansions(self):
return self.kernels
return sac.assign_unique(f"expn{expansion_nr}_result",
self.expansion.evaluate(tgt_knl, assigned_coeffs, bvec, rscale))
def get_loopy_insns_and_result_names(self):
from sumpy.symbolic import make_sym_vector
@@ -114,19 +134,25 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
logger.info("compute expansion expressions: start")
result_names = [expand(i, sac, expn, avec, bvec)
for i, expn in enumerate(self.expansions)]
rscale = sym.Symbol("rscale")
isrc_sym = var("isrc")
coefficients = self._expand(sac, avec, bvec, rscale, isrc_sym)
result_names = [self._evaluate(sac, avec, bvec, rscale, i, coefficients)
for i in range(len(self.target_kernels))]
logger.info("compute expansion expressions: done")
sac.run_global_cse()
pymbolic_expr_maps = [knl.get_code_transformer() for knl in (
self.target_kernels + self.source_kernels)]
from sumpy.codegen import to_loopy_insns
loopy_insns = to_loopy_insns(
six.iteritems(sac.assignments),
vector_names=set(["a", "b"]),
pymbolic_expr_maps=[
expn.kernel.get_code_transformer() for expn in self.expansions],
sac.assignments.items(),
vector_names={"a", "b"},
pymbolic_expr_maps=pymbolic_expr_maps,
retain_names=result_names,
complex_dtype=np.complex128 # FIXME
)
@@ -134,52 +160,66 @@ class LayerPotentialBase(KernelComputation, KernelCacheWrapper):
return loopy_insns, result_names
def get_strength_or_not(self, isrc, kernel_idx):
return var("strength_%d" % self.strength_usage[kernel_idx]).index(isrc)
return var(f"strength_{self.strength_usage[kernel_idx]}_isrc")
def get_kernel_exprs(self, result_names):
isrc_sym = var("isrc")
exprs = [var(name) * self.get_strength_or_not(isrc_sym, i)
for i, name in enumerate(result_names)]
exprs = [var(name) for name in result_names]
return [lp.Assignment(id=None,
assignee="pair_result_%d" % i, expression=expr,
assignee=f"pair_result_{i}",
expression=expr,
temp_var_type=lp.Optional(None))
for i, expr in enumerate(exprs)]
def get_default_src_tgt_arguments(self):
from sumpy.tools import gather_loopy_source_arguments
return ([
lp.GlobalArg("src", None,
return [
lp.GlobalArg("sources", None,
shape=(self.dim, "nsources"), order="C"),
lp.GlobalArg("tgt", None,
lp.GlobalArg("targets", None,
shape=(self.dim, "ntargets"), order="C"),
lp.GlobalArg("center", None,
shape=(self.dim, "ntargets"), dim_tags="sep,C"),
shape=(self.dim, "ntargets"), order="C"),
lp.GlobalArg("expansion_radii",
None, shape="ntargets"),
lp.ValueArg("nsources", None),
lp.ValueArg("ntargets", None)]
+ gather_loopy_source_arguments(self.kernels))
lp.ValueArg("ntargets", None),
*gather_loopy_source_arguments(self.source_kernels)
]
def get_kernel(self):
raise NotImplementedError
def get_optimized_kernel(self):
def get_optimized_kernel(self,
targets_is_obj_array, sources_is_obj_array, centers_is_obj_array,
# Used by pytential to override the name of the loop to be
# parallelized. In the case of QBX, that's the loop over QBX
# targets (not global targets).
itgt_name="itgt"):
# FIXME specialize/tune for GPU/CPU
loopy_knl = self.get_kernel()
if targets_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
if sources_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C")
if centers_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C")
import pyopencl as cl
dev = self.context.devices[0]
if dev.type & cl.device_type.CPU:
loopy_knl = lp.split_iname(loopy_knl, "itgt", 16, outer_tag="g.0",
loopy_knl = lp.split_iname(loopy_knl, itgt_name, 16, outer_tag="g.0",
inner_tag="l.0")
loopy_knl = lp.split_iname(loopy_knl, "isrc", 256)
loopy_knl = lp.prioritize_loops(loopy_knl,
["isrc_outer", "itgt_inner"])
["isrc_outer", f"{itgt_name}_inner"])
else:
from warnings import warn
warn("don't know how to tune layer potential computation for '%s'" % dev)
loopy_knl = lp.split_iname(loopy_knl, "itgt", 128, outer_tag="g.0")
warn(f"don't know how to tune layer potential computation for '{dev}'",
stacklevel=1)
loopy_knl = lp.split_iname(loopy_knl, itgt_name, 128, outer_tag="g.0")
loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl)
return loopy_knl
@@ -194,7 +234,9 @@ class LayerPotential(LayerPotentialBase):
.. automethod:: __call__
"""
default_name = "qbx_apply"
@property
def default_name(self):
return "qbx_apply"
@memoize_method
def get_kernel(self):
@@ -202,12 +244,14 @@ class LayerPotential(LayerPotentialBase):
kernel_exprs = self.get_kernel_exprs(result_names)
arguments = (
self.get_default_src_tgt_arguments()
+ [lp.GlobalArg("strength_%d" % i,
None, shape="nsources", order="C")
for i in range(self.strength_count)]
+ [lp.GlobalArg("result_%d" % i,
None, shape="ntargets", order="C")
for i in range(len(self.kernels))])
+ [
lp.GlobalArg(f"strength_{i}", None, shape="nsources", order="C")
for i in range(self.strength_count)
]
+ [
lp.GlobalArg(f"result_{i}", None, shape="ntargets", order="C")
for i in range(len(self.target_kernels))
])
loopy_knl = lp.make_kernel(["""
{[itgt, isrc, idim]: \
@@ -217,28 +261,30 @@ class LayerPotential(LayerPotentialBase):
"""],
self.get_kernel_scaling_assignments()
+ ["for itgt, isrc"]
+ ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"]
+ ["<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}"]
+ ["<> a[idim] = center[idim, itgt] - sources[idim, isrc]"]
+ ["<> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim}"]
+ ["<> rscale = expansion_radii[itgt]"]
+ [f"<> strength_{i}_isrc = strength_{i}[isrc]" for i in
range(self.strength_count)]
+ loopy_insns + kernel_exprs
+ ["""
result_{i}[itgt] = knl_{i}_scaling * \
simul_reduce(sum, isrc, pair_result_{i}) \
+ [f"""
result_{iknl}[itgt] = knl_{iknl}_scaling * \
simul_reduce(sum, isrc, pair_result_{iknl}) \
{{id_prefix=write_lpot,inames=itgt}}
""".format(i=iknl)
for iknl in range(len(self.expansions))]
"""
for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
name=self.name,
assumptions="ntargets>=1 and nsources>=1",
default_offset=lp.auto,
silenced_warnings="write_race(write_lpot*)",
fixed_parameters=dict(dim=self.dim),
fixed_parameters={"dim": self.dim},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
for expn in self.expansions:
loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
for knl in self.target_kernels + self.source_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
@@ -249,12 +295,15 @@ class LayerPotential(LayerPotentialBase):
already multiplied in.
"""
knl = self.get_cached_optimized_kernel()
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources),
centers_is_obj_array=is_obj_array_like(centers))
for i, dens in enumerate(strengths):
kwargs["strength_%d" % i] = dens
kwargs[f"strength_{i}"] = dens
return knl(queue, src=sources, tgt=targets, center=centers,
return knl(queue, sources=sources, targets=targets, center=centers,
expansion_radii=expansion_radii, **kwargs)
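# Illustrative sketch (argument order of __call__ assumed from the call
# above; ctx, queue, geometry arrays, density sigma and quadrature
# weights are given, not shown):
from sumpy.expansion.local import LineTaylorLocalExpansion
from sumpy.kernel import LaplaceKernel
from sumpy.qbx import LayerPotential

knl = LaplaceKernel(2)
lpot = LayerPotential(ctx, expansion=LineTaylorLocalExpansion(knl, order=4),
        source_kernels=(knl,), target_kernels=(knl,))
evt, (pot,) = lpot(queue, targets, sources, centers,
        [sigma * weights],  # strengths, quadrature weights multiplied in
        expansion_radii)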
# }}}
@@ -265,7 +314,9 @@ class LayerPotential(LayerPotentialBase):
class LayerPotentialMatrixGenerator(LayerPotentialBase):
"""Generator for layer potential matrix entries."""
default_name = "qbx_matrix"
@property
def default_name(self):
return "qbx_matrix"
def get_strength_or_not(self, isrc, kernel_idx):
return 1
@@ -276,9 +327,11 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
kernel_exprs = self.get_kernel_exprs(result_names)
arguments = (
self.get_default_src_tgt_arguments()
+ [lp.GlobalArg("result_%d" % i,
dtype, shape="ntargets, nsources", order="C")
for i, dtype in enumerate(self.value_dtypes)])
+ [
lp.GlobalArg(f"result_{i}",
dtype, shape="ntargets, nsources", order="C")
for i, dtype in enumerate(self.value_dtypes)
])
loopy_knl = lp.make_kernel(["""
{[itgt, isrc, idim]: \
@@ -288,48 +341,53 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase):
"""],
self.get_kernel_scaling_assignments()
+ ["for itgt, isrc"]
+ ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"]
+ ["<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}"]
+ ["<> a[idim] = center[idim, itgt] - sources[idim, isrc]"]
+ ["<> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim}"]
+ ["<> rscale = expansion_radii[itgt]"]
+ loopy_insns + kernel_exprs
+ ["""
result_{i}[itgt, isrc] = \
knl_{i}_scaling * pair_result_{i} \
+ [f"""
result_{iknl}[itgt, isrc] = \
knl_{iknl}_scaling * pair_result_{iknl} \
{{inames=isrc:itgt}}
""".format(i=iknl)
for iknl in range(len(self.expansions))]
"""
for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
name=self.name,
assumptions="ntargets>=1 and nsources>=1",
default_offset=lp.auto,
fixed_parameters=dict(dim=self.dim),
fixed_parameters={"dim": self.dim},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
for expn in self.expansions:
for expn in self.source_kernels + self.target_kernels:
loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
return loopy_knl
def __call__(self, queue, targets, sources, centers, expansion_radii, **kwargs):
knl = self.get_cached_optimized_kernel()
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources),
centers_is_obj_array=is_obj_array_like(centers))
return knl(queue, src=sources, tgt=targets, center=centers,
return knl(queue, sources=sources, targets=targets, center=centers,
expansion_radii=expansion_radii, **kwargs)
# }}}
# {{{
# {{{ matrix subset generator
class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
class LayerPotentialMatrixSubsetGenerator(LayerPotentialBase):
"""Generator for a subset of the layer potential matrix entries.
.. automethod:: __call__
"""
default_name = "qbx_block"
@property
def default_name(self):
return "qbx_subset"
def get_strength_or_not(self, isrc, kernel_idx):
return 1
@@ -345,8 +403,10 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
lp.GlobalArg("tgtindices", None, shape="nresult"),
lp.ValueArg("nresult", None)
]
+ [lp.GlobalArg("result_%d" % i, dtype, shape="nresult")
for i, dtype in enumerate(self.value_dtypes)])
+ [
lp.GlobalArg(f"result_{i}", dtype, shape="nresult")
for i, dtype in enumerate(self.value_dtypes)
])
loopy_knl = lp.make_kernel([
"{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}"
@@ -360,62 +420,84 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
<> itgt = tgtindices[imat]
<> isrc = srcindices[imat]
<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}
<> b[idim] = tgt[idim, itgt] - center[idim, itgt] {dup=idim}
<> a[idim] = center[idim, itgt] - sources[idim, isrc]
<> b[idim] = targets[idim, itgt] - center[idim, itgt] {dup=idim}
<> rscale = expansion_radii[itgt]
"""]
+ loopy_insns + kernel_exprs
+ ["""
result_{i}[imat] = knl_{i}_scaling * pair_result_{i} \
+ [f"""
result_{iknl}[imat] = knl_{iknl}_scaling * pair_result_{iknl} \
{{id_prefix=write_lpot}}
""".format(i=iknl)
for iknl in range(len(self.expansions))]
"""
for iknl in range(len(self.target_kernels))]
+ ["end"],
arguments,
name=self.name,
assumptions="nresult>=1",
default_offset=lp.auto,
silenced_warnings="write_race(write_lpot*)",
fixed_parameters=dict(dim=self.dim),
fixed_parameters={"dim": self.dim},
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
loopy_knl = lp.add_dtypes(loopy_knl,
dict(nsources=np.int32, ntargets=np.int32))
loopy_knl = lp.add_dtypes(
loopy_knl, {"nsources": np.int32, "ntargets": np.int32})
for expn in self.expansions:
loopy_knl = expn.prepare_loopy_kernel(loopy_knl)
for knl in self.source_kernels + self.target_kernels:
loopy_knl = knl.prepare_loopy_kernel(loopy_knl)
return loopy_knl
def get_optimized_kernel(self):
def get_optimized_kernel(self,
targets_is_obj_array, sources_is_obj_array, centers_is_obj_array):
loopy_knl = self.get_kernel()
if targets_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "targets", "sep,C")
if sources_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "sources", "sep,C")
if centers_is_obj_array:
loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C")
loopy_knl = lp.split_iname(loopy_knl, "imat", 1024, outer_tag="g.0")
loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl)
return loopy_knl
def __call__(self, queue, targets, sources, centers, expansion_radii,
index_set, **kwargs):
"""
:arg targets: target point coordinates.
:arg sources: source point coordinates.
:arg centers: QBX target expansion centers.
tgtindices, srcindices, **kwargs):
"""Evaluate a subset of the QBX matrix interactions.
:arg targets: target point coordinates, which can be an object
:class:`~numpy.ndarray`, :class:`list` or :class:`tuple` of
coordinates or a single stacked array.
:arg sources: source point coordinates, which can also be in any of the
formats of the *targets*.
:arg centers: QBX target expansion center coordinates, which can also
be in any of the formats of the *targets*. The number of centers
must match the number of targets.
:arg expansion_radii: radii for each expansion center.
:arg index_set: a :class:`sumpy.tools.MatrixBlockIndexRanges` used
to define the blocks.
:return: a tuple of one-dimensional arrays of kernel evaluations at
target-source pairs described by `index_set`.
:arg srcindices: an array of indices into *sources*.
:arg tgtindices: an array of indices into *targets*, of the same size
as *srcindices*.
:returns: a one-dimensional array of interactions, one for each index
pair in (*srcindices*, *tgtindices*).
"""
knl = self.get_cached_optimized_kernel()
knl = self.get_cached_kernel_executor(
targets_is_obj_array=is_obj_array_like(targets),
sources_is_obj_array=is_obj_array_like(sources),
centers_is_obj_array=is_obj_array_like(centers))
return knl(queue,
src=sources,
tgt=targets,
sources=sources,
targets=targets,
center=centers,
expansion_radii=expansion_radii,
tgtindices=index_set.linear_row_indices,
srcindices=index_set.linear_col_indices, **kwargs)
tgtindices=tgtindices,
srcindices=srcindices, **kwargs)
# }}}
@@ -426,10 +508,12 @@ class LayerPotentialMatrixBlockGenerator(LayerPotentialBase):
def find_jump_term(kernel, arg_provider):
from sumpy.kernel import (
AxisTargetDerivative,
DirectionalSourceDerivative,
DirectionalTargetDerivative,
DerivativeBase)
AxisSourceDerivative,
AxisTargetDerivative,
DerivativeBase,
DirectionalSourceDerivative,
DirectionalTargetDerivative,
)
tgt_derivatives = []
src_derivatives = []
@@ -441,12 +525,15 @@ def find_jump_term(kernel, arg_provider):
elif isinstance(kernel, DirectionalTargetDerivative):
tgt_derivatives.append(kernel.dir_vec_name)
kernel = kernel.kernel
elif isinstance(kernel, AxisSourceDerivative):
src_derivatives.append(kernel.axis)
kernel = kernel.kernel
elif isinstance(kernel, DirectionalSourceDerivative):
src_derivatives.append(kernel.dir_vec_name)
kernel = kernel.kernel
else:
raise RuntimeError("derivative type '%s' not understood"
% type(kernel))
raise RuntimeError(
f"derivative type '{type(kernel).__name__}' not understood")
tgt_count = len(tgt_derivatives)
src_count = len(src_derivatives)
@@ -495,7 +582,7 @@ def find_jump_term(kernel, arg_provider):
elif tgt_count == 1:
from warnings import warn
warn("jump terms for mixed derivatives (1 src+1 tgt) only available "
"for the double-layer potential")
"for the double-layer potential", stacklevel=1)
i, = tgt_derivatives
assert isinstance(i, int)
return (
@@ -503,13 +590,14 @@ def find_jump_term(kernel, arg_provider):
* info.tangent[i]
* info.density_prime)
raise ValueError("don't know jump term for %d "
"target and %d source derivatives" % (tgt_count, src_count))
raise ValueError(
f"Do not know jump term for {tgt_count} target "
f"and {src_count} source derivatives")
# {{{ symbolic argument provider
class _JumpTermSymbolicArgumentProvider(object):
class _JumpTermSymbolicArgumentProvider:
"""This class answers requests by :func:`find_jump_term` for symbolic values
of quantities needed for the computation of the jump terms and tracks what
data was requested. This tracking allows assembling the argument list of the
@@ -531,77 +619,77 @@ class _JumpTermSymbolicArgumentProvider(object):
self.arguments[self.density_var_name] = \
lp.GlobalArg(self.density_var_name, self.density_dtype,
shape="ntargets", order="C")
return parse("%s[itgt]" % self.density_var_name)
return parse(f"{self.density_var_name}[itgt]")
@property
@memoize_method
def density_prime(self):
prime_var_name = self.density_var_name+"_prime"
self.arguments[prime_var_name] = \
prime_var_name = f"{self.density_var_name}_prime"
self.arguments[prime_var_name] = (
lp.GlobalArg(prime_var_name, self.density_dtype,
shape="ntargets", order="C")
return parse("%s[itgt]" % prime_var_name)
shape="ntargets", order="C"))
return parse(f"{prime_var_name}[itgt]")
@property
@memoize_method
def side(self):
self.arguments["side"] = \
lp.GlobalArg("side", self.geometry_dtype, shape="ntargets")
self.arguments["side"] = (
lp.GlobalArg("side", self.geometry_dtype, shape="ntargets"))
return parse("side[itgt]")
@property
@memoize_method
def normal(self):
self.arguments["normal"] = \
self.arguments["normal"] = (
lp.GlobalArg("normal", self.geometry_dtype,
shape=("ntargets", self.dim), order="C")
shape=("ntargets", self.dim), order="C"))
from pytools.obj_array import make_obj_array
return make_obj_array([
parse("normal[itgt, %d]" % i)
parse(f"normal[itgt, {i}]")
for i in range(self.dim)])
@property
@memoize_method
def tangent(self):
self.arguments["tangent"] = \
self.arguments["tangent"] = (
lp.GlobalArg("tangent", self.geometry_dtype,
shape=("ntargets", self.dim), order="C")
shape=("ntargets", self.dim), order="C"))
from pytools.obj_array import make_obj_array
return make_obj_array([
parse("tangent[itgt, %d]" % i)
parse(f"tangent[itgt, {i}]")
for i in range(self.dim)])
@property
@memoize_method
def mean_curvature(self):
self.arguments["mean_curvature"] = \
self.arguments["mean_curvature"] = (
lp.GlobalArg("mean_curvature",
self.geometry_dtype, shape="ntargets",
order="C")
self.geometry_dtype, shape="ntargets",
order="C"))
return parse("mean_curvature[itgt]")
@property
@memoize_method
def src_derivative_dir(self):
self.arguments["src_derivative_dir"] = \
self.arguments["src_derivative_dir"] = (
lp.GlobalArg("src_derivative_dir",
self.geometry_dtype, shape=("ntargets", self.dim),
order="C")
self.geometry_dtype, shape=("ntargets", self.dim),
order="C"))
from pytools.obj_array import make_obj_array
return make_obj_array([
parse("src_derivative_dir[itgt, %d]" % i)
parse(f"src_derivative_dir[itgt, {i}]")
for i in range(self.dim)])
@property
@memoize_method
def tgt_derivative_dir(self):
self.arguments["tgt_derivative_dir"] = \
self.arguments["tgt_derivative_dir"] = (
lp.GlobalArg("tgt_derivative_dir",
self.geometry_dtype, shape=("ntargets", self.dim),
order="C")
self.geometry_dtype, shape=("ntargets", self.dim),
order="C"))
from pytools.obj_array import make_obj_array
return make_obj_array([
parse("tgt_derivative_dir[itgt, %d]" % i)
parse(f"tgt_derivative_dir[itgt, {i}]")
for i in range(self.dim)])
# }}}
sumpy/symbolic.py
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
@@ -22,17 +23,35 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
__doc__ = """
Symbolic Tools
==============
.. class:: Basic
The expression base class for the "heavy-duty" computer algebra toolkit
in use. Either :class:`sympy.core.basic.Basic` or :class:`symengine.Basic`.
.. autoclass:: SpatialConstant
"""
import six
from six.moves import range
import logging
import math
from typing import TYPE_CHECKING, ClassVar
import numpy as np
from pymbolic.mapper import IdentityMapper as IdentityMapperBase
import sympy as sp
import pymbolic.primitives as prim
from pymbolic.mapper import IdentityMapper as IdentityMapperBase
import logging
logger = logging.getLogger(__name__)
USE_SYMENGINE = False
# {{{ symbolic backend
@@ -40,28 +59,28 @@ def _find_symbolic_backend():
global USE_SYMENGINE
try:
import symengine # noqa
import symengine # noqa: F401
symengine_found = True
symengine_error = None
except ImportError as import_error:
symengine_found = False
symengine_error = import_error
ALLOWED_BACKENDS = ("sympy", "symengine") # noqa
BACKEND_ENV_VAR = "SUMPY_FORCE_SYMBOLIC_BACKEND" # noqa
allowed_backends = ("sympy", "symengine")
backend_env_var = "SUMPY_FORCE_SYMBOLIC_BACKEND"
import os
backend = os.environ.get(BACKEND_ENV_VAR)
backend = os.environ.get(backend_env_var)
if backend is not None:
if backend not in ALLOWED_BACKENDS:
if backend not in allowed_backends:
raise RuntimeError(
"%s value is unrecognized: '%s' "
"(allowed values are %s)" % (
BACKEND_ENV_VAR,
backend,
", ".join("'%s'" % val for val in ALLOWED_BACKENDS)))
f"{backend_env_var} value is unrecognized: '{backend}' "
"(allowed values are {})".format(
", ".join(f"'{val}'" for val in allowed_backends))
)
if backend == "symengine" and not symengine_found:
raise RuntimeError("could not find SymEngine: %s" % symengine_error)
raise RuntimeError(f"could not find SymEngine: {symengine_error}")
USE_SYMENGINE = (backend == "symengine")
else:
@@ -72,24 +91,47 @@ _find_symbolic_backend()
# }}}
# Symbolic API common to SymEngine and sympy.
# Before adding a function here, make sure it's present in both modules.
SYMBOLIC_API = """
Add Basic Mul Pow exp sqrt log symbols sympify cos sin atan2 Function Symbol
Derivative Integer Matrix Subs I pi functions""".split()
if USE_SYMENGINE:
import symengine as sym
from pymbolic.interop.symengine import (
PymbolicToSymEngineMapper as PymbolicToSympyMapper,
SymEngineToPymbolicMapper as SympyToPymbolicMapper)
PymbolicToSymEngineMapper as PymbolicToSympyMapperBase,
SymEngineToPymbolicMapper as SympyToPymbolicMapperBase,
)
else:
import sympy as sym
from pymbolic.interop.sympy import (
PymbolicToSympyMapper, SympyToPymbolicMapper)
for _apifunc in SYMBOLIC_API:
globals()[_apifunc] = getattr(sym, _apifunc)
from pymbolic.interop.sympy import ( # type: ignore[assignment]
PymbolicToSympyMapper as PymbolicToSympyMapperBase,
SympyToPymbolicMapper as SympyToPymbolicMapperBase,
)
# Symbolic API common to SymEngine and sympy.
# Before adding a function here, make sure it's present in both modules.
Add = sym.Add
Basic = sym.Basic
Mul = sym.Mul
Pow = sym.Pow
exp = sym.exp
sqrt = sym.sqrt
log = sym.log
symbols = sym.symbols
sympify = sym.sympify
cos = sym.cos
sin = sym.sin
atan2 = sym.atan2
Function = sym.Function
Symbol = sym.Symbol
Derivative = sym.Derivative
Integer = sym.Integer
Rational = sym.Rational
Matrix = sym.Matrix
Subs = sym.Subs
I = sym.I # noqa: E741
pi = sym.pi
functions = sym.functions
Number = sym.Number
Float = sym.Float
def _coeff_isneg(a):
@@ -98,23 +140,27 @@ def _coeff_isneg(a):
return a.is_Number and a.is_negative
have_unevaluated_expr = False
if not USE_SYMENGINE:
if TYPE_CHECKING or USE_SYMENGINE:
def UnevaluatedExpr(x): # noqa: N802
return x
else:
try:
from sympy import UnevaluatedExpr
have_unevaluated_expr = True
except ImportError:
pass
if not have_unevaluated_expr:
def UnevaluatedExpr(x): # noqa
return x
def UnevaluatedExpr(x): # noqa: N802
return x
if USE_SYMENGINE:
def doit(expr):
return expr
def unevaluated_pow(a, b):
return sym.Pow(a, b)
else:
def doit(expr):
return expr.doit()
def unevaluated_pow(a, b):
return sym.Pow(a, b, evaluate=False)
@@ -124,18 +170,18 @@ else:
class _DerivativeKiller(IdentityMapperBase):
def map_derivative(self, expr):
from pymbolic import var
return var("d_"+"_".join(expr.variables))(expr.child)
return var("d_{}".format("_".join(expr.variables)))(expr.child)
def map_substitution(self, expr):
return self.rec(expr.child)
def _get_assignments_in_maxima(assignments, prefix=""):
my_variable_names = set(six.iterkeys(assignments))
my_variable_names = set(assignments.keys())
written_assignments = set()
prefix_subst_dict = dict(
(vn, prefix+vn) for vn in my_variable_names)
prefix_subst_dict = {
vn: prefix+vn for vn in my_variable_names}
from pymbolic.maxima import MaximaStringifyMapper
mstr = MaximaStringifyMapper()
@@ -153,12 +199,12 @@ def _get_assignments_in_maxima(assignments, prefix=""):
if symb.name not in written_assignments:
write_assignment(symb.name)
result.append("%s%s : %s;" % (
result.append("{}{} : {};".format(
prefix, name, mstr(dkill(s2p(
assignments[name].subs(prefix_subst_dict))))))
written_assignments.add(name)
for name in six.iterkeys(assignments):
for name in assignments:
if name not in written_assignments:
write_assignment(name)
@@ -172,26 +218,26 @@ def checked_cse(exprs, symbols=None):
new_assignments, new_exprs = sym.cse(exprs, **kwargs)
max_old = _get_assignments_in_maxima(dict(
("old_expr%d" % i, expr)
for i, expr in enumerate(exprs)))
new_ass_dict = dict(
("new_expr%d" % i, expr)
for i, expr in enumerate(new_exprs))
max_old = _get_assignments_in_maxima({
f"old_expr{i}": expr
for i, expr in enumerate(exprs)})
new_ass_dict = {
f"new_expr{i}": expr
for i, expr in enumerate(new_exprs)}
for name, val in new_assignments:
new_ass_dict[name.name] = val
max_new = _get_assignments_in_maxima(new_ass_dict)
with open("check.mac", "w") as outf:
outf.write("ratprint:false;\n")
outf.write("%s\n\n" % max_old)
outf.write("%s\n" % max_new)
outf.write(f"{max_old}\n\n")
outf.write(f"{max_new}\n")
for i in range(len(exprs)):
outf.write("print(\"diff in expr %d:\n\");\n" % i)
outf.write("print(ratsimp(old_expr%d - new_expr%d));\n" % (i, i))
outf.write(f'print("diff in expr {i}:\n");\n')
outf.write(f"print(ratsimp(old_expr{i} - new_expr{i}));\n")
from subprocess import check_call
check_call(["maxima", "--very-quiet", "-r", "load(\"check.mac\");"])
check_call(["maxima", "--very-quiet", "-r", 'load("check.mac");'])
return new_assignments, new_exprs
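# For illustration: checked_cse is a debugging aid. It runs the backend's
# cse() and then writes a Maxima script, "check.mac", that symbolically
# checks that each old_expr_i - new_expr_i simplifies to zero. It assumes
# a "maxima" executable is available on PATH.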
@@ -208,8 +254,7 @@ def pymbolic_real_norm_2(x):
def make_sym_vector(name, components):
return sym.Matrix(
[sym.Symbol("%s%d" % (name, i)) for i in range(components)])
return sym.Matrix([sym.Symbol(f"{name}{i}") for i in range(components)])
def vector_xreplace(expr, from_vec, to_vec):
@@ -231,6 +276,64 @@ def find_power_of(base, prod):
return result[power]
@prim.expr_dataclass()
class SpatialConstant(prim.Variable):
"""A symbolic constant to represent a symbolic variable that is spatially constant.
For example the wave-number :math:`k` in the setting of a constant-coefficient
Helmholtz problem. For use in :attr:`sumpy.kernel.ExpressionKernel.expression`.
Any variable occurring there that is not a :class:`~sumpy.symbolic.SpatialConstant`
is assumed to have a spatial dependency.
.. autoattribute:: prefix
.. automethod:: as_sympy
.. automethod:: from_sympy
"""
prefix: ClassVar[str] = "_spatial_constant_"
"""Prefix used in code generation for variables of this type."""
def as_sympy(self) -> sp.Symbol:
"""Convert variable to a :mod:`sympy` expression."""
return sym.Symbol(f"{self.prefix}{self.name}")
@classmethod
def from_sympy(cls, expr: sp.Symbol) -> SpatialConstant:
"""Convert :mod:`sympy` expression to a constant."""
return cls(expr.name[len(cls.prefix):])
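# Illustrative sketch: a SpatialConstant survives the round trip through
# the sympy layer, so e.g. a Helmholtz wave number stays a single symbol
# rather than being treated as spatially varying.
#
#     >>> k = SpatialConstant("k")
#     >>> SpatialConstant.from_sympy(k.as_sympy()) == k
#     True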
class PymbolicToSympyMapper(PymbolicToSympyMapperBase):
def map_spatial_constant(self, expr):
return expr.as_sympy()
class SympyToPymbolicMapper(SympyToPymbolicMapperBase):
def map_Symbol(self, expr): # noqa: N802
if expr.name.startswith(SpatialConstant.prefix):
return SpatialConstant.from_sympy(expr)
return SympyToPymbolicMapperBase.map_Symbol(self, expr)
def map_Pow(self, expr): # noqa: N802
if expr.exp == -1:
return 1/self.rec(expr.base)
else:
return SympyToPymbolicMapperBase.map_Pow(self, expr)
def map_Mul(self, expr): # noqa: N802
num_args = []
den_args = []
for child in expr.args:
if (isinstance(child, Pow)
and isinstance(child.exp, Integer)
and child.exp < 0):
den_args.append(self.rec(child.base)**(-self.rec(child.exp)))
else:
num_args.append(self.rec(child))
return math.prod(num_args) / math.prod(den_args)
class PymbolicToSympyMapperWithSymbols(PymbolicToSympyMapper):
def map_variable(self, expr):
if expr.name == "I":
@@ -242,8 +345,58 @@ class PymbolicToSympyMapperWithSymbols(PymbolicToSympyMapper):
def map_subscript(self, expr):
if isinstance(expr.aggregate, prim.Variable) and isinstance(expr.index, int):
return sym.Symbol("%s%d" % (expr.aggregate.name, expr.index))
return sym.Symbol(f"{expr.aggregate.name}{expr.index}")
else:
self.raise_conversion_error(expr)
def map_call(self, expr):
if expr.function.name == "hankel_1":
args = [self.rec(param) for param in expr.parameters]
args.append(0)
return Hankel1(*args)
elif expr.function.name == "bessel_j":
args = [self.rec(param) for param in expr.parameters]
args.append(0)
return BesselJ(*args)
else:
return PymbolicToSympyMapper.map_call(self, expr)
import sympy
class _BesselOrHankel(sympy.Function):
"""A symbolic function for BesselJ or Hankel1 functions
that keeps track of how many derivatives of the function have been taken.
Arguments are ``(order, z, nderivs)``.
"""
nargs = (3,)
def fdiff(self, argindex=1):
if argindex in (1, 3):
# we do not differentiate w.r.t. order or nderivs
raise ValueError(f"invalid argindex: {argindex}")
order, z, nderivs = self.args
return self.func(order, z, nderivs+1)
class BesselJ(_BesselOrHankel):
pass
class Hankel1(_BesselOrHankel):
pass
_SympyBesselJ = BesselJ
_SympyHankel1 = Hankel1
if not TYPE_CHECKING and USE_SYMENGINE:
def BesselJ(*args): # noqa: N802 # pylint: disable=function-redefined
return sym.sympify(_SympyBesselJ(*args))
def Hankel1(*args): # noqa: N802 # pylint: disable=function-redefined
return sym.sympify(_SympyHankel1(*args))
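# Illustrative sketch (assuming the sympy backend): differentiating only
# bumps the nderivs counter; the actual Bessel/Hankel derivative formulas
# are substituted later by the code generator.
#
#     >>> z = Symbol("z")
#     >>> Hankel1(0, z, 0).diff(z)
#     Hankel1(0, z, 1)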
# vim: fdm=marker
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = """
Copyright (C) 2012 Andreas Kloeckner
Copyright (C) 2018 Alexandru Fikl
Copyright (C) 2020 Isuru Fernando
"""
__license__ = """
@@ -25,39 +27,127 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import six
from six.moves import range, zip
from pytools import memoize_method, memoize_in
import enum
import logging
import warnings
from abc import ABC, abstractmethod
from collections.abc import Hashable, Sequence
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
import numpy as np
import sumpy.symbolic as sym
import loopy as lp
import pyopencl as cl
import pyopencl.array # noqa
from pymbolic.mapper import WalkMapper
from pytools import memoize_method
from pytools.tag import Tag, tag_dataclass
import loopy as lp
from loopy.version import MOST_RECENT_LANGUAGE_VERSION
import sumpy.symbolic as sym
if TYPE_CHECKING:
import numpy
import pyopencl
from sumpy.kernel import Kernel
import logging
logger = logging.getLogger(__name__)
__doc__ = """
Tools
=====
.. autofunction:: to_complex_dtype
.. autofunction:: is_obj_array_like
.. autofunction:: vector_to_device
.. autofunction:: vector_from_device
.. autoclass:: OrderedSet
Multi-index Helpers
-------------------
.. autofunction:: add_mi
.. autofunction:: mi_factorial
.. autofunction:: mi_increment_axis
.. autofunction:: mi_set_axis
.. autofunction:: mi_power
Symbolic Helpers
----------------
.. autofunction:: add_to_sac
.. autofunction:: gather_arguments
.. autofunction:: gather_source_arguments
.. autofunction:: gather_loopy_arguments
.. autofunction:: gather_loopy_source_arguments
.. autoclass:: ScalingAssignmentTag
.. autoclass:: KernelComputation
.. autoclass:: KernelCacheMixin
.. autofunction:: reduced_row_echelon_form
.. autofunction:: nullspace
FFT
---
.. autofunction:: fft
.. autofunction:: fft_toeplitz_upper_triangular
.. autofunction:: matvec_toeplitz_upper_triangular
.. autoclass:: FFTBackend
:members:
.. autofunction:: loopy_fft
.. autofunction:: get_opencl_fft_app
.. autofunction:: run_opencl_fft
Profiling
---------
.. autofunction:: get_native_event
.. autoclass:: ProfileGetter
.. autoclass:: AggregateProfilingEvent
.. autoclass:: MarkerBasedProfilingEvent
"""
# {{{ multi_index helpers
def add_mi(mi1, mi2):
return tuple(mi1i+mi2i for mi1i, mi2i in zip(mi1, mi2))
def add_mi(mi1: Sequence[int], mi2: Sequence[int]) -> tuple[int, ...]:
# NOTE: these are used a lot and `tuple([])` is faster
return tuple([mi1i + mi2i for mi1i, mi2i in zip(mi1, mi2, strict=True)]) # noqa: C409
def mi_factorial(mi):
from pytools import factorial
def mi_factorial(mi: Sequence[int]) -> int:
import math
result = 1
for mi_i in mi:
result *= factorial(mi_i)
result *= math.factorial(mi_i)
return result
def mi_power(vector, mi, evaluate=True):
def mi_increment_axis(
mi: Sequence[int], axis: int, increment: int
) -> tuple[int, ...]:
new_mi = list(mi)
new_mi[axis] += increment
return tuple(new_mi)
def mi_set_axis(mi: Sequence[int], axis: int, value: int) -> tuple[int, ...]:
new_mi = list(mi)
new_mi[axis] = value
return tuple(new_mi)
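# Illustrative sketch of the multi-index helpers:
#
#     >>> add_mi((1, 0, 2), (0, 1, 1))
#     (1, 1, 3)
#     >>> mi_factorial((2, 3))  # 2! * 3!
#     12
#     >>> mi_increment_axis((1, 0), axis=1, increment=2)
#     (1, 2)
#     >>> mi_set_axis((1, 0), axis=0, value=5)
#     (5, 0)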
def mi_power(
vector: Sequence[Any], mi: Sequence[int],
evaluate: bool = True) -> Any:
result = 1
for mi_i, vec_i in zip(mi, vector):
for mi_i, vec_i in zip(mi, vector, strict=True):
if mi_i == 1:
result *= vec_i
elif evaluate:
@@ -67,91 +157,35 @@ def mi_power(vector, mi, evaluate=True):
return result
class MiDerivativeTaker(object):
def __init__(self, expr, var_list):
assert isinstance(expr, sym.Basic)
self.var_list = var_list
empty_mi = (0,) * len(var_list)
self.cache_by_mi = {empty_mi: expr}
def mi_dist(self, a, b):
return np.array(a, dtype=int) - np.array(b, dtype=int)
def diff(self, mi):
try:
expr = self.cache_by_mi[mi]
except KeyError:
current_mi = self.get_closest_cached_mi(mi)
expr = self.cache_by_mi[current_mi]
for next_deriv, next_mi in self.get_derivative_taking_sequence(
current_mi, mi):
expr = expr.diff(next_deriv)
self.cache_by_mi[next_mi] = expr
def add_to_sac(sac, expr):
if sac is None:
return expr
from numbers import Number
if isinstance(expr, Number | sym.Number | sym.Symbol):
return expr
def get_derivative_taking_sequence(self, start_mi, end_mi):
current_mi = np.array(start_mi, dtype=int)
for idx, (mi_i, vec_i) in enumerate(
zip(self.mi_dist(end_mi, start_mi), self.var_list)):
for i in range(1, 1 + mi_i):
current_mi[idx] += 1
yield vec_i, tuple(current_mi)
name = sac.assign_temp("temp", expr)
return sym.Symbol(name)
def get_closest_cached_mi(self, mi):
return min((other_mi
for other_mi in self.cache_by_mi.keys()
if (np.array(mi) >= np.array(other_mi)).all()),
key=lambda other_mi: sum(self.mi_dist(mi, other_mi)))
# }}}
class LinearRecurrenceBasedMiDerivativeTaker(MiDerivativeTaker):
"""
The derivative taker for expansions that use
:class:`sumpy.expansion.LinearRecurrenceBasedDerivativeWrangler`
"""
def __init__(self, expr, var_list, wrangler):
super(LinearRecurrenceBasedMiDerivativeTaker, self).__init__(
expr, var_list)
self.wrangler = wrangler
# {{{ get variables
@memoize_method
def diff(self, mi):
"""
:arg mi: a multi-index (tuple) indicating how many x/y derivatives are
to be taken.
"""
try:
expr = self.cache_by_mi[mi]
except KeyError:
from six import iteritems
from sumpy.symbolic import Add
closest_mi = self.get_closest_cached_mi(mi)
expr = self.cache_by_mi[closest_mi]
# Try to reduce the derivative using recurrences first, and if that
# fails fall back to derivative taking.
for next_deriv, next_mi in (
self.get_derivative_taking_sequence(closest_mi, mi)):
recurrence = (
self.wrangler.try_get_recurrence_for_derivative(
next_mi, self.cache_by_mi))
if recurrence is not None:
expr = Add(*tuple(
coeff * self.cache_by_mi[ident]
for ident, coeff in iteritems(recurrence)))
else:
expr = expr.diff(next_deriv)
class GatherAllVariables(WalkMapper):
def __init__(self):
self.vars = set()
self.cache_by_mi[next_mi] = expr
def map_variable(self, expr):
self.vars.add(expr)
return expr
def get_all_variables(expr):
mapper = GatherAllVariables()
mapper(expr)
return mapper.vars
# }}}
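# Illustrative sketch: GatherAllVariables walks a pymbolic expression and
# collects every Variable node it encounters.
#
#     >>> from pymbolic import parse
#     >>> sorted(v.name for v in get_all_variables(parse("a*x + b")))
#     ['a', 'b', 'x']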
@@ -160,7 +194,7 @@ def build_matrix(op, dtype=None, shape=None):
dtype = dtype or op.dtype
from pytools import ProgressBar
shape = shape or op.shape
rows, cols = shape
_rows, cols = shape
pb = ProgressBar("matrix", cols)
mat = np.zeros(shape, dtype)
@@ -181,48 +215,52 @@ def build_matrix(op, dtype=None, shape=None):
def vector_to_device(queue, vec):
from pytools.obj_array import with_object_array_or_scalar
from pyopencl.array import to_device
from pytools.obj_array import obj_array_vectorize
def to_dev(ary):
return to_device(queue, ary)
return with_object_array_or_scalar(to_dev, vec)
return obj_array_vectorize(to_dev, vec)
def vector_from_device(queue, vec):
from pytools.obj_array import with_object_array_or_scalar
from pytools.obj_array import obj_array_vectorize
def from_dev(ary):
from numbers import Number
if isinstance(ary, (np.number, Number)):
if isinstance(ary, np.number | Number):
# zero, most likely
return ary
return ary.get(queue=queue)
return with_object_array_or_scalar(from_dev, vec)
return obj_array_vectorize(from_dev, vec)
def _merge_kernel_arguments(dictionary, arg):
# Check for strict equality until there's a usecase
if dictionary.setdefault(arg.name, arg) != arg:
msg = "Merging two different kernel arguments {} and {} with the same name"
raise ValueError(msg.format(arg.loopy_arg, dictionary[arg].loopy_arg))
def gather_arguments(kernel_likes):
result = {}
for knl in kernel_likes:
for arg in knl.get_args():
result[arg.name] = arg
# FIXME: possibly check that arguments match before overwriting
_merge_kernel_arguments(result, arg)
return sorted(six.itervalues(result), key=lambda arg: arg.name)
return sorted(result.values(), key=lambda arg: arg.name)
def gather_source_arguments(kernel_likes):
result = {}
for knl in kernel_likes:
for arg in knl.get_args() + knl.get_source_args():
result[arg.name] = arg
# FIXME: possibly check that arguments match before overwriting
_merge_kernel_arguments(result, arg)
return sorted(six.itervalues(result), key=lambda arg: arg.name)
return sorted(result.values(), key=lambda arg: arg.name)
def gather_loopy_arguments(kernel_likes):
@@ -235,16 +273,37 @@ def gather_loopy_source_arguments(kernel_likes):
# {{{ KernelComputation
class KernelComputation(object):
"""Common input processing for kernel computations."""
@tag_dataclass
class ScalingAssignmentTag(Tag):
pass
class KernelComputation(ABC):
"""Common input processing for kernel computations.
def __init__(self, ctx, kernels, strength_usage,
value_dtypes, name, options=[], device=None):
.. attribute:: name
.. attribute:: target_kernels
.. attribute:: source_kernels
.. attribute:: strength_usage
.. automethod:: get_kernel
"""
def __init__(self, ctx: Any,
target_kernels: list[Kernel],
source_kernels: list[Kernel],
strength_usage: list[int] | None = None,
value_dtypes: list[numpy.dtype[Any]] | None = None,
name: str | None = None,
device: Any | None = None) -> None:
"""
:arg kernels: list of :class:`sumpy.kernel.Kernel` instances
:class:`sumpy.kernel.TargetDerivative` wrappers should be
:arg target_kernels: list of :class:`~sumpy.kernel.Kernel` instances,
with :class:`sumpy.kernel.DirectionalTargetDerivative` as
the outermost kernel wrappers, if present.
:arg strength_usage: A list of integers indicating which expression
:arg source_kernels: list of :class:`~sumpy.kernel.Kernel` instances
with :class:`~sumpy.kernel.DirectionalSourceDerivative` as the
outermost kernel wrappers, if present.
:arg strength_usage: list of integers indicating which expression
uses which density. This implicitly specifies the
number of density arrays that need to be passed.
Default: all kernels use the same density.
@@ -254,14 +313,14 @@ class KernelComputation(object):
if value_dtypes is None:
value_dtypes = []
for knl in kernels:
for knl in target_kernels:
if knl.is_complex_valued:
value_dtypes.append(np.complex128)
value_dtypes.append(np.dtype(np.complex128))
else:
value_dtypes.append(np.float64)
value_dtypes.append(np.dtype(np.float64))
if not isinstance(value_dtypes, (list, tuple)):
value_dtypes = [np.dtype(value_dtypes)] * len(kernels)
if not isinstance(value_dtypes, list | tuple):
value_dtypes = [np.dtype(value_dtypes)] * len(target_kernels)
value_dtypes = [np.dtype(vd) for vd in value_dtypes]
# }}}
@@ -269,9 +328,9 @@ class KernelComputation(object):
# {{{ process strength_usage
if strength_usage is None:
strength_usage = [0] * len(kernels)
strength_usage = list(range(len(source_kernels)))
if len(kernels) != len(strength_usage):
if len(source_kernels) != len(strength_usage):
raise ValueError("exprs and strength_usage must have the same length")
strength_count = max(strength_usage)+1
@@ -283,286 +342,47 @@ class KernelComputation(object):
self.context = ctx
self.device = device
self.kernels = kernels
self.source_kernels = tuple(source_kernels)
self.target_kernels = tuple(target_kernels)
self.value_dtypes = value_dtypes
self.strength_usage = strength_usage
self.strength_count = strength_count
self.name = name or self.default_name
@property
@abstractmethod
def default_name(self):
pass
def get_kernel_scaling_assignments(self):
from sumpy.symbolic import SympyToPymbolicMapper
sympy_conv = SympyToPymbolicMapper()
import loopy as lp
return [
lp.Assignment(id=None,
assignee="knl_%d_scaling" % i,
lp.Assignment(id=f"knl_{i}_scaling",
assignee=f"knl_{i}_scaling",
expression=sympy_conv(kernel.get_global_scaling_const()),
temp_var_type=lp.Optional(dtype))
temp_var_type=lp.Optional(dtype),
tags=frozenset([ScalingAssignmentTag()]))
for i, (kernel, dtype) in enumerate(
zip(self.kernels, self.value_dtypes))]
# }}}
# {{{
def _to_host(x, queue=None):
if isinstance(x, cl.array.Array):
queue = queue or x.queue
return x.get(queue)
return x
class BlockIndexRanges(object):
"""Convenience class for working with blocks of a global array.
.. attribute:: indices
A list of (not necessarily consecutive or increasing) integers
representing the indices of a global array. The individual blocks are
delimited using :attr:`ranges`.
.. attribute:: ranges
A list of nondecreasing integers used to index into :attr:`indices`.
A block :math:`i` can be retrieved using
`indices[ranges[i]:ranges[i + 1]]`.
.. automethod:: block_shape
.. automethod:: get
.. automethod:: take
"""
def __init__(self, cl_context, indices, ranges):
self.cl_context = cl_context
self.indices = indices
self.ranges = ranges
@property
@memoize_method
def _ranges(self):
with cl.CommandQueue(self.cl_context) as queue:
return _to_host(self.ranges, queue=queue)
@property
def nblocks(self):
return self.ranges.shape[0] - 1
def block_shape(self, i):
return (self._ranges[i + 1] - self._ranges[i],)
def block_indices(self, i):
return self.indices[self._ranges[i]:self._ranges[i + 1]]
def get(self, queue=None):
return BlockIndexRanges(self.cl_context,
_to_host(self.indices, queue=queue),
_to_host(self.ranges, queue=queue))
def take(self, x, i):
"""Return the subset of a global array `x` that is defined by
the :attr:`indices` in block :math:`i`.
"""
return x[self.block_indices(i)]
class MatrixBlockIndexRanges(object):
"""Keep track of different ways to index into matrix blocks.
.. attribute:: row
A :class:`BlockIndexRanges` encapsulating row block indices.
.. attribute:: col
A :class:`BlockIndexRanges` encapsulating column block indices.
.. automethod:: block_shape
.. automethod:: block_take
.. automethod:: get
.. automethod:: take
"""
def __init__(self, cl_context, row, col):
self.cl_context = cl_context
self.row = row
self.col = col
assert self.row.nblocks == self.col.nblocks
self.blkranges = np.cumsum([0] + [
self.row.block_shape(i)[0] * self.col.block_shape(i)[0]
for i in range(self.row.nblocks)])
if isinstance(self.row.indices, cl.array.Array):
with cl.CommandQueue(self.cl_context) as queue:
self.blkranges = \
cl.array.to_device(queue, self.blkranges).with_queue(None)
@property
def nblocks(self):
return self.row.nblocks
def block_shape(self, i):
return self.row.block_shape(i) + self.col.block_shape(i)
def block_indices(self, i):
return (self.row.block_indices(i),
self.col.block_indices(i))
@property
def linear_row_indices(self):
r, _ = self._linear_indices()
return r
@property
def linear_col_indices(self):
_, c = self._linear_indices()
return c
@property
def linear_ranges(self):
return self.blkranges
def get(self, queue=None):
"""Transfer data to the host. Only the initial given data is
transfered, not the arrays returned by :meth:`linear_row_indices` and
friends.
:return: a copy of `self` in which all data lives on the host, i.e.
all :class:`pyopencl.array.Array` instances are replaces by
:class:`numpy.ndarray` instances.
"""
return MatrixBlockIndexRanges(self.cl_context,
row=self.row.get(queue=queue),
col=self.col.get(queue=queue))
def take(self, x, i):
"""Retrieve a block from a global matrix.
:arg x: a 2D :class:`numpy.ndarray`.
:arg i: block index.
:return: requested block from the matrix.
"""
if isinstance(self.row.indices, cl.array.Array) or \
isinstance(self.col.indices, cl.array.Array):
raise ValueError("CL `Array`s are not supported."
"Use MatrixBlockIndexRanges.get() and then view into matrices.")
irow, icol = self.block_indices(i)
return x[np.ix_(irow, icol)]
def block_take(self, x, i):
"""Retrieve a block from a linear representation of the matrix blocks.
A linear representation of the matrix blocks can be obtained, or
should be consistent with
.. code-block:: python
i = index.linear_row_indices()
j = index.linear_col_indices()
linear_blks = global_mat[i, j]
for k in range(index.nblocks):
assert np.allclose(index.block_take(linear_blks, k),
index.take(global_mat, k))
:arg x: a 1D :class:`numpy.ndarray`.
:arg i: block index.
:return: requested block, reshaped into a 2D array.
"""
iblk = np.s_[self.blkranges[i]:self.blkranges[i + 1]]
return x[iblk].reshape(*self.block_shape(i))
@memoize_method
def _linear_indices(self):
"""
:return: a tuple of `(rowindices, colindices)` that can be
used to provide linear indexing into a set of matrix blocks. These
index arrays are just the concatenated Cartesian products of all
the block arrays described by :attr:`row` and :attr:`col`.
They can be used to index directly into a matrix as follows:
.. code-block:: python
mat[rowindices[blkranges[i]:blkranges[i + 1]],
colindices[blkranges[i]:blkranges[i + 1]]]
The same block can be obtained more easily using
zip(self.target_kernels, self.value_dtypes, strict=True))]
.. code-block:: python
index.view(mat, i).reshape(-1)
"""
@memoize_in(self, "block_index_knl")
def _build_index():
loopy_knl = lp.make_kernel([
"{[irange]: 0 <= irange < nranges}",
"{[itgt, isrc]: 0 <= itgt < ntgtblock and 0 <= isrc < nsrcblock}"
],
"""
for irange
<> ntgtblock = tgtranges[irange + 1] - tgtranges[irange]
<> nsrcblock = srcranges[irange + 1] - srcranges[irange]
for itgt, isrc
<> imat = blkranges[irange] + (nsrcblock * itgt + isrc)
rowindices[imat] = tgtindices[tgtranges[irange] + itgt] \
{id_prefix=write_index}
colindices[imat] = srcindices[srcranges[irange] + isrc] \
{id_prefix=write_index}
end
end
""",
[
lp.GlobalArg('blkranges', None, shape="nranges + 1"),
lp.GlobalArg("rowindices", None, shape="nresults"),
lp.GlobalArg("colindices", None, shape="nresults"),
lp.ValueArg("nresults", None),
'...'
],
name="block_index_knl",
default_offset=lp.auto,
assumptions='nranges>=1',
silenced_warnings="write_race(write_index*)",
lang_version=MOST_RECENT_LANGUAGE_VERSION)
loopy_knl = lp.split_iname(loopy_knl, "irange", 128, outer_tag="g.0")
return loopy_knl
with cl.CommandQueue(self.cl_context) as queue:
_, (rowindices, colindices) = _build_index()(queue,
tgtindices=self.row.indices,
srcindices=self.col.indices,
tgtranges=self.row.ranges,
srcranges=self.col.ranges,
blkranges=self.blkranges,
nresults=_to_host(self.blkranges[-1], queue=queue))
return (rowindices.with_queue(None),
colindices.with_queue(None))
@abstractmethod
def get_kernel(self):
pass
# }}}
# {{{ OrderedSet
# Source: http://code.activestate.com/recipes/576694-orderedset/
# Source: https://code.activestate.com/recipes/576694-orderedset/
# Author: Raymond Hettinger
# License: MIT
try:
from collections.abc import MutableSet
except ImportError:
from collections import MutableSet
from collections.abc import MutableSet
class OrderedSet(MutableSet):
@@ -608,15 +428,15 @@ class OrderedSet(MutableSet):
def pop(self, last=True):
if not self:
raise KeyError('set is empty')
raise KeyError("set is empty")
key = self.end[1][0] if last else self.end[2][0]
self.discard(key)
return key
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, list(self))
return f"{self.__class__.__name__}()"
return f"{self.__class__.__name__}({list(self)!r})"
def __eq__(self, other):
if isinstance(other, OrderedSet):
@@ -626,33 +446,50 @@ class OrderedSet(MutableSet):
# }}}
class KernelCacheWrapper(object):
class KernelCacheMixin(ABC):
context: cl.Context
name: str
@abstractmethod
def get_cache_key(self) -> tuple[Hashable, ...]:
...
@abstractmethod
def get_kernel(self, **kwargs: Any) -> lp.TranslationUnit:
...
@abstractmethod
def get_optimized_kernel(self, **kwargs: Any) -> lp.TranslationUnit:
...
@memoize_method
def get_cached_optimized_kernel(self, **kwargs):
from sumpy import code_cache, CACHING_ENABLED, OPT_ENABLED
def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase:
from sumpy import CACHING_ENABLED, NO_CACHE_KERNELS, OPT_ENABLED, code_cache
if CACHING_ENABLED:
if CACHING_ENABLED and not (
NO_CACHE_KERNELS and self.name in NO_CACHE_KERNELS):
import loopy.version
from sumpy.version import KERNEL_VERSION
cache_key = (
self.get_cache_key()
+ tuple(sorted(six.iteritems(kwargs)))
+ tuple(sorted(kwargs.items()))
+ (loopy.version.DATA_MODEL_VERSION,)
+ (KERNEL_VERSION,)
+ (OPT_ENABLED,))
try:
result = code_cache[cache_key]
logger.debug("%s: kernel cache hit [key=%s]" % (
self.name, cache_key))
return result
logger.debug("%s: kernel cache hit [key=%s]", self.name, cache_key)
return result.executor(self.context)
except KeyError:
pass
logger.info("%s: kernel cache miss" % self.name)
if CACHING_ENABLED:
logger.info("%s: kernel cache miss [key=%s]" % (
self.name, cache_key))
logger.info("%s: kernel cache miss", self.name)
if CACHING_ENABLED and not (
NO_CACHE_KERNELS and self.name in NO_CACHE_KERNELS):
logger.info("%s: kernel cache miss [key=%s]",
self.name, cache_key)
from pytools import MinRecursionLimit
with MinRecursionLimit(3000):
@@ -661,55 +498,561 @@ class KernelCacheWrapper(object):
else:
knl = self.get_kernel()
if CACHING_ENABLED:
if CACHING_ENABLED and not (
NO_CACHE_KERNELS and self.name in NO_CACHE_KERNELS):
code_cache.store_if_not_present(cache_key, knl)
return knl
return knl.executor(self.context)
@staticmethod
def _allow_redundant_execution_of_knl_scaling(knl):
from loopy.match import ObjTagged
return lp.add_inames_for_unused_hw_axes(
knl, within=ObjTagged(ScalingAssignmentTag()))
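# For illustration: concrete computations (e.g. sumpy.p2p.P2P) implement
# get_cache_key/get_kernel/get_optimized_kernel; their __call__ then goes
# through get_cached_kernel_executor(), so the expensive loopy compile
# happens at most once per cache key (which folds in the loopy data model
# version, the sumpy kernel version, and whether optimization is enabled).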
def my_syntactic_subs(expr, subst_dict):
# Workaround for differing substitution semantics between sympy and symengine.
# FIXME: This is a hack.
from sumpy.symbolic import Basic, Subs, Derivative
if not isinstance(expr, Basic):
return expr
KernelCacheWrapper = KernelCacheMixin
elif expr.is_Symbol:
return subst_dict.get(expr, expr)
elif isinstance(expr, Subs):
new_point = tuple(my_syntactic_subs(p, subst_dict) for p in expr.point)
def is_obj_array_like(ary):
return (
isinstance(ary, tuple | list)
or (isinstance(ary, np.ndarray) and ary.dtype.char == "O"))
import six
new_subst_dict = dict(
(var, subs) for var, subs in six.iteritems(subst_dict)
if var not in expr.variables)
new_expr = my_syntactic_subs(expr.expr, new_subst_dict)
# {{{ matrices
if new_point != expr.point or new_expr != expr.expr:
return Subs(new_expr, expr.variables, new_point)
def reduced_row_echelon_form(m, atol=0):
"""Calculates a reduced row echelon form of a
matrix `m`.
return expr
:arg m: a 2D :class:`numpy.ndarray` or a list of lists or a sympy Matrix
:arg atol: absolute tolerance for values to be considered zero
:return: reduced row echelon form as a 2D :class:`numpy.ndarray`
and a list of pivots
"""
elif isinstance(expr, Derivative):
new_expr = my_syntactic_subs(expr.expr, subst_dict)
new_variables = my_syntactic_subs(expr.variables, subst_dict)
mat = np.array(m, dtype=object)
index = 0
nrows = mat.shape[0]
ncols = mat.shape[1]
pivot_cols = []
for i in range(ncols):
if index == nrows:
break
pivot = nrows
for k in range(index, nrows):
symbolic = isinstance(mat[k, i], sym.Basic) and not mat[k, i].is_number
if (symbolic or abs(mat[k, i]) > atol) and pivot == nrows:
pivot = k
# If there's a pivot that's close to 1 use that as it avoids
# having to divide.
# When checking for a number close to 1, we shouldn't consider
# symbolic values
if not symbolic and abs(mat[k, i] - 1) <= atol:
pivot = k
break
if pivot == nrows:
# no nonzero pivot found, next column
continue
if pivot != index:
mat[[pivot, index], :] = mat[[index, pivot], :]
pivot_cols.append(i)
scale = mat[index, i]
if isinstance(scale, int | sym.Integer):
scale = int(scale)
for j in range(mat.shape[1]):
elem = mat[index, j]
if isinstance(scale, int) and isinstance(elem, int | sym.Integer):
quo = int(elem) // scale
if quo * scale == elem:
mat[index, j] = quo
continue
mat[index, j] = sym.sympify(elem)/scale
for j in range(nrows):
if (j == index):
continue
scale = mat[j, i]
if scale != 0:
mat[j, :] = mat[j, :] - mat[index, :]*scale
index = index + 1
return mat, pivot_cols
def nullspace(m, atol=0):
"""Calculates the nullspace of a matrix `m`.
:arg m: a 2D :class:`numpy.ndarray` or a list of lists or a sympy Matrix
:arg atol: absolute tolerance for values to be considered zero
:return: nullspace of `m` as a 2D :class:`numpy.ndarray`
"""
mat, pivot_cols = reduced_row_echelon_form(m, atol=atol)
pivot_cols = list(pivot_cols)
cols = mat.shape[1]
free_vars = [i for i in range(cols) if i not in pivot_cols]
n = []
for free_var in free_vars:
vec = [0]*cols
vec[free_var] = 1
for piv_row, piv_col in enumerate(pivot_cols):
for pos in pivot_cols[piv_row+1:] + [free_var]:
if isinstance(mat[piv_row, pos], sym.Integer):
vec[piv_col] -= int(mat[piv_row, pos])
else:
vec[piv_col] -= mat[piv_row, pos]
n.append(vec)
return np.array(n, dtype=object).T
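# Illustrative sketch: the rows of [[1, 2], [2, 4]] are linearly
# dependent, so the nullspace is spanned by (-2, 1).
#
#     >>> ns = nullspace([[1, 2], [2, 4]])
#     >>> list(ns[:, 0])
#     [-2, 1]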
if new_expr != expr.expr or any(new_var != var for new_var, var in
zip(new_variables, expr.variables)):
return Derivative(new_expr, *new_variables)
# }}}
return expr
# {{{ FFT
def fft(seq, inverse=False, sac=None):
"""
Return the discrete Fourier transform of the sequence *seq*.
*seq* should be a Python iterable with tuples of length 2
corresponding to the real and imaginary parts.
"""
from pymbolic.algorithm import fft as _fft, ifft as _ifft
def wrap(level, expr):
if isinstance(expr, np.ndarray):
res = [wrap(level, a) for a in expr]
return np.array(res, dtype=object).reshape(expr.shape)
return add_to_sac(sac, expr)
if inverse:
return _ifft(np.array(seq), wrap_intermediate_with_level=wrap,
complex_dtype=np.complex128).tolist()
else:
new_args = tuple(my_syntactic_subs(arg, subst_dict) for arg in expr.args)
return _fft(np.array(seq), wrap_intermediate_with_level=wrap,
complex_dtype=np.complex128).tolist()
if any(new_arg != arg for arg, new_arg in zip(expr.args, new_args)):
return expr.func(*new_args)
return expr
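# For illustration: fft() mirrors pymbolic.algorithm.fft, but routes
# every intermediate value through the assignment collection "sac" so
# that shared subexpressions of a symbolic input sequence are named once;
# with sac=None the intermediates are left inline.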
def fft_toeplitz_upper_triangular(first_row, x, sac=None):
"""
Returns the matvec of the Toeplitz matrix given by
the first row and the vector x using a Fourier transform
"""
assert len(first_row) == len(x)
n = len(first_row)
v = list(first_row)
v += [0]*(n-1)
x = list(reversed(x))
x += [0]*(n-1)
v_fft = fft(v, sac=sac)
x_fft = fft(x, sac=sac)
res_fft = [add_to_sac(sac, a * b) for a, b in zip(v_fft, x_fft, strict=True)]
res = fft(res_fft, inverse=True, sac=sac)
return list(reversed(res[:n]))
def matvec_toeplitz_upper_triangular(first_row, vector):
n = len(first_row)
assert len(vector) == n
output = [0]*n
for row in range(n):
terms = tuple(first_row[col-row]*vector[col] for col in range(row, n))
output[row] = sym.Add(*terms)
return output
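# Illustrative sketch: for first_row = (a0, a1, a2) the implied
# upper-triangular Toeplitz matrix is
#
#     [a0 a1 a2]
#     [ 0 a0 a1]
#     [ 0  0 a0]
#
# so the last entry of matvec_toeplitz_upper_triangular((a0, a1, a2),
# (x0, x1, x2)) is a0*x2. fft_toeplitz_upper_triangular computes the
# same product via circular convolution in the Fourier domain.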
to_complex_type_dict = {
np.complex64: np.complex64,
np.complex128: np.complex128,
np.float32: np.complex64,
np.float64: np.complex128,
}
def to_complex_dtype(dtype):
np_type = np.dtype(dtype).type
try:
return to_complex_type_dict[np_type]
except KeyError as err:
raise RuntimeError(f"Unknown dtype: {dtype}") from err
@dataclass(frozen=True)
class ProfileGetter:
start: int
end: int
def get_native_event(evt):
from pyopencl import Event
return evt if isinstance(evt, Event) else evt.native_event
class AggregateProfilingEvent:
"""An object to hold a list of events and provides compatibility
with some of the functionality of :class:`pyopencl.Event`.
Assumes that the last event waits on all of the previous events.
"""
def __init__(self, events):
self.events = events[:]
self.native_event = get_native_event(events[-1])
@property
def profile(self):
total = sum(evt.profile.end - evt.profile.start for evt in self.events)
end = self.native_event.profile.end
return ProfileGetter(start=end - total, end=end)
def wait(self):
return self.native_event.wait()
class MarkerBasedProfilingEvent:
"""An object to hold two marker events and provides compatibility
with some of the functionality of :class:`pyopencl.Event`.
"""
def __init__(self, *, end_event, start_event):
self.native_event = end_event
self.start_event = start_event
@property
def profile(self):
return ProfileGetter(start=self.start_event.profile.start,
end=self.native_event.profile.end)
def wait(self):
return self.native_event.wait()
def loopy_fft(shape, inverse, complex_dtype, index_dtype=None,
name=None):
from math import pi
from pymbolic import var
from pymbolic.algorithm import find_factors
sign = 1 if not inverse else -1
n = shape[-1]
m = n
factors = []
while m != 1:
N1, m = find_factors(m) # noqa: N806
factors.append(N1)
nfft = n
broadcast_dims = tuple(var(f"j{d}") for d in range(len(shape) - 1))
domains = [
"{[i]: 0<=i<n}",
"{[i2]: 0<=i2<n}",
]
domains += [f"{{[j{d}]: 0<=j{d}<{shape[d]} }}" for d in range(len(shape) - 1)]
x = var("x")
y = var("y")
i = var("i")
i2 = var("i2")
i3 = var("i3")
fixed_parameters = {"const": complex_dtype(sign*(-2j)*pi/n), "n": n}
index = (*broadcast_dims, i2)
insns = [
"exp_table[i] = exp(const * i) {id=exp_table}",
lp.Assignment(
assignee=x[index],
expression=y[index],
id="copy",
happens_after=frozenset(["exp_table"]),
),
]
for ilev, N1 in enumerate(list(reversed(factors))): # noqa: N806
nfft //= N1
N2 = n // (nfft * N1) # noqa: N806
init_happens_after = "copy" if ilev == 0 else f"update_{ilev-1}"
temp = var("temp")
exp_table = var("exp_table")
i = var(f"i_{ilev}")
i2 = var(f"i2_{ilev}")
ifft = var(f"ifft_{ilev}")
iN1 = var(f"iN1_{ilev}") # noqa: N806
iN1_sum = var(f"iN1_sum_{ilev}") # noqa: N806
iN2 = var(f"iN2_{ilev}") # noqa: N806
table_idx = var(f"table_idx_{ilev}")
exp = var(f"exp_{ilev}")
i_bcast = (*broadcast_dims, i)
i2_bcast = (*broadcast_dims, i2)
iN_bcast = (*broadcast_dims, ifft + nfft * (iN1 * N2 + iN2)) # noqa: N806
insns += [
lp.Assignment(
assignee=temp[i],
expression=x[i_bcast],
id=f"copy_{ilev}",
happens_after=frozenset([init_happens_after]),
),
lp.Assignment(
assignee=x[i2_bcast],
expression=0,
id=f"reset_{ilev}",
happens_after=frozenset([f"copy_{ilev}"])),
lp.Assignment(
assignee=table_idx,
expression=nfft*iN1_sum*(iN2 + N2*iN1),
id=f"idx_{ilev}",
happens_after=frozenset([f"reset_{ilev}"]),
temp_var_type=lp.Optional(np.uint32)),
lp.Assignment(
assignee=exp,
expression=exp_table[table_idx % n],
id=f"exp_{ilev}",
happens_after=frozenset([f"idx_{ilev}"]),
within_inames=frozenset({x.name for x in
[*broadcast_dims, iN1_sum, iN1, iN2]}),
temp_var_type=lp.Optional(complex_dtype)),
lp.Assignment(
assignee=x[iN_bcast],
expression=(x[iN_bcast]
+ exp * temp[ifft + nfft * (iN2*N1 + iN1_sum)]),
id=f"update_{ilev}",
happens_after=frozenset([f"exp_{ilev}"])),
]
domains += [
f"[ifft_{ilev}]: 0<=ifft_{ilev}<{nfft}",
f"[iN1_{ilev}]: 0<=iN1_{ilev}<{N1}",
f"[iN1_sum_{ilev}]: 0<=iN1_sum_{ilev}<{N1}",
f"[iN2_{ilev}]: 0<=iN2_{ilev}<{N2}",
f"[i_{ilev}]: 0<=i_{ilev}<{n}",
f"[i2_{ilev}]: 0<=i2_{ilev}<{n}",
]
for idom, dom in enumerate(domains):
if not dom.startswith("{"):
domains[idom] = "{" + dom + "}"
kernel_data = [
lp.GlobalArg("x", shape=shape, is_input=False, is_output=True,
dtype=complex_dtype),
lp.GlobalArg("y", shape=shape, is_input=True, is_output=False,
dtype=complex_dtype),
lp.TemporaryVariable("exp_table", shape=(n,),
dtype=complex_dtype),
lp.TemporaryVariable("temp", shape=(n,),
dtype=complex_dtype),
...
]
if n == 1:
domains = domains[2:]
index = (*broadcast_dims, 0)
insns = [
lp.Assignment(
assignee=x[index],
expression=y[index],
),
]
kernel_data = kernel_data[:2]
elif inverse:
domains += ["{[i3]: 0<=i3<n}"]
index = (*broadcast_dims, i3)
insns += [
lp.Assignment(
assignee=x[index],
expression=x[index] / n,
happens_after=frozenset([f"update_{len(factors) - 1}"]),
),
]
if name is None:
name = f"ifft_{n}" if inverse else f"fft_{n}"
knl = lp.make_kernel(
domains, insns,
kernel_data=kernel_data,
name=name,
fixed_parameters=fixed_parameters,
lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
index_dtype=index_dtype,
)
if broadcast_dims:
knl = lp.split_iname(knl, "j0", 32, inner_tag="l.0", outer_tag="g.0")
knl = lp.add_inames_for_unused_hw_axes(knl)
return knl
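# For illustration: the kernel built above is a mixed-radix Cooley-Tukey
# FFT. "factors" is the radix sequence obtained from
# pymbolic.algorithm.find_factors, and each level recombines nfft
# sub-transforms using twiddle factors read from the precomputed exp_table.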
class FFTBackend(enum.Enum):
#: FFT backend based on the vkFFT library.
pyvkfft = 1
#: FFT backend based on :mod:`loopy` used as a fallback.
loopy = 2
def _get_fft_backend(queue: pyopencl.CommandQueue) -> FFTBackend:
import os
env_val = os.environ.get("SUMPY_FFT_BACKEND")
if env_val:
if env_val not in ["loopy", "pyvkfft"]:
raise ValueError("Expected 'loopy' or 'pyvkfft' for SUMPY_FFT_BACKEND. "
f"Found {env_val}.")
return FFTBackend[env_val]
try:
import pyvkfft.opencl # noqa: F401
except ImportError:
warnings.warn("VkFFT not found. FFT runs will be slower.", stacklevel=3)
return FFTBackend.loopy
from pyopencl import command_queue_properties
if queue.properties & command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE:
warnings.warn(
"VkFFT does not support out of order queues yet. "
"Falling back to slower implementation.", stacklevel=3)
return FFTBackend.loopy
import platform
import sys
if (sys.platform == "darwin"
and platform.machine() == "x86_64"
and queue.context.devices[0].platform.name
== "Portable Computing Language"):
warnings.warn(
"PoCL miscompiles some VkFFT kernels. "
"See https://github.com/inducer/sumpy/issues/129. "
"Falling back to slower implementation.", stacklevel=3)
return FFTBackend.loopy
return FFTBackend.pyvkfft
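# For illustration, the backend can be forced from the environment before
# any FFT app is created ("driver.py" is a hypothetical script):
#
#     SUMPY_FFT_BACKEND=loopy python driver.py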
def get_opencl_fft_app(
queue: pyopencl.CommandQueue,
shape: tuple[int, ...],
dtype: numpy.dtype[Any],
inverse: bool) -> Any:
"""Setup an object for out-of-place FFT on with given shape and dtype
on given queue.
"""
assert dtype.type in (np.float32, np.float64, np.complex64,
np.complex128)
backend = _get_fft_backend(queue)
if backend == FFTBackend.loopy:
return loopy_fft(shape, inverse=inverse, complex_dtype=dtype.type), backend
elif backend == FFTBackend.pyvkfft:
from pyvkfft.opencl import VkFFTApp
app = VkFFTApp(shape=shape, dtype=dtype, queue=queue, ndim=1, inplace=False)
return app, backend
else:
raise RuntimeError(f"Unsupported FFT backend {backend}")
def run_opencl_fft(
fft_app: tuple[Any, FFTBackend],
queue: pyopencl.CommandQueue,
input_vec: Any,
inverse: bool = False,
wait_for: list[pyopencl.Event] | None = None
) -> tuple[pyopencl.Event, Any]:
"""Runs an FFT on input_vec and returns a :class:`MarkerBasedProfilingEvent`
that indicate the end and start of the operations carried out and the output
vector.
Only supports in-order queues.
"""
app, backend = fft_app
if backend == FFTBackend.loopy:
evt, (output_vec,) = app(queue, y=input_vec, wait_for=wait_for)
return (evt, output_vec)
elif backend == FFTBackend.pyvkfft:
if wait_for is None:
wait_for = []
import pyopencl as cl
import pyopencl.array as cla
if queue.device.platform.name == "NVIDIA CUDA":
# NVIDIA OpenCL gives wrong event profile values with wait_for.
# Not passing wait_for waits for all previously enqueued events,
# so correctness is preserved as long as it is the same queue.
for evt in wait_for:
if evt.command_queue != queue:
raise RuntimeError(
"Different queues not supported with NVIDIA CUDA")
start_evt = cl.enqueue_marker(queue)
else:
start_evt = cl.enqueue_marker(queue, wait_for=wait_for[:])
if app.inplace:
raise RuntimeError("inplace fft is not supported")
else:
output_vec = cla.empty_like(input_vec, queue)
# FIXME: use the public API once
# https://github.com/vincefn/pyvkfft/pull/17 is in
from pyvkfft.opencl import _vkfft_opencl
if inverse: # noqa: SIM108
meth = _vkfft_opencl.ifft
else:
meth = _vkfft_opencl.fft
meth(app.app, int(input_vec.data.int_ptr),
int(output_vec.data.int_ptr), int(queue.int_ptr))
if queue.device.platform.name == "NVIDIA CUDA":
end_evt = cl.enqueue_marker(queue)
else:
end_evt = cl.enqueue_marker(queue, wait_for=[start_evt])
output_vec.add_event(end_evt)
return (MarkerBasedProfilingEvent(end_event=end_evt, start_event=start_evt),
output_vec)
else:
raise RuntimeError(f"Unsupported FFT backend {backend}")
# }}}
# {{{ deprecations
_depr_name_to_replacement_and_obj = {
"KernelCacheWrapper": ("KernelCacheMixin", 2023),
}
def __getattr__(name):
replacement_and_obj = _depr_name_to_replacement_and_obj.get(name)
if replacement_and_obj is not None:
replacement, obj, year = replacement_and_obj
from warnings import warn
warn(f"'sumpy.tools.{name}' is deprecated. "
f"Use '{replacement}' instead. "
f"'sumpy.tools.{name}' will continue to work until {year}.",
DeprecationWarning, stacklevel=2)
return obj
else:
raise AttributeError(name)
# }}}
# vim: fdm=marker
from __future__ import division, absolute_import
from __future__ import annotations
from sumpy.expansion.m2l import M2LTranslationClassFactoryBase
__copyright__ = """
Copyright (C) 2017 Andreas Kloeckner
@@ -25,18 +28,31 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import six # noqa: F401
from six.moves import range # noqa: F401
from collections.abc import Sequence
from functools import partial
from numbers import Number
from typing import TYPE_CHECKING
from pytools import memoize_method
from numbers import Number
from sumpy.kernel import TargetDerivativeRemover
import numpy as np # noqa: F401
from sumpy.kernel import TargetTransformationRemover
if TYPE_CHECKING:
import pyopencl
from sumpy.kernel import Kernel
from sumpy.visualization import FieldPlotter
import logging
import numpy as np
import loopy as lp # noqa: F401
import pyopencl as cl
import pyopencl.array
import logging
logger = logging.getLogger(__name__)
__doc__ = """
@@ -57,6 +73,7 @@ These functions manipulate these potentials:
.. autofunction:: combine_inner_outer
.. autofunction:: combine_halfspace
.. autofunction:: combine_halfspace_and_outer
.. autofunction:: l_inf
These functions help with plotting:
@@ -73,6 +90,7 @@ by users:
.. autoclass:: ExpansionPotentialSource
.. autoclass:: MultipoleExpansion
.. autoclass:: LocalExpansion
.. autoclass:: PotentialExpressionNode
.. autoclass:: Sum
.. autoclass:: Product
.. autoclass:: SchematicVisitor
@@ -82,24 +100,24 @@ by users:
# {{{ context
class ToyContext(object):
class ToyContext:
"""This class functions as a container for generated code and 'behind-the-scenes'
information.
.. automethod:: __init__
"""
def __init__(self, cl_context, kernel,
def __init__(self, cl_context: pyopencl.Context, kernel: Kernel,
mpole_expn_class=None,
local_expn_class=None,
expansion_factory=None,
extra_source_kwargs=None,
extra_kernel_kwargs=None):
extra_kernel_kwargs=None, m2l_use_fft=None):
self.cl_context = cl_context
self.queue = cl.CommandQueue(self.cl_context)
self.kernel = kernel
self.no_target_deriv_kernel = TargetDerivativeRemover()(kernel)
self.no_target_deriv_kernel = TargetTransformationRemover()(kernel)
if expansion_factory is None:
from sumpy.expansion import DefaultExpansionFactory
@@ -108,8 +126,25 @@ class ToyContext(object):
mpole_expn_class = \
expansion_factory.get_multipole_expansion_class(kernel)
if local_expn_class is None:
from sumpy.expansion.m2l import (
FFTM2LTranslationClassFactory,
NonFFTM2LTranslationClassFactory,
)
if m2l_use_fft:
m2l_translation_class_factory: M2LTranslationClassFactoryBase = \
FFTM2LTranslationClassFactory()
else:
m2l_translation_class_factory = NonFFTM2LTranslationClassFactory()
local_expn_class = \
expansion_factory.get_local_expansion_class(kernel)
m2l_translation_class = \
m2l_translation_class_factory.get_m2l_translation_class(
kernel, local_expn_class)
local_expn_class = partial(local_expn_class,
m2l_translation=m2l_translation_class())
elif m2l_use_fft is not None:
raise ValueError("local_expn_class and m2l_use_fft are both supplied. "
"Use only one of these arguments")
self.mpole_expn_class = mpole_expn_class
self.local_expn_class = local_expn_class
@@ -129,35 +164,35 @@ class ToyContext(object):
@memoize_method
def get_p2p(self):
from sumpy.p2p import P2P
return P2P(self.cl_context, [self.kernel], exclude_self=False)
return P2P(self.cl_context, (self.kernel,), exclude_self=False)
@memoize_method
def get_p2m(self, order):
from sumpy import P2EFromSingleBox
return P2EFromSingleBox(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, order),
[self.kernel])
kernels=(self.kernel,))
@memoize_method
def get_p2l(self, order):
from sumpy import P2EFromSingleBox
return P2EFromSingleBox(self.cl_context,
self.local_expn_class(self.no_target_deriv_kernel, order),
[self.kernel])
kernels=(self.kernel,))
@memoize_method
def get_m2p(self, order):
from sumpy import E2PFromSingleBox
return E2PFromSingleBox(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, order),
[self.kernel])
(self.kernel,))
@memoize_method
def get_l2p(self, order):
from sumpy import E2PFromSingleBox
return E2PFromSingleBox(self.cl_context,
self.local_expn_class(self.no_target_deriv_kernel, order),
[self.kernel])
(self.kernel,))
@memoize_method
def get_m2m(self, from_order, to_order):
@@ -166,10 +201,51 @@ class ToyContext(object):
self.mpole_expn_class(self.no_target_deriv_kernel, from_order),
self.mpole_expn_class(self.no_target_deriv_kernel, to_order))
@memoize_method
def use_translation_classes_dependent_data(self):
l_expn = self.local_expn_class(self.no_target_deriv_kernel, 2)
return l_expn.m2l_translation.use_preprocessing
@memoize_method
def use_fft(self):
l_expn = self.local_expn_class(self.no_target_deriv_kernel, 2)
return l_expn.m2l_translation.use_fft
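# For illustration: both probes above build a throwaway order-2 local
# expansion purely to inspect its m2l_translation flags; the expansion
# order is irrelevant for that purpose.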
@memoize_method
def get_m2l(self, from_order, to_order):
from sumpy import E2EFromCSR
return E2EFromCSR(self.cl_context,
from sumpy import E2EFromCSR, M2LUsingTranslationClassesDependentData
if self.use_translation_classes_dependent_data():
m2l_class = M2LUsingTranslationClassesDependentData
else:
m2l_class = E2EFromCSR
return m2l_class(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, from_order),
self.local_expn_class(self.no_target_deriv_kernel, to_order))
@memoize_method
def get_m2l_translation_class_dependent_data_kernel(self, from_order, to_order):
from sumpy import M2LGenerateTranslationClassesDependentData
return M2LGenerateTranslationClassesDependentData(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, from_order),
self.local_expn_class(self.no_target_deriv_kernel, to_order))
@memoize_method
def get_m2l_expansion_size(self, from_order, to_order):
m_expn = self.mpole_expn_class(self.no_target_deriv_kernel, from_order)
l_expn = self.local_expn_class(self.no_target_deriv_kernel, to_order)
return l_expn.m2l_translation.preprocess_multipole_nexprs(l_expn, m_expn)
@memoize_method
def get_m2l_preprocess_mpole_kernel(self, from_order, to_order):
from sumpy import M2LPreprocessMultipole
return M2LPreprocessMultipole(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, from_order),
self.local_expn_class(self.no_target_deriv_kernel, to_order))
@memoize_method
def get_m2l_postprocess_local_kernel(self, from_order, to_order):
from sumpy import M2LPostprocessLocal
return M2LPostprocessLocal(self.cl_context,
self.mpole_expn_class(self.no_target_deriv_kernel, from_order),
self.local_expn_class(self.no_target_deriv_kernel, to_order))
@@ -186,49 +262,59 @@ class ToyContext(object):
# {{{ helpers
def _p2e(psource, center, rscale, order, p2e, expn_class, expn_kwargs):
source_boxes = np.array([0], dtype=np.int32)
box_source_starts = np.array([0], dtype=np.int32)
box_source_counts_nonchild = np.array(
[psource.points.shape[-1]], dtype=np.int32)
toy_ctx = psource.toy_ctx
queue = toy_ctx.queue
source_boxes = cl.array.to_device(
queue, np.array([0], dtype=np.int32))
box_source_starts = cl.array.to_device(
queue, np.array([0], dtype=np.int32))
box_source_counts_nonchild = cl.array.to_device(
queue, np.array([psource.points.shape[-1]], dtype=np.int32))
center = np.asarray(center)
centers = np.array(center, dtype=np.float64).reshape(
toy_ctx.kernel.dim, 1)
centers = cl.array.to_device(
queue,
np.array(center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1))
evt, (coeffs,) = p2e(toy_ctx.queue,
_evt, (coeffs,) = p2e(toy_ctx.queue,
source_boxes=source_boxes,
box_source_starts=box_source_starts,
box_source_counts_nonchild=box_source_counts_nonchild,
centers=centers,
sources=psource.points,
strengths=psource.weights,
sources=cl.array.to_device(queue, psource.points),
strengths=(cl.array.to_device(queue, psource.weights),),
rscale=rscale,
nboxes=1,
tgt_base_ibox=0,
#flags="print_hl_cl",
out_host=True,
**toy_ctx.extra_source_and_kernel_kwargs)
return expn_class(toy_ctx, center, rscale, order, coeffs[0],
return expn_class(toy_ctx, center, rscale, order, coeffs[0].get(queue),
derived_from=psource, **expn_kwargs)
def _e2p(psource, targets, e2p):
toy_ctx = psource.toy_ctx
queue = toy_ctx.queue
ntargets = targets.shape[-1]
boxes = cl.array.to_device(
queue, np.array([0], dtype=np.int32))
box_target_starts = cl.array.to_device(
queue, np.array([0], dtype=np.int32))
box_target_counts_nonchild = cl.array.to_device(
queue, np.array([ntargets], dtype=np.int32))
boxes = np.array([0], dtype=np.int32)
centers = cl.array.to_device(
queue,
np.array(psource.center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1))
box_target_starts = np.array([0], dtype=np.int32)
box_target_counts_nonchild = np.array([ntargets], dtype=np.int32)
from pytools.obj_array import make_obj_array
toy_ctx = psource.toy_ctx
centers = np.array(psource.center, dtype=np.float64).reshape(
toy_ctx.kernel.dim, 1)
from sumpy.tools import vector_to_device
coeffs = np.array([psource.coeffs])
evt, (pot,) = e2p(
coeffs = cl.array.to_device(queue, np.array([psource.coeffs]))
_evt, (pot,) = e2p(
toy_ctx.queue,
src_expansions=coeffs,
src_base_ibox=0,
@@ -237,60 +323,169 @@ def _e2p(psource, targets, e2p):
box_target_counts_nonchild=box_target_counts_nonchild,
centers=centers,
rscale=psource.rscale,
targets=targets,
#flags="print_hl_cl",
out_host=True, **toy_ctx.extra_kernel_kwargs)
targets=vector_to_device(queue, make_obj_array(targets)),
**toy_ctx.extra_kernel_kwargs)
return pot
return pot.get(queue)
def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs):
def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs,
extra_kernel_kwargs):
toy_ctx = psource.toy_ctx
target_boxes = np.array([1], dtype=np.int32)
src_box_starts = np.array([0, 1], dtype=np.int32)
src_box_lists = np.array([0], dtype=np.int32)
centers = (np.array(
queue = toy_ctx.queue
target_boxes = cl.array.to_device(
queue, np.array([1], dtype=np.int32))
src_box_starts = cl.array.to_device(
queue, np.array([0, 1], dtype=np.int32))
src_box_lists = cl.array.to_device(
queue, np.array([0], dtype=np.int32))
centers = cl.array.to_device(
queue,
np.array(
[
# box 0: source
psource.center,
# box 1: target
to_center,
],
dtype=np.float64)).T.copy()
coeffs = np.array([psource.coeffs])
evt, (to_coeffs,) = e2e(
toy_ctx.queue,
src_expansions=coeffs,
src_base_ibox=0,
tgt_base_ibox=0,
ntgt_level_boxes=2,
target_boxes=target_boxes,
],
dtype=np.float64).T.copy()
)
coeffs = cl.array.to_device(queue, np.array([psource.coeffs]))
args = {
"queue": toy_ctx.queue,
"src_expansions": coeffs,
"src_base_ibox": 0,
"tgt_base_ibox": 0,
"ntgt_level_boxes": 2,
"target_boxes": target_boxes,
"src_box_starts": src_box_starts,
"src_box_lists": src_box_lists,
"centers": centers,
"src_rscale": psource.rscale,
"tgt_rscale": to_rscale,
**extra_kernel_kwargs,
**toy_ctx.extra_kernel_kwargs,
}
_evt, (to_coeffs,) = e2e(**args)
return expn_class(
toy_ctx, to_center, to_rscale, to_order, to_coeffs[1].get(queue),
derived_from=psource, **expn_kwargs)
src_box_starts=src_box_starts,
src_box_lists=src_box_lists,
centers=centers,
src_rscale=psource.rscale,
tgt_rscale=to_rscale,
def _m2l(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs,
translation_classes_kwargs):
toy_ctx = psource.toy_ctx
queue = toy_ctx.queue
coeffs = cl.array.to_device(queue, np.array([psource.coeffs]))
m2l_use_translation_classes_dependent_data = \
toy_ctx.use_translation_classes_dependent_data()
if m2l_use_translation_classes_dependent_data:
data_kernel = translation_classes_kwargs["data_kernel"]
preprocess_kernel = translation_classes_kwargs["preprocess_kernel"]
postprocess_kernel = translation_classes_kwargs["postprocess_kernel"]
expn_size = translation_classes_kwargs["m2l_expn_size"]
# Preprocess the mpole expansion
preprocessed_src_expansions = cl.array.zeros(queue, (1, expn_size),
dtype=np.complex128)
evt, _ = preprocess_kernel(queue,
src_expansions=coeffs,
preprocessed_src_expansions=preprocessed_src_expansions,
src_rscale=np.float64(psource.rscale),
**toy_ctx.extra_kernel_kwargs)
from sumpy.tools import get_native_event, get_opencl_fft_app, run_opencl_fft
if toy_ctx.use_fft:
fft_app = get_opencl_fft_app(queue, (expn_size,),
dtype=preprocessed_src_expansions.dtype, inverse=False)
ifft_app = get_opencl_fft_app(queue, (expn_size,),
dtype=preprocessed_src_expansions.dtype, inverse=True)
evt, preprocessed_src_expansions = run_opencl_fft(fft_app, queue,
preprocessed_src_expansions, inverse=False, wait_for=[evt])
# Compute translation classes data
m2l_translation_classes_lists = cl.array.to_device(queue,
np.array([0], dtype=np.int32))
dist = np.array(to_center - psource.center, dtype=np.float64)
dim = toy_ctx.kernel.dim
m2l_translation_vectors = cl.array.to_device(queue, dist.reshape(dim, 1))
m2l_translation_classes_dependent_data = cl.array.zeros(queue,
(1, expn_size), dtype=np.complex128)
evt, _ = data_kernel(
queue,
src_rscale=np.float64(psource.rscale),
ntranslation_classes=1,
translation_classes_level_start=0,
m2l_translation_vectors=m2l_translation_vectors,
m2l_translation_classes_dependent_data=(
m2l_translation_classes_dependent_data),
ntranslation_vectors=1,
wait_for=[get_native_event(evt)],
**toy_ctx.extra_kernel_kwargs)
if toy_ctx.use_fft:
evt, m2l_translation_classes_dependent_data = run_opencl_fft(fft_app,
queue, m2l_translation_classes_dependent_data, inverse=False,
wait_for=[evt])
ret = _e2e(psource, to_center, to_rscale, to_order,
e2e, expn_class, expn_kwargs,
{
"src_expansions": preprocessed_src_expansions,
"m2l_translation_classes_lists": m2l_translation_classes_lists,
"m2l_translation_classes_dependent_data": (
m2l_translation_classes_dependent_data),
"translation_classes_level_start": 0,
}
)
# Postprocess the local expansion
local_before = cl.array.to_device(queue, np.array([ret.coeffs]))
to_coeffs = cl.array.zeros(queue, (1, len(data_kernel.tgt_expansion)),
dtype=coeffs.dtype)
if toy_ctx.use_fft:
evt, local_before = run_opencl_fft(ifft_app, queue,
local_before, inverse=True, wait_for=[get_native_event(evt)])
evt, _ = postprocess_kernel(queue=queue,
tgt_expansions_before_postprocessing=local_before,
tgt_expansions=to_coeffs,
src_rscale=np.float64(psource.rscale),
tgt_rscale=np.float64(to_rscale),
wait_for=[get_native_event(evt)],
**toy_ctx.extra_kernel_kwargs)
return expn_class(
toy_ctx, to_center, to_rscale, to_order, to_coeffs.get(queue)[0],
derived_from=psource, **expn_kwargs)
else:
ret = _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class,
expn_kwargs, {})
#flags="print_hl_cl",
out_host=True, **toy_ctx.extra_kernel_kwargs)
return ret
return expn_class(toy_ctx, to_center, to_rscale, to_order, to_coeffs[1],
derived_from=psource, **expn_kwargs)
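# The translation-classes M2L path above follows the usual FFT-accelerated
# pipeline: (1) preprocess the multipole coefficients into the
# translation-friendly representation, (2) forward-FFT them if requested,
# (3) build (and FFT) the translation-classes-dependent data for the single
# translation vector, (4) apply the now-pointwise M2L product via _e2e, and
# (5) inverse-FFT and postprocess back to ordinary local coefficients.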
# }}}
# {{{ potential source classes
class PotentialSource(object):
Number_ish = int | float | complex | np.number
class PotentialSource:
"""A base class for all classes representing potentials that can be
evaluated anywhere in space.
......@@ -307,17 +502,22 @@ class PotentialSource(object):
.. automethod:: __rmul__
"""
def __init__(self, toy_ctx):
def __init__(self, toy_ctx: ToyContext):
self.toy_ctx = toy_ctx
def eval(self, targets):
def eval(self, targets: np.ndarray) -> np.ndarray:
"""
:param targets: An array of shape ``(dim, ntargets)``.
:returns: an array of shape ``(ntargets,)``.
"""
raise NotImplementedError()
def __neg__(self):
def __neg__(self) -> PotentialSource:
return -1*self
def __add__(self, other):
if isinstance(other, (Number, np.number)):
def __add__(self, other: Number_ish | PotentialSource
) -> PotentialSource:
if isinstance(other, Number | np.number):
other = ConstantPotential(self.toy_ctx, other)
elif not isinstance(other, PotentialSource):
return NotImplemented
......@@ -326,14 +526,20 @@ class PotentialSource(object):
__radd__ = __add__
def __sub__(self, other):
def __sub__(self,
other: Number_ish | PotentialSource) -> PotentialSource:
return self.__add__(-other)
def __rsub__(self, other):
# FIXME: mypy says " Forward operator "__sub__" is not callable"
# I don't know what it means. -AK, 2024-07-18
def __rsub__(self, # type:ignore[misc]
other: Number_ish | PotentialSource
) -> PotentialSource:
return (-self).__add__(other)
def __mul__(self, other):
if isinstance(other, (Number, np.number)):
def __mul__(self,
other: Number_ish | PotentialSource) -> PotentialSource:
if isinstance(other, Number | np.number):
other = ConstantPotential(self.toy_ctx, other)
elif not isinstance(other, PotentialSource):
return NotImplemented
......@@ -345,14 +551,16 @@ class PotentialSource(object):
class ConstantPotential(PotentialSource):
"""
Inherits from :class:`PotentialSource`.
.. automethod:: __init__
"""
def __init__(self, toy_ctx, value):
super(ConstantPotential, self).__init__(toy_ctx)
def __init__(self, toy_ctx: ToyContext, value):
super().__init__(toy_ctx)
self.value = np.array(value)
def eval(self, targets):
def eval(self, targets: np.ndarray) -> np.ndarray:
pot = np.empty(targets.shape[-1], dtype=self.value.dtype)
pot.fill(self.value)
return pot
......@@ -360,29 +568,37 @@ class ConstantPotential(PotentialSource):
class OneOnBallPotential(PotentialSource):
"""
A potential that is the characteristic function on a ball.
Inherits from :class:`PotentialSource`.
.. automethod:: __init__
"""
def __init__(self, toy_ctx, center, radius):
super(OneOnBallPotential, self).__init__(toy_ctx)
def __init__(self,
toy_ctx: ToyContext, center: np.ndarray, radius: float) -> None:
super().__init__(toy_ctx)
self.center = np.asarray(center)
self.radius = radius
def eval(self, targets):
def eval(self, targets: np.ndarray) -> np.ndarray:
dist_vec = targets - self.center[:, np.newaxis]
return (np.sum(dist_vec**2, axis=0) < self.radius**2).astype(np.float64)
class HalfspaceOnePotential(PotentialSource):
"""
A potential that is the characteristic function of a halfspace.
.. automethod:: __init__
"""
def __init__(self, toy_ctx, center, axis, side=1):
super(HalfspaceOnePotential, self).__init__(toy_ctx)
def __init__(self, toy_ctx: ToyContext, center: np.ndarray,
axis: int, side: int = 1) -> None:
super().__init__(toy_ctx)
self.center = np.asarray(center)
self.axis = axis
self.side = side
def eval(self, targets):
def eval(self, targets: np.ndarray) -> np.ndarray:
return (
(self.side*(targets[self.axis] - self.center[self.axis])) >= 0
).astype(np.float64)
......@@ -390,6 +606,8 @@ class HalfspaceOnePotential(PotentialSource):
class PointSources(PotentialSource):
"""
Inherits from :class:`PotentialSource`.
.. attribute:: points
``[ndim, npoints]``
......@@ -397,20 +615,25 @@ class PointSources(PotentialSource):
.. automethod:: __init__
"""
def __init__(self, toy_ctx, points, weights, center=None):
super(PointSources, self).__init__(toy_ctx)
def __init__(self,
toy_ctx: ToyContext, points: np.ndarray, weights: np.ndarray,
center: np.ndarray | None = None):
super().__init__(toy_ctx)
self.points = points
self.weights = weights
self._center = center
def eval(self, targets):
evt, (potential,) = self.toy_ctx.get_p2p()(
self.toy_ctx.queue, targets, self.points, [self.weights],
out_host=True,
def eval(self, targets: np.ndarray) -> np.ndarray:
queue = self.toy_ctx.queue
_evt, (potential,) = self.toy_ctx.get_p2p()(
queue,
cl.array.to_device(queue, targets),
cl.array.to_device(queue, self.points),
[cl.array.to_device(queue, self.weights)],
**self.toy_ctx.extra_source_and_kernel_kwargs)
return potential
return potential.get(queue)
@property
def center(self):
......@@ -422,19 +645,23 @@ class PointSources(PotentialSource):
class ExpansionPotentialSource(PotentialSource):
"""
Inherits from :class:`PotentialSource`.
.. attribute:: radius
Not used mathematically. Just for visualization, purely advisory.
.. attribute:: text_kwargs
Passed to :method:`matplotlib.pyplot.annotate`. Used for customizing the
Passed to :func:`matplotlib.pyplot.annotate`. Used for customizing the
expansion label. Changing the label text is supported by passing the
kwarg *s*. Just for visualization, purely advisory.
.. automethod:: __init__
"""
def __init__(self, toy_ctx, center, rscale, order, coeffs, derived_from,
radius=None, expn_style=None, text_kwargs=None):
super(ExpansionPotentialSource, self).__init__(toy_ctx)
super().__init__(toy_ctx)
self.center = np.asarray(center)
self.rscale = rscale
self.order = order
......@@ -452,28 +679,42 @@ class ExpansionPotentialSource(PotentialSource):
class MultipoleExpansion(ExpansionPotentialSource):
def eval(self, targets):
"""
Inherits from :class:`ExpansionPotentialSource`.
"""
def eval(self, targets: np.ndarray) -> np.ndarray:
return _e2p(self, targets, self.toy_ctx.get_m2p(self.order))
class LocalExpansion(ExpansionPotentialSource):
def eval(self, targets):
"""
Inherits from :class:`ExpansionPotentialSource`.
"""
def eval(self, targets: np.ndarray) -> np.ndarray:
return _e2p(self, targets, self.toy_ctx.get_l2p(self.order))
class PotentialExpressionNode(PotentialSource):
def __init__(self, psources):
"""
Inherits from :class:`PotentialSource`.
.. automethod:: __init__
"""
def __init__(self, psources: Sequence[PotentialSource]) -> None:
from pytools import single_valued
super(PotentialExpressionNode, self).__init__(
super().__init__(
single_valued(psource.toy_ctx for psource in psources))
self.psources = psources
@property
def center(self):
def center(self) -> np.ndarray:
for psource in self.psources:
try:
return psource.center
return psource.center # type: ignore[attr-defined]
except AttributeError:
pass
......@@ -481,8 +722,12 @@ class PotentialExpressionNode(PotentialSource):
class Sum(PotentialExpressionNode):
def eval(self, targets):
result = 0
"""
Inherits from :class:`PotentialExpressionNode`.
"""
def eval(self, targets: np.ndarray) -> np.ndarray:
result = np.zeros(targets.shape[1])
for psource in self.psources:
result = result + psource.eval(targets)
......@@ -490,8 +735,12 @@ class Sum(PotentialExpressionNode):
class Product(PotentialExpressionNode):
def eval(self, targets):
result = 1
"""
Inherits from :class:`PotentialExpressionNode`.
"""
def eval(self, targets: np.ndarray) -> np.ndarray:
result = np.ones(targets.shape[1])
for psource in self.psources:
result = result * psource.eval(targets)
......@@ -500,7 +749,9 @@ class Product(PotentialExpressionNode):
# }}}
def multipole_expand(psource, center, order=None, rscale=1, **expn_kwargs):
def multipole_expand(psource: PotentialSource,
center: np.ndarray, order: int | None = None,
rscale: float = 1, **expn_kwargs) -> MultipoleExpansion:
if isinstance(psource, PointSources):
if order is None:
raise ValueError("order may not be None")
......@@ -514,14 +765,16 @@ def multipole_expand(psource, center, order=None, rscale=1, **expn_kwargs):
return _e2e(psource, center, rscale, order,
psource.toy_ctx.get_m2m(psource.order, order),
MultipoleExpansion, expn_kwargs)
MultipoleExpansion, expn_kwargs, {})
else:
raise TypeError("do not know how to expand '%s'"
% type(psource).__name__)
raise TypeError(f"do not know how to expand '{type(psource).__name__}'")
def local_expand(psource, center, order=None, rscale=1, **expn_kwargs):
def local_expand(
psource: PotentialSource,
center: np.ndarray, order: int | None = None, rscale: Number_ish = 1,
**expn_kwargs) -> LocalExpansion:
if isinstance(psource, PointSources):
if order is None:
raise ValueError("order may not be None")
......@@ -533,9 +786,28 @@ def local_expand(psource, center, order=None, rscale=1, **expn_kwargs):
if order is None:
order = psource.order
return _e2e(psource, center, rscale, order,
psource.toy_ctx.get_m2l(psource.order, order),
LocalExpansion, expn_kwargs)
toy_ctx = psource.toy_ctx
translation_classes_kwargs = {}
m2l_use_translation_classes_dependent_data = \
toy_ctx.use_translation_classes_dependent_data()
if m2l_use_translation_classes_dependent_data:
data_kernel = toy_ctx.get_m2l_translation_class_dependent_data_kernel(
psource.order, order)
preprocess_kernel = toy_ctx.get_m2l_preprocess_mpole_kernel(
psource.order, order)
postprocess_kernel = toy_ctx.get_m2l_postprocess_local_kernel(
psource.order, order)
translation_classes_kwargs["data_kernel"] = data_kernel
translation_classes_kwargs["preprocess_kernel"] = preprocess_kernel
translation_classes_kwargs["postprocess_kernel"] = postprocess_kernel
translation_classes_kwargs["m2l_expn_size"] = \
toy_ctx.get_m2l_expansion_size(psource.order, order)
return _m2l(psource, center, rscale, order,
toy_ctx.get_m2l(psource.order, order),
LocalExpansion, expn_kwargs,
translation_classes_kwargs)
elif isinstance(psource, LocalExpansion):
if order is None:
......@@ -543,22 +815,27 @@ def local_expand(psource, center, order=None, rscale=1, **expn_kwargs):
return _e2e(psource, center, rscale, order,
psource.toy_ctx.get_l2l(psource.order, order),
LocalExpansion, expn_kwargs)
LocalExpansion, expn_kwargs, {})
else:
raise TypeError("do not know how to expand '%s'"
% type(psource).__name__)
raise TypeError(f"do not know how to expand '{type(psource).__name__}'")
def logplot(fp, psource, **kwargs):
def logplot(fp: FieldPlotter, psource: PotentialSource, **kwargs) -> None:
fp.show_scalar_in_matplotlib(
np.log10(np.abs(psource.eval(fp.points) + 1e-15)), **kwargs)
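# For example (sketch, continuing the names from above):
#
#   fp = FieldPlotter(np.zeros(2), extent=8, npoints=200)
#   logplot(fp, lexp2, cmap="jet", vmin=-3, vmax=0)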
def combine_inner_outer(psource_inner, psource_outer, radius, center=None):
def combine_inner_outer(
psource_inner: PotentialSource,
psource_outer: PotentialSource,
radius: float | None,
center: np.ndarray | None = None) -> PotentialSource:
if center is None:
assert isinstance(psource_inner, ExpansionPotentialSource)
center = psource_inner.center
if radius is None:
assert isinstance(psource_inner, ExpansionPotentialSource)
radius = psource_inner.radius
ball_one = OneOnBallPotential(psource_inner.toy_ctx, center, radius)
......@@ -567,8 +844,11 @@ def combine_inner_outer(psource_inner, psource_outer, radius, center=None):
+ psource_outer * (1 - ball_one))
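# Sketch: patch together a local expansion inside its disk of validity with
# the direct potential outside it (names illustrative):
#
#   combined = combine_inner_outer(lexp2, src, radius=1.0, center=lexp2.center)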
def combine_halfspace(psource_pos, psource_neg, axis, center=None):
def combine_halfspace(psource_pos: PotentialSource,
psource_neg: PotentialSource, axis: int,
center: np.ndarray | None = None) -> PotentialSource:
if center is None:
assert isinstance(psource_pos, ExpansionPotentialSource)
center = psource_pos.center
halfspace_one = HalfspaceOnePotential(psource_pos.toy_ctx, center, axis)
......@@ -577,12 +857,18 @@ def combine_halfspace(psource_pos, psource_neg, axis, center=None):
+ psource_neg * (1-halfspace_one))
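# Sketch, analogous to combine_inner_outer: psource_pos is used on the
# positive side of the given axis, psource_neg on the negative side
# (names illustrative):
#
#   combined = combine_halfspace(pot_right, pot_left, axis=0,
#                                center=np.zeros(2))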
def combine_halfspace_and_outer(psource_pos, psource_neg, psource_outer,
axis, radius=None, center=None):
def combine_halfspace_and_outer(
psource_pos: PotentialSource,
psource_neg: PotentialSource,
psource_outer: PotentialSource,
axis: int, radius: float | None = None,
center: np.ndarray | None = None) -> PotentialSource:
if center is None:
assert isinstance(psource_pos, ExpansionPotentialSource)
center = psource_pos.center
if radius is None:
assert isinstance(psource_pos, ExpansionPotentialSource)
radius = psource_pos.radius
return combine_inner_outer(
......@@ -590,8 +876,11 @@ def combine_halfspace_and_outer(psource_pos, psource_neg, psource_outer,
psource_outer, radius, center)
def l_inf(psource, radius, center=None, npoints=100, debug=False):
def l_inf(psource: PotentialSource, radius: float,
center: np.ndarray | None = None, npoints: int = 100,
debug: bool = False) -> np.number:
if center is None:
assert isinstance(psource, ExpansionPotentialSource)
center = psource.center
restr = psource * OneOnBallPotential(psource.toy_ctx, center, radius)
......@@ -613,8 +902,8 @@ def l_inf(psource, radius, center=None, npoints=100, debug=False):
# {{{ schematic visualization
def draw_box(el, eh, **kwargs):
import matplotlib.pyplot as pt
import matplotlib.patches as mpatches
import matplotlib.pyplot as pt
from matplotlib.path import Path
pathdata = [
......@@ -625,7 +914,7 @@ def draw_box(el, eh, **kwargs):
(Path.CLOSEPOLY, (el[0], el[1])),
]
codes, verts = zip(*pathdata)
codes, verts = zip(*pathdata, strict=True)
path = Path(verts, codes)
patch = mpatches.PathPatch(path, **kwargs)
pt.gca().add_patch(patch)
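# Sketch: draw a unit box with a point marked at its center (values
# illustrative):
#
#   draw_box(np.array([0., 0.]), np.array([1., 1.]), fill=None)
#   draw_point(np.array([0.5, 0.5]), color="black")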
......@@ -643,7 +932,7 @@ def draw_point(loc, **kwargs):
plt.plot(*loc, marker="o", **kwargs)
def draw_annotation(to_pt, from_pt, label, arrowprops={}, **kwargs):
def draw_annotation(to_pt, from_pt, label, arrowprops=None, **kwargs):
"""
:arg to_pt: Head of arrow
:arg from_pt: Tail of arrow
......@@ -651,13 +940,15 @@ def draw_annotation(to_pt, from_pt, label, arrowprops={}, **kwargs):
:arg arrowprops: Merged into the default arrow properties and passed to
    annotate
:arg kwargs: Passed to annotate
"""
if arrowprops is None:
arrowprops = {}
import matplotlib.pyplot as plt
my_arrowprops = dict(
facecolor="black",
edgecolor="black",
arrowstyle="->")
my_arrowprops = {
"facecolor": "black",
"edgecolor": "black",
"arrowstyle": "->"}
my_arrowprops.update(arrowprops)
......@@ -665,7 +956,7 @@ def draw_annotation(to_pt, from_pt, label, arrowprops={}, **kwargs):
arrowprops=my_arrowprops, **kwargs)
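# Sketch: draw a labeled arrow whose head sits at a target center and whose
# tail sits at a source center (coordinates illustrative):
#
#   draw_annotation((3.0, 0.0), (0.0, 0.0), r"$m2l$",
#                   arrowprops={"linestyle": "dashed"})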
class SchematicVisitor(object):
class SchematicVisitor:
def __init__(self, default_expn_style="circle"):
self.default_expn_style = default_expn_style
......@@ -694,7 +985,7 @@ class SchematicVisitor(object):
elif expn_style == "circle":
draw_circle(psource.center, psource.radius, fill=None)
else:
raise ValueError("unknown expn_style: %s" % self.expn_style)
raise ValueError(f"unknown expn_style: {expn_style}")
if psource.derived_from is None:
return
......@@ -703,17 +994,17 @@ class SchematicVisitor(object):
#
# ------> M
text_kwargs = dict(
verticalalignment="center",
horizontalalignment="center")
text_kwargs = {
"verticalalignment": "center",
"horizontalalignment": "center"}
label = "$%s_{%s}$" % (
label = "${}_{{{}}}$".format(
type(psource).__name__[0].lower().replace("l", "\\ell"),
psource.order)
if psource.text_kwargs is not None:
psource_text_kwargs_copy = psource.text_kwargs.copy()
label = psource_text_kwargs_copy.pop('s', label)
label = psource_text_kwargs_copy.pop("s", label)
text_kwargs.update(psource_text_kwargs_copy)
shrinkB = 0 # noqa
......@@ -721,10 +1012,10 @@ class SchematicVisitor(object):
# Avoid overlapping the tail of the arrow with any expansion labels that
# are present at the tail.
import matplotlib as mpl
font_size = mpl.rcParams['font.size']
font_size = mpl.rcParams["font.size"]
shrinkB = 7/8 * font_size # noqa
arrowprops = dict(shrinkB=shrinkB, arrowstyle="<|-")
arrowprops = {"shrinkB": shrinkB, "arrowstyle": "<|-"}
draw_annotation(psource.derived_from.center, psource.center, label,
arrowprops, **text_kwargs)
......
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = "Copyright (C) 2014 Andreas Kloeckner"
......@@ -22,29 +23,22 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
# {{{ find install- or run-time git revision
from importlib import metadata
from pytools import find_module_git_revision
import os
if os.environ.get("AKPYTHON_EXEC_FROM_WITHIN_WITHIN_SETUP_PY") is not None:
# We're just being exec'd by setup.py. We can't import anything.
_git_rev = None
else:
import sumpy._git_rev as _git_rev_mod
_git_rev = _git_rev_mod.GIT_REVISION
def _parse_version(version: str) -> tuple[tuple[int, ...], str]:
import re
# If we're running from a dev tree, the last install (and hence the most
# recent update of the above git rev) could have taken place very long ago.
from pytools import find_module_git_revision
_runtime_git_rev = find_module_git_revision(__file__, n_levels_up=1)
if _runtime_git_rev is not None:
_git_rev = _runtime_git_rev
m = re.match(r"^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT)
assert m is not None
# }}}
return tuple(int(nr) for nr in m.group(1).split(".")), m.group(2)
VERSION = (2016, 1)
VERSION_STATUS = "beta1"
VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
VERSION_TEXT = metadata.version("sumpy")
VERSION, VERSION_STATUS = _parse_version(VERSION_TEXT)
KERNEL_VERSION = (VERSION, _git_rev, 0)
_GIT_REVISION = find_module_git_revision(__file__, n_levels_up=1)
KERNEL_VERSION = (*VERSION, _GIT_REVISION, 0)
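# Example (sketch) of what _parse_version computes:
#
#   _parse_version("2016.1beta1")  # -> ((2016, 1), "beta1")
#   _parse_version("2024.1")       # -> ((2024, 1), "")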
from __future__ import division, absolute_import
from __future__ import annotations
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
......@@ -29,38 +30,25 @@ __doc__ = """
"""
import numpy as np
from six.moves import range
def separate_by_real_and_imag(data, real_only):
from pytools.obj_array import obj_array_imag_copy, obj_array_real_copy
for name, field in data:
from pytools.obj_array import log_shape
ls = log_shape(field)
if ls != () and ls[0] > 1:
assert len(ls) == 1
from pytools.obj_array import (
oarray_real_copy, oarray_imag_copy,
with_object_array_or_scalar)
if field[0].dtype.kind == "c":
if real_only:
yield (name,
with_object_array_or_scalar(oarray_real_copy, field))
else:
yield (name+"_r",
with_object_array_or_scalar(oarray_real_copy, field))
yield (name+"_i",
with_object_array_or_scalar(oarray_imag_copy, field))
else:
yield (name, field)
try:
# Look inside object arrays to get the entry dtype.
entry_dtype = field[0].dtype
except AttributeError:
entry_dtype = field.dtype
assert entry_dtype.kind != "O"
if real_only or entry_dtype.kind != "c":
yield (name, obj_array_real_copy(field))
else:
# ls == ()
if field.dtype.kind == "c":
yield (name+"_r", field.real.copy())
yield (name+"_i", field.imag.copy())
else:
yield (name, field)
yield (f"{name}_r", obj_array_real_copy(field))
yield (f"{name}_i", obj_array_imag_copy(field))
def make_field_plotter_from_bbox(bbox, h, extend_factor=0):
......@@ -88,14 +76,12 @@ def make_field_plotter_from_bbox(bbox, h, extend_factor=0):
from math import ceil
npoints = tuple(
int(ceil(extent[i] / h[i]))
for i in range(dimensions))
npoints = tuple(ceil(extent[i] / h[i]) for i in range(dimensions))
return FieldPlotter(center, extent, npoints)
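# Sketch (assuming a per-axis spacing h, as used above): cover the square
# [-1, 1]^2 at spacing 0.02:
#
#   fp = make_field_plotter_from_bbox(
#       (np.array([-1., -1.]), np.array([1., 1.])), h=(0.02, 0.02))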
class FieldPlotter(object):
class FieldPlotter:
"""
.. automethod:: set_matplotlib_limits
.. automethod:: show_scalar_in_matplotlib
......@@ -123,7 +109,9 @@ class FieldPlotter(object):
slice(a[i], b[i], 1j*npoints[i])
for i in range(dim))
mgrid = np.mgrid[mgrid_index]
# np.asarray is technically unneeded, used to placate pylint
# https://github.com/pylint-dev/pylint/issues/9989
mgrid = np.asarray(np.mgrid[mgrid_index])
# (axis, point x idx, point y idx, ...)
self.nd_points = mgrid
......@@ -180,17 +168,19 @@ class FieldPlotter(object):
def show_vector_in_mayavi(self, fld, do_show=True, **kwargs):
c = self.points
from mayavi import mlab
from mayavi import mlab # pylint: disable=import-error
mlab.quiver3d(c[0], c[1], c[2], fld[0], fld[1], fld[2],
**kwargs)
if do_show:
mlab.show()
def write_vtk_file(self, file_name, data, real_only=False):
def write_vtk_file(self, file_name, data, real_only=False, overwrite=False):
from pyvisfile.vtk import write_structured_grid
write_structured_grid(file_name, self.nd_points,
point_data=list(separate_by_real_and_imag(data, real_only)))
point_data=list(separate_by_real_and_imag(data, real_only)),
overwrite=overwrite)
def show_scalar_in_mayavi(self, fld, max_val=None, **kwargs):
if max_val is not None:
......@@ -203,7 +193,7 @@ class FieldPlotter(object):
nd_points = self.nd_points.squeeze()[self._get_nontrivial_dims()]
squeezed_fld = fld.squeeze()
from mayavi import mlab
from mayavi import mlab # pylint: disable=import-error
mlab.surf(nd_points[0], nd_points[1], squeezed_fld, **kwargs)
# vim: foldmethod=marker
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2017 Matt Wala"
......@@ -22,44 +23,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import sys
import logging
logger = logging.getLogger(__name__)
def test_kill_trivial_assignments():
from pymbolic import var
x, y, t0, t1, t2 = [var(s) for s in "x y t0 t1 t2".split()]
assignments = (
("t0", 6),
("t1", -t0),
("t2", 6*x),
("nt", x**y),
# users of trivial assignments
("u0", t0 + 1),
("u1", t1 + 1),
("u2", t2 + 1),
)
import sys
from sumpy.codegen import kill_trivial_assignments
result = kill_trivial_assignments(
assignments,
retain_names=("u0", "u1", "u2"))
import pytest
from pymbolic.primitives import Sum
def _s(*vals):
return Sum(vals)
logger = logging.getLogger(__name__)
assert result == [
('nt', x**y),
('u0', _s(6, 1)),
('u1', _s(-6, 1)),
('u2', _s(6*x, 1))]
# {{{ test_symbolic_assignment_name_uniqueness
def test_symbolic_assignment_name_uniqueness():
# https://gitlab.tiker.net/inducer/sumpy/issues/13
......@@ -77,21 +50,25 @@ def test_symbolic_assignment_name_uniqueness():
assert len(sac.assignments) == 3
# }}}
# {{{ test_line_taylor_coeff_growth
def test_line_taylor_coeff_growth():
# Regression test for LineTaylorLocalExpansion.
# See https://gitlab.tiker.net/inducer/pytential/merge_requests/12
from sumpy.kernel import LaplaceKernel
from sumpy.expansion.local import LineTaylorLocalExpansion
from sumpy.symbolic import make_sym_vector, SympyToPymbolicMapper
import numpy as np
from sumpy.expansion.local import LineTaylorLocalExpansion
from sumpy.kernel import LaplaceKernel
from sumpy.symbolic import SympyToPymbolicMapper, make_sym_vector
order = 10
expn = LineTaylorLocalExpansion(LaplaceKernel(2), order)
avec = make_sym_vector("a", 2)
bvec = make_sym_vector("b", 2)
coeffs = expn.coefficients_from_source(avec, bvec, rscale=1)
coeffs = expn.coefficients_from_source(expn.kernel, avec, bvec, rscale=1)
sym2pymbolic = SympyToPymbolicMapper()
coeffs_pymbolic = [sym2pymbolic(c) for c in coeffs]
......@@ -104,15 +81,16 @@ def test_line_taylor_coeff_growth():
max_order = 2
assert np.polyfit(np.log(indices), np.log(counts), deg=1)[0] < max_order
# }}}
# You can test individual routines by typing
# $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)'
# $ python test_codegen.py 'test_line_taylor_coeff_growth()'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import print_function, division
from __future__ import annotations
__copyright__ = """
Copyright (C) 2017 Matt Wala
......@@ -66,29 +67,29 @@ DAMAGE.
# }}}
import pytest
import sys
from sumpy.symbolic import (
Add, Pow, exp, sqrt, symbols, sympify, cos, sin, Function, USE_SYMENGINE)
import pytest
import sumpy.symbolic as sym
from sumpy.cse import cse, postprocess_for_cse, preprocess_for_cse
if not USE_SYMENGINE:
from sympy.simplify.cse_opts import sub_pre, sub_post
if not sym.USE_SYMENGINE:
from sympy.functions.special.hyper import meijerg
from sympy.simplify import cse_opts
from sympy.simplify.cse_opts import sub_post, sub_pre
from sumpy.cse import (
cse, preprocess_for_cse, postprocess_for_cse)
import logging
w, x, y, z = symbols('w,x,y,z')
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12 = symbols('x:13')
sympyonly = (
pytest.mark.skipif(USE_SYMENGINE, reason="uses a sympy-only feature"))
logger = logging.getLogger(__name__)
w, x, y, z = sym.symbols("w,x,y,z")
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12 = sym.symbols("x:13")
# Dummy "optimization" functions for testing.
sympyonly = (
pytest.mark.skipif(sym.USE_SYMENGINE, reason="uses a sympy-only feature"))
def opt1(expr):
......@@ -99,6 +100,8 @@ def opt2(expr):
return expr*z
# {{{ test_preprocess_for_cse
def test_preprocess_for_cse():
assert preprocess_for_cse(x, [(opt1, None)]) == x + y
assert preprocess_for_cse(x, [(None, opt1)]) == x
......@@ -107,6 +110,10 @@ def test_preprocess_for_cse():
assert preprocess_for_cse(
x, [(opt1, None), (opt2, None)]) == (x + y)*z
# }}}
# {{{ test_postprocess_for_cse
def test_postprocess_for_cse():
assert postprocess_for_cse(x, [(opt1, None)]) == x
......@@ -117,56 +124,78 @@ def test_postprocess_for_cse():
assert postprocess_for_cse(
x, [(None, opt1), (None, opt2)]) == x*z + y
# }}}
# {{{ test_cse_single
def test_cse_single():
# Simple substitution.
e = Add(Pow(x + y, 2), sqrt(x + y))
e = sym.Add(sym.Pow(x + y, 2), sym.sqrt(x + y))
substs, reduced = cse([e])
assert substs == [(x0, x + y)]
assert reduced == [sqrt(x0) + x0**2]
assert reduced == [sym.sqrt(x0) + x0**2]
# }}}
# {{{
@sympyonly
def test_cse_not_possible():
# No substitution possible.
e = Add(x, y)
e = sym.Add(x, y)
substs, reduced = cse([e])
assert substs == []
assert reduced == [x + y]
# issue 6329
eq = (meijerg((1, 2), (y, 4), (5,), [], x)
+ meijerg((1, 3), (y, 4), (5,), [], x))
eq = (
meijerg((1, 2), (y, 4), (5,), [], x) # pylint: disable=possibly-used-before-assignment
+ meijerg((1, 3), (y, 4), (5,), [], x))
assert cse(eq) == ([], [eq])
# }}}
# {{{ test_nested_substitution
def test_nested_substitution():
# Substitution within a substitution.
e = Add(Pow(w*x + y, 2), sqrt(w*x + y))
e = sym.Add(sym.Pow(w*x + y, 2), sym.sqrt(w*x + y))
substs, reduced = cse([e])
assert substs == [(x0, w*x + y)]
assert reduced == [sqrt(x0) + x0**2]
assert reduced == [sym.sqrt(x0) + x0**2]
# }}}
# {{{ test_subtraction_opt
@sympyonly
def test_subtraction_opt():
# Make sure subtraction is optimized.
e = (x - y)*(z - y) + exp((x - y)*(z - y))
e = (x - y)*(z - y) + sym.exp((x - y)*(z - y))
substs, reduced = cse(
[e], optimizations=[(cse_opts.sub_pre, cse_opts.sub_post)])
[e], optimizations=[(cse_opts.sub_pre, cse_opts.sub_post)]) # pylint: disable=possibly-used-before-assignment
assert substs == [(x0, (x - y)*(y - z))]
assert reduced == [-x0 + exp(-x0)]
e = -(x - y)*(z - y) + exp(-(x - y)*(z - y))
assert reduced == [-x0 + sym.exp(-x0)]
e = -(x - y)*(z - y) + sym.exp(-(x - y)*(z - y))
substs, reduced = cse(
[e], optimizations=[(cse_opts.sub_pre, cse_opts.sub_post)])
assert substs == [(x0, (x - y)*(y - z))]
assert reduced == [x0 + exp(x0)]
assert reduced == [x0 + sym.exp(x0)]
# issue 4077
n = -1 + 1/x
e = n/x/(-n)**2 - 1/n/x
assert cse(e, optimizations=[(cse_opts.sub_pre, cse_opts.sub_post)]) == \
([], [0])
assert cse(e, optimizations=[
(cse_opts.sub_pre, cse_opts.sub_post)]
) == ([], [0])
# }}}
# {{{ test_multiple_expressions
def test_multiple_expressions():
e1 = (x + y)*z
e2 = (x + y)*w
......@@ -183,7 +212,7 @@ def test_multiple_expressions():
rsubsts, _ = cse(reversed(l_))
assert substs == rsubsts
assert reduced == [x1, x1 + z, x0]
f = Function("f")
f = sym.Function("f")
l_ = [f(x - z, y - z), x - z, y - z]
substs, reduced = cse(l_)
rsubsts, _ = cse(reversed(l_))
......@@ -197,31 +226,46 @@ def test_multiple_expressions():
assert cse([x*y, z + x*y, x*y*z + 3]) == \
([(x0, x*y)], [x0, z + x0, 3 + x0*z])
# }}}
# {{{ test_issue_4203
def test_issue_4203():
assert cse(sin(x**x)/x**x) == ([(x0, x**x)], [sin(x0)/x0])
assert cse(sym.sin(x**x)/x**x) == ([(x0, x**x)], [sym.sin(x0)/x0])
# }}}
# {{{ test_dont_cse_subs
def test_dont_cse_subs():
f = Function("f")
g = Function("g")
f = sym.Function("f")
g = sym.Function("g")
name_val, (expr,) = cse(f(x+y).diff(x) + g(x+y).diff(x))
name_val, (_expr,) = cse(f(x+y).diff(x) + g(x+y).diff(x))
assert name_val == []
# }}}
# {{{ test_dont_cse_derivative
def test_dont_cse_derivative():
from sumpy.symbolic import Derivative
f = Function("f")
f = sym.Function("f")
deriv = Derivative(f(x+y), x)
deriv = sym.Derivative(f(x+y), x)
name_val, (expr,) = cse(x + y + deriv)
assert name_val == []
assert expr == x + y + deriv
# }}}
# {{{ test_pow_invpow
def test_pow_invpow():
assert cse(1/x**2 + x**2) == \
......@@ -230,39 +274,64 @@ def test_pow_invpow():
([(x0, x**2), (x1, 1/x0)], [x0 + x1*(x1 + 1)])
assert cse(1/x**2 + (1 + 1/x**2)*x**2) == \
([(x0, x**2), (x1, 1/x0)], [x0*(x1 + 1) + x1])
assert cse(cos(1/x**2) + sin(1/x**2)) == \
([(x0, x**(-2))], [sin(x0) + cos(x0)])
assert cse(cos(x**2) + sin(x**2)) == \
([(x0, x**2)], [sin(x0) + cos(x0)])
assert cse(sym.cos(1/x**2) + sym.sin(1/x**2)) == \
([(x0, x**(-2))], [sym.sin(x0) + sym.cos(x0)])
assert cse(sym.cos(x**2) + sym.sin(x**2)) == \
([(x0, x**2)], [sym.sin(x0) + sym.cos(x0)])
assert cse(y/(2 + x**2) + z/x**2/y) == \
([(x0, x**2)], [y/(x0 + 2) + z/(x0*y)])
assert cse(exp(x**2) + x**2*cos(1/x**2)) == \
([(x0, x**2)], [x0*cos(1/x0) + exp(x0)])
assert cse(sym.exp(x**2) + x**2*sym.cos(1/x**2)) == \
([(x0, x**2)], [x0*sym.cos(1/x0) + sym.exp(x0)])
assert cse((1 + 1/x**2)/x**2) == \
([(x0, x**(-2))], [x0*(x0 + 1)])
assert cse(x**(2*y) + x**(-2*y)) == \
([(x0, x**(2*y))], [x0 + 1/x0])
# }}}
# {{{ test_issue_4499
@sympyonly
def test_issue_4499():
# previously, this gave 16 constants
from sympy import S, Tuple
from sympy.abc import a, b
from sympy import Tuple, S
B = Function('B') # noqa
G = Function('G') # noqa
t = Tuple(
*(a, a + S(1)/2, 2*a, b, 2*a - b + 1, (sqrt(z)/2)**(-2*a + 1)*B(2*a
- b, sqrt(z))*B(b - 1, sqrt(z))*G(b)*G(2*a - b + 1),
sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b,
sqrt(z))*G(b)*G(2*a - b + 1), sqrt(z)*(sqrt(z)/2)**(-2*a + 1)*B(b - 1,
sqrt(z))*B(2*a - b + 1, sqrt(z))*G(b)*G(2*a - b + 1),
(sqrt(z)/2)**(-2*a + 1)*B(b, sqrt(z))*B(2*a - b + 1,
sqrt(z))*G(b)*G(2*a - b + 1), 1, 0, S(1)/2, z/2, -b + 1, -2*a + b,
-2*a)) # noqa
B = sym.Function("B") # noqa: N806
G = sym.Function("G") # noqa: N806
t = Tuple(*(
a,
a + S(1)/2,
2*a,
b,
2*a - b + 1,
(sym.sqrt(z)/2)**(-2*a + 1)
* B(2*a-b, sym.sqrt(z))
* B(b - 1, sym.sqrt(z))*G(b)*G(2*a - b + 1),
sym.sqrt(z)*(sym.sqrt(z)/2)**(-2*a + 1)
* B(b, sym.sqrt(z))
* B(2*a - b, sym.sqrt(z))*G(b)*G(2*a - b + 1),
sym.sqrt(z)*(sym.sqrt(z)/2)**(-2*a + 1)
* B(b - 1, sym.sqrt(z))
* B(2*a - b + 1, sym.sqrt(z))*G(b)*G(2*a - b + 1),
(sym.sqrt(z)/2)**(-2*a + 1)
* B(b, sym.sqrt(z))
* B(2*a - b + 1, sym.sqrt(z))*G(b)*G(2*a - b + 1),
1,
0,
S(1)/2,
z/2,
-b + 1,
-2*a + b,
-2*a))
c = cse(t)
assert len(c[0]) == 11
# }}}
# {{{ test_issue_6169
@sympyonly
def test_issue_6169():
......@@ -271,72 +340,95 @@ def test_issue_6169():
assert cse(r) == ([], [r])
# and a check that the right thing is done with the new
# mechanism
assert sub_post(sub_pre((-x - y)*z - x - y)) == -z*(x + y) - x - y
assert sub_post(sub_pre((-x - y)*z - x - y)) == -z*(x + y) - x - y # pylint: disable=possibly-used-before-assignment
# }}}
# {{{ test_cse_Indexed
@sympyonly
def test_cse_Indexed(): # noqa
from sympy import IndexedBase, Idx
def test_cse_indexed():
from sympy import Idx, IndexedBase
len_y = 5
y = IndexedBase('y', shape=(len_y,))
x = IndexedBase('x', shape=(len_y,))
Dy = IndexedBase('Dy', shape=(len_y-1,)) # noqa
i = Idx('i', len_y-1)
y = IndexedBase("y", shape=(len_y,))
x = IndexedBase("x", shape=(len_y,))
i = Idx("i", len_y-1)
expr1 = (y[i+1]-y[i])/(x[i+1]-x[i])
expr2 = 1/(x[i+1]-x[i])
replacements, reduced_exprs = cse([expr1, expr2])
replacements, _reduced_exprs = cse([expr1, expr2])
assert len(replacements) > 0
# }}}
# {{{ test_Piecewise
@sympyonly
def test_Piecewise(): # noqa
from sympy import Piecewise, Eq
def test_piecewise():
from sympy import Eq, Piecewise
f = Piecewise((-z + x*y, Eq(y, 0)), (-z - x*y, True))
ans = cse(f)
actual_ans = ([(x0, -z), (x1, x*y)],
[Piecewise((x0+x1, Eq(y, 0)), (x0 - x1, True))])
assert ans == actual_ans
# }}}
# {{{ test_name_conflict
def test_name_conflict():
z1 = x0 + y
z2 = x2 + x3
l_ = [cos(z1) + z1, cos(z2) + z2, x0 + x2]
l_ = [sym.cos(z1) + z1, sym.cos(z2) + z2, x0 + x2]
substs, reduced = cse(l_)
assert [e.subs(dict(substs)) for e in reduced] == l_
# }}}
# {{{ test_name_conflict_cust_symbols
def test_name_conflict_cust_symbols():
z1 = x0 + y
z2 = x2 + x3
l_ = [cos(z1) + z1, cos(z2) + z2, x0 + x2]
substs, reduced = cse(l_, symbols("x:10"))
l_ = [sym.cos(z1) + z1, sym.cos(z2) + z2, x0 + x2]
substs, reduced = cse(l_, sym.symbols("x:10"))
assert [e.subs(dict(substs)) for e in reduced] == l_
# }}}
# {{{ test_symbols_exhausted_error
def test_symbols_exhausted_error():
l_ = cos(x+y)+x+y+cos(w+y)+sin(w+y)
sym = [x, y, z]
l_ = sym.cos(x+y)+x+y+sym.cos(w+y)+sym.sin(w+y)
s = [x, y, z]
with pytest.raises(ValueError):
print(cse(l_, symbols=sym))
logger.info("cse:\n%s", cse(l_, symbols=s))
# }}}
# {{{ test_issue_7840
@sympyonly
def test_issue_7840():
# daveknippers' example
C393 = sympify( # noqa
'Piecewise((C391 - 1.65, C390 < 0.5), (Piecewise((C391 - 1.65, \
C391 > 2.35), (C392, True)), True))'
C393 = sym.sympify( # noqa: N806
"Piecewise((C391 - 1.65, C390 < 0.5), (Piecewise((C391 - 1.65, \
C391 > 2.35), (C392, True)), True))"
)
C391 = sympify( # noqa
'Piecewise((2.05*C390**(-1.03), C390 < 0.5), (2.5*C390**(-0.625), True))'
C391 = sym.sympify( # noqa: N806
"Piecewise((2.05*C390**(-1.03), C390 < 0.5), (2.5*C390**(-0.625), True))"
)
C393 = C393.subs('C391',C391) # noqa
C393 = C393.subs("C391", C391) # noqa: N806
# simple substitution
sub = {}
sub['C390'] = 0.703451854
sub['C392'] = 1.01417794
sub["C390"] = 0.703451854
sub["C392"] = 1.01417794
ss_answer = C393.subs(sub)
# cse
substitutions, new_eqn = cse(C393)
......@@ -347,7 +439,7 @@ def test_issue_7840():
assert ss_answer == cse_answer
# GitRay's example
expr = sympify(
expr = sym.sympify(
"Piecewise((Symbol('ON'), Equality(Symbol('mode'), Symbol('ON'))), \
(Piecewise((Piecewise((Symbol('OFF'), StrictLessThan(Symbol('x'), \
Symbol('threshold'))), (Symbol('ON'), S.true)), Equality(Symbol('mode'), \
......@@ -359,6 +451,10 @@ def test_issue_7840():
# there should not be any replacements
assert len(substitutions) < 1
# }}}
# {{{ test_recursive_matching
def test_recursive_matching():
assert cse([x+y, 2+x+y, x+y+z, 3+x+y+z]) == \
......@@ -374,12 +470,16 @@ def test_recursive_matching():
assert cse([2*x*x, x*x*y, x*x*y*w, x*x*y*w*x0, x*x*y*w*x2]) == \
([(x1, x**2), (x3, x1*y), (x4, w*x3)], [2*x1, x3, x4, x0*x4, x2*x4])
# }}}
# You can test individual routines by typing
# $ python test_cse.py 'test_recursive_matching()'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import annotations
__copyright__ = "Copyright (C) 2022 Hao Gao"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import os
from functools import partial
import numpy as np
import pytest
import pyopencl as cl
# Note: Do not import the mpi4py.MPI module at module level, because OpenMPI
# does not support recursive invocations.
def set_cache_dir(mpirank):
"""Make each rank use a different cache location to avoid conflict."""
import platformdirs
cache_dir = platformdirs.user_cache_dir("sumpy", "sumpy")
# FIXME: should clean up this directory after running the tests
os.environ["XDG_CACHE_HOME"] = os.path.join(cache_dir, str(mpirank))
# {{{ _test_against_single_rank
def _test_against_single_rank(
dims, nsources, ntargets, dtype, communicate_mpoles_via_allreduce=False):
from mpi4py import MPI
# Get the current rank
comm = MPI.COMM_WORLD
mpi_rank = comm.Get_rank()
set_cache_dir(mpi_rank)
# Configure array context
cl_context = cl.create_some_context()
queue = cl.CommandQueue(cl_context)
def fmm_level_to_order(base_kernel, kernel_arg_set, tree, level):
return max(level, 3)
from boxtree.traversal import FMMTraversalBuilder
traversal_builder = FMMTraversalBuilder(cl_context, well_sep_is_n_away=2)
from sumpy.expansion import DefaultExpansionFactory
from sumpy.kernel import LaplaceKernel
kernel = LaplaceKernel(dims)
expansion_factory = DefaultExpansionFactory()
local_expansion_factory = expansion_factory.get_local_expansion_class(kernel)
local_expansion_factory = partial(local_expansion_factory, kernel)
multipole_expansion_factory = \
expansion_factory.get_multipole_expansion_class(kernel)
multipole_expansion_factory = partial(multipole_expansion_factory, kernel)
from sumpy.fmm import SumpyTreeIndependentDataForWrangler
tree_indep = SumpyTreeIndependentDataForWrangler(
cl_context, multipole_expansion_factory, local_expansion_factory, [kernel])
global_tree_dev = None
sources_weights = cl.array.empty(queue, 0, dtype=dtype)
if mpi_rank == 0:
# Generate random particles and source weights
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(queue, nsources, dims, dtype, seed=15)
targets = p_normal(queue, ntargets, dims, dtype, seed=18)
# FIXME: Use arraycontext instead of raw PyOpenCL arrays
from pyopencl.clrandom import PhiloxGenerator
rng = PhiloxGenerator(cl_context, seed=20)
sources_weights = rng.uniform(queue, nsources, dtype=np.float64)
rng = PhiloxGenerator(cl_context, seed=22)
target_radii = rng.uniform(
queue, ntargets, a=0, b=0.05, dtype=np.float64)
# Build the tree and interaction lists
from boxtree import TreeBuilder
tb = TreeBuilder(cl_context)
global_tree_dev, _ = tb(
queue, sources, targets=targets, target_radii=target_radii,
stick_out_factor=0.25, max_particles_in_box=30, debug=True)
global_trav_dev, _ = traversal_builder(queue, global_tree_dev, debug=True)
from sumpy.fmm import SumpyExpansionWrangler
wrangler = SumpyExpansionWrangler(tree_indep, global_trav_dev, dtype,
fmm_level_to_order)
# Compute FMM with one MPI rank
from boxtree.fmm import drive_fmm
shmem_potential = drive_fmm(wrangler, [sources_weights])
# Compute FMM using the distributed implementation
def wrangler_factory(local_traversal, global_traversal):
from sumpy.distributed import DistributedSumpyExpansionWrangler
return DistributedSumpyExpansionWrangler(
cl_context, comm, tree_indep, local_traversal, global_traversal, dtype,
fmm_level_to_order,
communicate_mpoles_via_allreduce=communicate_mpoles_via_allreduce)
from boxtree.distributed import DistributedFMMRunner
distributed_fmm_info = DistributedFMMRunner(
queue, global_tree_dev, traversal_builder, wrangler_factory, comm=comm)
timing_data = {}
distributed_potential = distributed_fmm_info.drive_dfmm(
[sources_weights], timing_data=timing_data)
assert timing_data
if mpi_rank == 0:
assert shmem_potential.shape == (1,)
assert distributed_potential.shape == (1,)
shmem_potential = shmem_potential[0].get()
distributed_potential = distributed_potential[0].get()
error = (np.linalg.norm(distributed_potential - shmem_potential, ord=np.inf)
/ np.linalg.norm(shmem_potential, ord=np.inf))
print(error)
assert error < 1e-14
@pytest.mark.mpi
@pytest.mark.parametrize(
"num_processes, dims, nsources, ntargets, communicate_mpoles_via_allreduce", [
(4, 3, 10000, 10000, True),
(4, 3, 10000, 10000, False)
]
)
def test_against_single_rank(
num_processes, dims, nsources, ntargets, communicate_mpoles_via_allreduce):
pytest.importorskip("mpi4py")
from boxtree.tools import run_mpi
run_mpi(__file__, num_processes, {
"OMP_NUM_THREADS": 1,
"_SUMPY_TEST_NAME": "against_single_rank",
"_SUMPY_TEST_DIMS": dims,
"_SUMPY_TEST_NSOURCES": nsources,
"_SUMPY_TEST_NTARGETS": ntargets,
"_SUMPY_TEST_MPOLES_ALLREDUCE": communicate_mpoles_via_allreduce
})
# }}}
if __name__ == "__main__":
if "_SUMPY_TEST_NAME" in os.environ:
name = os.environ["_SUMPY_TEST_NAME"]
if name == "against_single_rank":
# Run "test_against_single_rank" test case
dims = int(os.environ["_SUMPY_TEST_DIMS"])
nsources = int(os.environ["_SUMPY_TEST_NSOURCES"])
ntargets = int(os.environ["_SUMPY_TEST_NTARGETS"])
communicate_mpoles_via_allreduce = (
os.environ["_SUMPY_TEST_MPOLES_ALLREDUCE"] == "True")
_test_against_single_rank(
dims, nsources, ntargets, np.float64,
communicate_mpoles_via_allreduce)
else:
raise ValueError(f"Invalid test name: {name!r}")
else:
# You can test individual routines by typing
# $ python test_distributed.py
# 'test_against_single_rank(4, 3, 10000, 10000, False)'
import sys
exec(sys.argv[1])
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2013 Andreas Kloeckner"
......@@ -22,107 +23,150 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from six.moves import range
import logging
import os
import sys
from functools import partial
import numpy as np
import numpy.linalg as la
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from sumpy.kernel import LaplaceKernel, HelmholtzKernel, YukawaKernel
from sumpy.expansion.multipole import (
VolumeTaylorMultipoleExpansion,
H2DMultipoleExpansion, Y2DMultipoleExpansion,
LaplaceConformingVolumeTaylorMultipoleExpansion,
HelmholtzConformingVolumeTaylorMultipoleExpansion)
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
from sumpy.expansion.local import (
H2DLocalExpansion,
LinearPDEConformingVolumeTaylorLocalExpansion,
VolumeTaylorLocalExpansion,
H2DLocalExpansion, Y2DLocalExpansion,
LaplaceConformingVolumeTaylorLocalExpansion,
HelmholtzConformingVolumeTaylorLocalExpansion)
Y2DLocalExpansion,
)
from sumpy.expansion.multipole import (
H2DMultipoleExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion,
VolumeTaylorMultipoleExpansion,
Y2DMultipoleExpansion,
)
from sumpy.fmm import SumpyExpansionWrangler, SumpyTreeIndependentDataForWrangler
from sumpy.kernel import BiharmonicKernel, HelmholtzKernel, LaplaceKernel, YukawaKernel
import pytest
import logging
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ test_sumpy_fmm
try:
import faulthandler
except ImportError:
pass
else:
faulthandler.enable()
@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [
(LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion,
LaplaceConformingVolumeTaylorMultipoleExpansion),
(LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(LaplaceKernel(3), LaplaceConformingVolumeTaylorLocalExpansion,
LaplaceConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), HelmholtzConformingVolumeTaylorLocalExpansion,
HelmholtzConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion),
(HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(HelmholtzKernel(3), HelmholtzConformingVolumeTaylorLocalExpansion,
HelmholtzConformingVolumeTaylorMultipoleExpansion),
(YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion),
@pytest.mark.parametrize(
("use_translation_classes", "use_fft", "fft_backend"), [
(False, False, None),
(True, False, None),
(True, True, "loopy"),
(True, True, "pyvkfft"),
])
def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
logging.basicConfig(level=logging.INFO)
@pytest.mark.parametrize(
("knl", "local_expn_class", "mpole_expn_class",
"order_varies_with_level"), [
(LaplaceKernel(2), VolumeTaylorLocalExpansion,
VolumeTaylorMultipoleExpansion, False),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(LaplaceKernel(3), VolumeTaylorLocalExpansion,
VolumeTaylorMultipoleExpansion, False),
(LaplaceKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(2), VolumeTaylorLocalExpansion,
VolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, False),
(HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, True),
(HelmholtzKernel(3), VolumeTaylorLocalExpansion,
VolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(YukawaKernel(2), Y2DLocalExpansion, Y2DMultipoleExpansion,
False),
])
def test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class,
order_varies_with_level, use_translation_classes, use_fft,
fft_backend, visualize=False):
if fft_backend == "pyvkfft":
pytest.importorskip("pyvkfft")
if visualize:
logging.basicConfig(level=logging.INFO)
if local_expn_class == VolumeTaylorLocalExpansion and use_fft:
pytest.skip("VolumeTaylorExpansion with FFT takes a lot of resources.")
if local_expn_class in [H2DLocalExpansion, Y2DLocalExpansion] and use_fft:
pytest.skip("Fourier/Bessel based expansions with FFT is not supported yet.")
if use_fft:
from unittest.mock import patch
with patch.dict(os.environ, {"SUMPY_FFT_BACKEND": fft_backend}):
_test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class,
order_varies_with_level, use_translation_classes, use_fft,
fft_backend)
else:
_test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class,
order_varies_with_level, use_translation_classes, use_fft,
fft_backend)
def _test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class,
order_varies_with_level, use_translation_classes, use_fft,
fft_backend):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx = actx_factory()
nsources = 1000
ntargets = 300
dtype = np.float64
from boxtree.tools import (
make_normal_particle_array as p_normal)
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
if 1:
offset = np.zeros(knl.dim)
offset[0] = 0.1
targets = (
p_normal(queue, ntargets, knl.dim, dtype, seed=18)
+ offset)
targets = offset + p_normal(actx.queue, ntargets, knl.dim, dtype, seed=18)
del offset
else:
from sumpy.visualization import FieldPlotter
fp = FieldPlotter(np.array([0.5, 0]), extent=3, npoints=200)
from pytools.obj_array import make_obj_array
targets = make_obj_array(
[fp.points[i] for i in range(knl.dim)])
targets = make_obj_array([fp.points[i] for i in range(knl.dim)])
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
tree, _ = tb(queue, sources, targets=targets,
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(ctx)
trav, _ = tbuild(queue, tree, debug=True)
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
# {{{ plot tree
if 0:
host_tree = tree.get()
host_trav = trav.get()
host_tree = tree.get(actx.queue)
host_trav = trav.get(actx.queue)
if 1:
print("src_box", host_tree.find_box_nr_for_source(403))
print("tgt_box", host_tree.find_box_nr_for_target(28))
print(list(host_trav.target_or_target_parent_boxes).index(37))
print(host_trav.get_box_list("sep_bigger", 22))
if 0:
logger.info("src_box: %s", host_tree.find_box_nr_for_source(403))
logger.info("tgt_box: %s", host_tree.find_box_nr_for_target(28))
logger.info("%s",
list(host_trav.target_or_target_parent_boxes).index(37))
logger.info("%s", host_trav.get_box_list("sep_bigger", 22))
from boxtree.visualization import TreePlotter
plotter = TreePlotter(host_tree)
......@@ -135,14 +179,11 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
# }}}
from pyopencl.clrandom import PhiloxGenerator
rng = PhiloxGenerator(ctx, seed=44)
weights = rng.uniform(queue, nsources, dtype=np.float64)
rng = np.random.default_rng(44)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
logger.info("computing direct (reference) result")
from pytools.convergence import PConvergenceVerifier
pconv_verifier = PConvergenceVerifier()
extra_kwargs = {}
......@@ -155,7 +196,7 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
if knl.dim == 3:
order_values = [1, 2]
elif knl.dim == 2 and issubclass(local_expn_class, H2DLocalExpansion):
order_values = [10, 12]
order_values = [4, 5]
elif isinstance(knl, YukawaKernel):
extra_kwargs["lam"] = 2
......@@ -166,175 +207,522 @@ def test_sumpy_fmm(ctx_getter, knl, local_expn_class, mpole_expn_class):
elif knl.dim == 2 and issubclass(local_expn_class, Y2DLocalExpansion):
order_values = [10, 12]
from functools import partial
for order in order_values:
out_kernels = [knl]
target_kernels = [knl]
if use_fft:
from sumpy.expansion.m2l import FFTM2LTranslationClassFactory
m2l_translation_factory = FFTM2LTranslationClassFactory()
else:
from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory
m2l_translation_factory = NonFFTM2LTranslationClassFactory()
from sumpy.fmm import SumpyExpansionWranglerCodeContainer
wcc = SumpyExpansionWranglerCodeContainer(
ctx,
m2l_translation = m2l_translation_factory.get_m2l_translation_class(
knl, local_expn_class)()
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
out_kernels)
wrangler = wcc.get_wrangler(queue, tree, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
kernel_extra_kwargs=extra_kwargs)
partial(local_expn_class, knl, m2l_translation=m2l_translation),
target_kernels)
if order_varies_with_level:
def fmm_level_to_order(kernel, kernel_args, tree, lev):
return order + lev % 2 # noqa: B023
else:
def fmm_level_to_order(kernel, kernel_args, tree, lev):
return order # noqa: B023
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=fmm_level_to_order,
kernel_extra_kwargs=extra_kwargs,
_disable_translation_classes=not use_translation_classes)
from boxtree.fmm import drive_fmm
pot, = drive_fmm(trav, wrangler, weights)
pot, = drive_fmm(wrangler, (weights,))
from sumpy import P2P
p2p = P2P(ctx, out_kernels, exclude_self=False)
evt, (ref_pot,) = p2p(queue, targets, sources, (weights,),
p2p = P2P(actx.context, target_kernels, exclude_self=False)
_evt, (ref_pot,) = p2p(actx.queue, targets, sources, (weights,),
**extra_kwargs)
pot = pot.get()
ref_pot = ref_pot.get()
pot = actx.to_numpy(pot)
ref_pot = actx.to_numpy(ref_pot)
rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
logger.info("order %d -> relative l2 error: %g" % (order, rel_err))
logger.info("order %d -> relative l2 error: %g", order, rel_err)
pconv_verifier.add_data_point(order, rel_err)
print(pconv_verifier)
logger.info("\n%s", pconv_verifier)
pconv_verifier()
# }}}
# {{{ test_coeff_magnitude_rscale
@pytest.mark.parametrize("knl", [LaplaceKernel(2), BiharmonicKernel(2)])
def test_coeff_magnitude_rscale(actx_factory, knl):
"""Checks that the rscale used keeps the coefficient magnitude
difference small
"""
local_expn_class = LinearPDEConformingVolumeTaylorLocalExpansion
mpole_expn_class = LinearPDEConformingVolumeTaylorMultipoleExpansion
actx = actx_factory()
nsources = 1000
ntargets = 300
dtype = np.float64
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
offset = np.zeros(knl.dim)
offset[0] = 0.1
targets = offset + p_normal(actx.queue, ntargets, knl.dim, dtype, seed=18)
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30, debug=True)
def test_sumpy_fmm_timing_data_collection(ctx_getter):
logging.basicConfig(level=logging.INFO)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
ctx = ctx_getter()
queue = cl.CommandQueue(
ctx,
properties=cl.command_queue_properties.PROFILING_ENABLE)
rng = np.random.default_rng(31)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
extra_kwargs = {}
dtype = np.float64
order = 10
if isinstance(knl, HelmholtzKernel):
extra_kwargs["k"] = 0.05
dtype = np.complex128
elif isinstance(knl, YukawaKernel):
extra_kwargs["lam"] = 2
dtype = np.complex128
target_kernels = [knl]
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
target_kernels)
def fmm_level_to_order(kernel, kernel_args, tree, lev):
return order
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=fmm_level_to_order,
kernel_extra_kwargs=extra_kwargs)
weights = wrangler.reorder_sources(weights)
(weights,) = wrangler.distribute_source_weights((weights,), None)
local_result, _ = wrangler.form_locals(
trav.level_start_target_or_target_parent_box_nrs,
trav.target_or_target_parent_boxes,
trav.from_sep_bigger_starts,
trav.from_sep_bigger_lists,
(weights,))
result = actx.to_numpy(
actx.np.abs(wrangler.local_expansions_view(local_result, 5)[1][0])
)
result_ratio = np.max(result) / np.min(result)
assert result_ratio < 10**6, result_ratio
# }}}
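
# Why rscale matters here (a numpy-only sketch under a simplified model,
# not part of the test above): an order-k Taylor coefficient of a potential
# smooth on a box of size r behaves roughly like (r / rscale)**k, so
# rscale ~ r keeps all orders comparable, while rscale = 1 spreads them
# over many decades for small boxes.
def _rscale_spread_demo(r=0.01, order=8):
    unscaled = [r**k for k in range(order + 1)]       # rscale = 1
    scaled = [(r / r)**k for k in range(order + 1)]   # rscale = r
    # roughly (1e16, 1.0): rscaling collapses the magnitude spread
    return max(unscaled) / min(unscaled), max(scaled) / min(scaled)
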
# {{{ test_unified_single_and_double
def test_unified_single_and_double(actx_factory, visualize=False):
"""
Test that running a single FMM for the single-layer and double-layer
potentials together gives the same result as running a separate FMM for
each and summing the results at the end.
"""
if visualize:
logging.basicConfig(level=logging.INFO)
actx = actx_factory()
knl = LaplaceKernel(2)
local_expn_class = LinearPDEConformingVolumeTaylorLocalExpansion
mpole_expn_class = LinearPDEConformingVolumeTaylorMultipoleExpansion
nsources = 1000
ntargets = 300
dtype = np.float64
from boxtree.tools import make_normal_particle_array as p_normal
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
offset = np.zeros(knl.dim)
offset[0] = 0.1
targets = offset + p_normal(actx.queue, ntargets, knl.dim, dtype, seed=18)
del offset
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources, targets=targets,
max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
rng = np.random.default_rng(44)
weights = (
actx.from_numpy(rng.random(nsources, dtype=np.float64)),
actx.from_numpy(rng.random(nsources, dtype=np.float64))
)
logger.info("computing direct (reference) result")
dtype = np.float64
order = 3
from sumpy.kernel import AxisTargetDerivative, DirectionalSourceDerivative
deriv_knl = DirectionalSourceDerivative(knl, "dir_vec")
target_kernels = [knl, AxisTargetDerivative(0, knl)]
source_kernel_vecs = [[knl], [deriv_knl], [knl, deriv_knl]]
strength_usages = [[0], [1], [0, 1]]
alpha = np.linspace(0, 2*np.pi, nsources, dtype=np.float64)
dir_vec = actx.from_numpy(np.vstack([np.cos(alpha), np.sin(alpha)]))
results = []
for source_kernels, strength_usage in zip(
source_kernel_vecs, strength_usages, strict=True):
source_extra_kwargs = {}
if deriv_knl in source_kernels:
source_extra_kwargs["dir_vec"] = dir_vec
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
target_kernels=target_kernels, source_kernels=source_kernels,
strength_usage=strength_usage)
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
source_extra_kwargs=source_extra_kwargs)
from boxtree.fmm import drive_fmm
pot = drive_fmm(wrangler, weights)
results.append(np.array([actx.to_numpy(pot[0]), actx.to_numpy(pot[1])]))
ref_pot = results[0] + results[1]
pot = results[2]
rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf)
assert rel_err < 1e-12
# }}}
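
# The identity behind the test above, in plain numpy (a sketch): the
# potential is linear in the per-kernel strengths, so one pass with the
# stacked interaction matrix [A | B] equals the sum of two separate
# evaluations. a and b below are stand-ins for the single- and
# double-layer interaction matrices, not sumpy objects.
def _linearity_demo():
    rng = np.random.default_rng(0)
    a = rng.random((5, 7))
    b = rng.random((5, 7))
    sigma, mu = rng.random(7), rng.random(7)
    one_pass = np.hstack([a, b]) @ np.concatenate([sigma, mu])
    two_passes = a @ sigma + b @ mu
    assert np.allclose(one_pass, two_passes)
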
# {{{ test_sumpy_fmm_timing_data_collection
@pytest.mark.parametrize("use_fft", [True, False])
def test_sumpy_fmm_timing_data_collection(ctx_factory, use_fft, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
import pyopencl as cl
from sumpy.array_context import PyOpenCLArrayContext
ctx = ctx_factory()
queue = cl.CommandQueue(ctx,
properties=cl.command_queue_properties.PROFILING_ENABLE)
actx = PyOpenCLArrayContext(queue)
nsources = 500
dtype = np.float64
from boxtree.tools import (
make_normal_particle_array as p_normal)
from boxtree.tools import make_normal_particle_array as p_normal
knl = LaplaceKernel(2)
local_expn_class = VolumeTaylorLocalExpansion
mpole_expn_class = VolumeTaylorMultipoleExpansion
order = 1
sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
tree, _ = tb(queue, sources,
max_particles_in_box=30, debug=True)
tree, _ = tb(actx.queue, sources, max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(ctx)
trav, _ = tbuild(queue, tree, debug=True)
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
rng = np.random.default_rng(44)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
from pyopencl.clrandom import PhiloxGenerator
rng = PhiloxGenerator(ctx)
weights = rng.uniform(queue, nsources, dtype=np.float64)
target_kernels = [knl]
out_kernels = [knl]
if use_fft:
from sumpy.expansion.m2l import FFTM2LTranslationClassFactory
m2l_translation_factory = FFTM2LTranslationClassFactory()
else:
from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory
m2l_translation_factory = NonFFTM2LTranslationClassFactory()
from functools import partial
m2l_translation = m2l_translation_factory.get_m2l_translation_class(
knl, local_expn_class)()
from sumpy.fmm import SumpyExpansionWranglerCodeContainer
wcc = SumpyExpansionWranglerCodeContainer(
ctx,
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
out_kernels)
partial(local_expn_class, knl, m2l_translation=m2l_translation),
target_kernels)
wrangler = wcc.get_wrangler(queue, tree, dtype,
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order)
from boxtree.fmm import drive_fmm
timing_data = {}
pot, = drive_fmm(trav, wrangler, weights, timing_data=timing_data)
print(timing_data)
_pot, = drive_fmm(wrangler, (weights,), timing_data=timing_data)
logger.info("timing_data:\n%s", timing_data)
assert timing_data
def test_sumpy_fmm_exclude_self(ctx_getter):
logging.basicConfig(level=logging.INFO)
def test_sumpy_fmm_exclude_self(actx_factory, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx = actx_factory()
nsources = 500
dtype = np.float64
from boxtree.tools import (
make_normal_particle_array as p_normal)
from boxtree.tools import make_normal_particle_array as p_normal
knl = LaplaceKernel(2)
local_expn_class = VolumeTaylorLocalExpansion
mpole_expn_class = VolumeTaylorMultipoleExpansion
order = 10
sources = p_normal(queue, nsources, knl.dim, dtype, seed=15)
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
tb = TreeBuilder(actx.context)
tree, _ = tb(queue, sources,
max_particles_in_box=30, debug=True)
tree, _ = tb(actx.queue, sources, max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(ctx)
trav, _ = tbuild(queue, tree, debug=True)
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
from pyopencl.clrandom import PhiloxGenerator
rng = PhiloxGenerator(ctx)
weights = rng.uniform(queue, nsources, dtype=np.float64)
rng = np.random.default_rng(44)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
target_to_source = np.arange(tree.ntargets, dtype=np.int32)
target_to_source = actx.from_numpy(np.arange(tree.ntargets, dtype=np.int32))
self_extra_kwargs = {"target_to_source": target_to_source}
out_kernels = [knl]
from functools import partial
target_kernels = [knl]
from sumpy.fmm import SumpyExpansionWranglerCodeContainer
wcc = SumpyExpansionWranglerCodeContainer(
ctx,
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
out_kernels,
target_kernels,
exclude_self=True)
wrangler = wcc.get_wrangler(queue, tree, dtype,
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
self_extra_kwargs=self_extra_kwargs)
from boxtree.fmm import drive_fmm
pot, = drive_fmm(trav, wrangler, weights)
pot, = drive_fmm(wrangler, (weights,))
from sumpy import P2P
p2p = P2P(ctx, out_kernels, exclude_self=True)
evt, (ref_pot,) = p2p(queue, sources, sources, (weights,),
p2p = P2P(actx.context, target_kernels, exclude_self=True)
_evt, (ref_pot,) = p2p(actx.queue, sources, sources, (weights,),
**self_extra_kwargs)
pot = pot.get()
ref_pot = ref_pot.get()
pot = actx.to_numpy(pot)
ref_pot = actx.to_numpy(ref_pot)
rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot)
logger.info("order %d -> relative l2 error: %g" % (order, rel_err))
logger.info("order %d -> relative l2 error: %g", order, rel_err)
assert np.isclose(rel_err, 0, atol=1e-7)
# }}}
# {{{ test_sumpy_axis_source_derivative
def test_sumpy_axis_source_derivative(actx_factory, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
actx = actx_factory()
nsources = 500
dtype = np.float64
from boxtree.tools import make_normal_particle_array as p_normal
knl = LaplaceKernel(2)
local_expn_class = VolumeTaylorLocalExpansion
mpole_expn_class = VolumeTaylorMultipoleExpansion
order = 10
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources,
max_particles_in_box=30, debug=True)
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
rng = np.random.default_rng(12)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
target_to_source = actx.from_numpy(np.arange(tree.ntargets, dtype=np.int32))
self_extra_kwargs = {"target_to_source": target_to_source}
from sumpy.kernel import AxisSourceDerivative, AxisTargetDerivative
pots = []
for tgt_knl, src_knl in [
(AxisTargetDerivative(0, knl), knl),
(knl, AxisSourceDerivative(0, knl))]:
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
target_kernels=[tgt_knl],
source_kernels=[src_knl],
exclude_self=True)
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
self_extra_kwargs=self_extra_kwargs)
from boxtree.fmm import drive_fmm
pot, = drive_fmm(wrangler, (weights,))
pots.append(actx.to_numpy(pot))
rel_err = la.norm(pots[0] + pots[1]) / la.norm(pots[0])
logger.info("order %d -> relative l2 error: %g", order, rel_err)
assert np.isclose(rel_err, 0, atol=1e-5)
# }}}
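
# The cancellation checked above rests on the kernel identity
# d/dx_tgt G(x - y) = -d/dy_src G(x - y). A finite-difference check of
# that identity in 1D (a sketch; the log kernel stands in for the 2D
# Laplace kernel):
def _derivative_antisymmetry_demo():
    def g(x, y):
        return -np.log(abs(x - y))

    x, y, h = 0.7, 0.2, 1.0e-6
    d_tgt = (g(x + h, y) - g(x - h, y)) / (2*h)
    d_src = (g(x, y + h) - g(x, y - h)) / (2*h)
    assert np.isclose(d_tgt, -d_src)
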
# {{{ test_sumpy_target_point_multiplier
@pytest.mark.parametrize("deriv_axes", [(), (0,), (1,)])
def test_sumpy_target_point_multiplier(actx_factory, deriv_axes, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
actx = actx_factory()
nsources = 500
dtype = np.float64
from boxtree.tools import make_normal_particle_array as p_normal
knl = LaplaceKernel(2)
local_expn_class = VolumeTaylorLocalExpansion
mpole_expn_class = VolumeTaylorMultipoleExpansion
order = 5
sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15)
from boxtree import TreeBuilder
tb = TreeBuilder(actx.context)
tree, _ = tb(actx.queue, sources,
max_particles_in_box=30, debug=True)
# You can test individual routines by typing
# $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)'
from boxtree.traversal import FMMTraversalBuilder
tbuild = FMMTraversalBuilder(actx.context)
trav, _ = tbuild(actx.queue, tree, debug=True)
rng = np.random.default_rng(12)
weights = actx.from_numpy(rng.random(nsources, dtype=np.float64))
target_to_source = actx.from_numpy(np.arange(tree.ntargets, dtype=np.int32))
self_extra_kwargs = {"target_to_source": target_to_source}
from sumpy.kernel import AxisTargetDerivative, TargetPointMultiplier
tgt_knls = [TargetPointMultiplier(0, knl), knl, knl]
for axis in deriv_axes:
tgt_knls[0] = AxisTargetDerivative(axis, tgt_knls[0])
tgt_knls[1] = AxisTargetDerivative(axis, tgt_knls[1])
tree_indep = SumpyTreeIndependentDataForWrangler(
actx.context,
partial(mpole_expn_class, knl),
partial(local_expn_class, knl),
target_kernels=tgt_knls,
source_kernels=[knl],
exclude_self=True)
wrangler = SumpyExpansionWrangler(tree_indep, trav, dtype,
fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order,
self_extra_kwargs=self_extra_kwargs)
from boxtree.fmm import drive_fmm
pot0, pot1, pot2 = drive_fmm(wrangler, (weights,))
pot0, pot1, pot2 = actx.to_numpy(pot0), actx.to_numpy(pot1), actx.to_numpy(pot2)
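# The reference below is the product rule applied to T = x0 * G:
#   d/dx0 (x0 * G) = x0 * dG/dx0 + G  -> pot1 * x0 + pot2,
# while for no derivative or a d/dx1 derivative the extra G term does
# not appear, leaving just pot1 * x0.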
if deriv_axes == (0,):
ref_pot = pot1 * actx.to_numpy(sources[0]) + pot2
else:
ref_pot = pot1 * actx.to_numpy(sources[0])
rel_err = la.norm(pot0 - ref_pot) / la.norm(ref_pot)
logger.info("order %d -> relative l2 error: %g", order, rel_err)
assert np.isclose(rel_err, 0, atol=1e-5)
# }}}
"""
You can test individual routines by typing
$ python test/test_fmm.py 'test_sumpy_fmm(_acf, LaplaceKernel(2),
VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion,
order_varies_with_level=False, use_translation_classes=True, use_fft=True,
fft_backend="pyvkfft", visualize=True)'
"""
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
@@ -22,71 +23,88 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from six.moves import range
import logging
import sys
from functools import partial
import numpy as np
import numpy.linalg as la
import sys
import pytest
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from sumpy.expansion.multipole import (
VolumeTaylorMultipoleExpansion, H2DMultipoleExpansion,
VolumeTaylorMultipoleExpansionBase,
LaplaceConformingVolumeTaylorMultipoleExpansion,
HelmholtzConformingVolumeTaylorMultipoleExpansion)
from sumpy.expansion.local import (
VolumeTaylorLocalExpansion, H2DLocalExpansion,
LaplaceConformingVolumeTaylorLocalExpansion,
HelmholtzConformingVolumeTaylorLocalExpansion)
from sumpy.kernel import (LaplaceKernel, HelmholtzKernel, AxisTargetDerivative,
DirectionalSourceDerivative)
from arraycontext import pytest_generate_tests_for_array_contexts
from pytools.convergence import PConvergenceVerifier
from pytools.obj_array import make_obj_array
import sumpy.symbolic as sym
import sumpy.toys as t
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
from sumpy.expansion.local import (
H2DLocalExpansion,
LinearPDEConformingVolumeTaylorLocalExpansion,
VolumeTaylorLocalExpansion,
)
from sumpy.expansion.m2l import (
FFTM2LTranslationClassFactory,
NonFFTM2LTranslationClassFactory,
)
from sumpy.expansion.multipole import (
H2DMultipoleExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion,
VolumeTaylorMultipoleExpansion,
VolumeTaylorMultipoleExpansionBase,
)
from sumpy.kernel import (
AxisTargetDerivative,
BiharmonicKernel,
DirectionalSourceDerivative,
HelmholtzKernel,
LaplaceKernel,
StokesletKernel,
)
import logging
logger = logging.getLogger(__name__)
try:
import faulthandler
except ImportError:
pass
else:
faulthandler.enable()
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ test_p2p
@pytest.mark.parametrize("exclude_self", (True, False))
def test_p2p(ctx_getter, exclude_self):
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
def test_p2p(actx_factory, exclude_self):
actx = actx_factory()
dimensions = 3
n = 5000
from sumpy.p2p import P2P
lknl = LaplaceKernel(dimensions)
knl = P2P(ctx,
knl = P2P(actx.context,
[lknl, AxisTargetDerivative(0, lknl)],
exclude_self=exclude_self)
targets = np.random.rand(dimensions, n)
sources = targets if exclude_self else np.random.rand(dimensions, n)
rng = np.random.default_rng(42)
targets = rng.random(size=(dimensions, n))
sources = targets if exclude_self else rng.random(size=(dimensions, n))
strengths = np.ones(n, dtype=np.float64)
extra_kwargs = {}
if exclude_self:
extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32)
extra_kwargs["target_to_source"] = (
actx.from_numpy(np.arange(n, dtype=np.int32)))
evt, (potential, x_derivative) = knl(
queue, targets, sources, [strengths],
out_host=True, **extra_kwargs)
_evt, (potential, _x_derivative) = knl(
actx.queue,
actx.from_numpy(targets),
actx.from_numpy(sources),
[actx.from_numpy(strengths)],
**extra_kwargs)
potential = actx.to_numpy(potential)
potential_ref = np.empty_like(potential)
targets = targets.T
sources = sources.T
for itarg in range(n):
@@ -103,30 +121,153 @@ def test_p2p(ctx_getter, exclude_self):
potential_ref *= 1/(4*np.pi)
rel_err = la.norm(potential - potential_ref)/la.norm(potential_ref)
print(rel_err)
logger.info("error: %.12e", rel_err)
assert rel_err < 1e-3
# }}}
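
# What the (elided) reference loop above computes, as a standalone numpy
# sketch (assuming the 3D Laplace kernel and targets disjoint from the
# sources):
#     pot(x_i) = 1/(4*pi) * sum_j strength_j / |x_i - y_j|
def _direct_laplace_3d(targets, sources, strengths):
    # targets: (n, 3), sources: (m, 3), strengths: (m,)
    pot = np.empty(len(targets))
    for i, tgt in enumerate(targets):
        pot[i] = np.sum(strengths / la.norm(tgt - sources, axis=1))
    return pot / (4*np.pi)
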
# {{{ test_p2e_multiple
@pytest.mark.parametrize(("base_knl", "expn_class"), [
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorMultipoleExpansion),
])
def test_p2e_multiple(actx_factory, base_knl, expn_class):
order = 4
actx = actx_factory()
nsources = 100
extra_kwargs = {}
if isinstance(base_knl, HelmholtzKernel):
if base_knl.allow_evanescent:
extra_kwargs["k"] = 0.2 * (0.707 + 0.707j)
else:
extra_kwargs["k"] = 0.2
if isinstance(base_knl, StokesletKernel):
extra_kwargs["mu"] = 0.2
source_kernels = [
DirectionalSourceDerivative(base_knl, "dir_vec"),
base_knl,
]
knl = base_knl
expn = expn_class(knl, order=order)
from sumpy import P2EFromSingleBox
rng = np.random.default_rng(14)
center = np.array([2, 1, 0][:knl.dim], np.float64)
sources = actx.from_numpy(
0.7 * (-0.5 + rng.random(size=(knl.dim, nsources), dtype=np.float64))
+ center[:, np.newaxis])
strengths = [
actx.from_numpy(np.ones(nsources, dtype=np.float64) * (1/nsources)),
actx.from_numpy(np.ones(nsources, dtype=np.float64) * (2/nsources))
]
source_boxes = actx.from_numpy(np.array([0], dtype=np.int32))
box_source_starts = actx.from_numpy(np.array([0], dtype=np.int32))
box_source_counts_nonchild = (
actx.from_numpy(np.array([nsources], dtype=np.int32)))
alpha = np.linspace(0, 2*np.pi, nsources, dtype=np.float64)
dir_vec = actx.from_numpy(np.vstack([np.cos(alpha), np.sin(alpha)]))
from sumpy.expansion.local import LocalExpansionBase
if issubclass(expn_class, LocalExpansionBase):
loc_center = np.array([5.5, 0.0, 0.0][:knl.dim]) + center
centers = np.array(loc_center, dtype=np.float64).reshape(knl.dim, 1)
else:
centers = (np.array([0.0, 0.0, 0.0][:knl.dim],
dtype=np.float64).reshape(knl.dim, 1)
+ center[:, np.newaxis])
centers = actx.from_numpy(centers)
rscale = 0.5 # pick something non-1
# apply p2e at the same time
p2e = P2EFromSingleBox(actx.context, expn,
kernels=source_kernels,
strength_usage=[0, 1])
_evt, (mpoles,) = p2e(actx.queue,
source_boxes=source_boxes,
box_source_starts=box_source_starts,
box_source_counts_nonchild=box_source_counts_nonchild,
centers=centers,
sources=sources,
strengths=strengths,
nboxes=1,
tgt_base_ibox=0,
rscale=rscale,
dir_vec=dir_vec,
**extra_kwargs)
actual_result = actx.to_numpy(mpoles)
# apply p2e separately
expected_result = np.zeros_like(actual_result)
for i, source_kernel in enumerate(source_kernels):
extra_source_kwargs = extra_kwargs.copy()
if isinstance(source_kernel, DirectionalSourceDerivative):
extra_source_kwargs["dir_vec"] = dir_vec
p2e = P2EFromSingleBox(actx.context, expn,
kernels=[source_kernel], strength_usage=[i])
_evt, (mpoles,) = p2e(actx.queue,
source_boxes=source_boxes,
box_source_starts=box_source_starts,
box_source_counts_nonchild=box_source_counts_nonchild,
centers=centers,
sources=sources,
strengths=strengths,
nboxes=1,
tgt_base_ibox=0,
rscale=rscale,
**extra_source_kwargs)
mpoles = actx.to_numpy(mpoles)
expected_result += mpoles
norm = la.norm(actual_result - expected_result)/la.norm(expected_result)
assert norm < 1e-12
# }}}
# {{{ test_p2e2p
@pytest.mark.parametrize("order", [4])
@pytest.mark.parametrize(("base_knl", "expn_class"), [
(LaplaceKernel(2), VolumeTaylorLocalExpansion),
(LaplaceKernel(2), VolumeTaylorMultipoleExpansion),
(LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion),
(LaplaceKernel(2), LaplaceConformingVolumeTaylorMultipoleExpansion),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), VolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), VolumeTaylorLocalExpansion),
(HelmholtzKernel(2), HelmholtzConformingVolumeTaylorLocalExpansion),
(HelmholtzKernel(2), HelmholtzConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion),
(HelmholtzKernel(2), LinearPDEConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), H2DLocalExpansion),
(HelmholtzKernel(2), H2DMultipoleExpansion),
(DirectionalSourceDerivative(BiharmonicKernel(2), "dir_vec"),
VolumeTaylorMultipoleExpansion),
(DirectionalSourceDerivative(BiharmonicKernel(2), "dir_vec"),
VolumeTaylorLocalExpansion),
(HelmholtzKernel(2, allow_evanescent=True), VolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2, allow_evanescent=True), VolumeTaylorLocalExpansion),
(HelmholtzKernel(2, allow_evanescent=True),
HelmholtzConformingVolumeTaylorLocalExpansion),
LinearPDEConformingVolumeTaylorLocalExpansion),
(HelmholtzKernel(2, allow_evanescent=True),
HelmholtzConformingVolumeTaylorMultipoleExpansion),
LinearPDEConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2, allow_evanescent=True), H2DLocalExpansion),
(HelmholtzKernel(2, allow_evanescent=True), H2DMultipoleExpansion),
])
@@ -134,17 +275,8 @@ def test_p2p(ctx_getter, exclude_self):
False,
True
])
# Sample: test_p2e2p(cl._csc, LaplaceKernel(2), VolumeTaylorLocalExpansion, 4, False)
def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
#logging.basicConfig(level=logging.INFO)
from sympy.core.cache import clear_cache
clear_cache()
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
np.random.seed(17)
def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative):
actx = actx_factory()
res = 100
nsources = 100
@@ -155,22 +287,24 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
extra_kwargs["k"] = 0.2 * (0.707 + 0.707j)
else:
extra_kwargs["k"] = 0.2
if isinstance(base_knl, StokesletKernel):
extra_kwargs["mu"] = 0.2
if with_source_derivative:
knl = DirectionalSourceDerivative(base_knl, "dir_vec")
else:
knl = base_knl
out_kernels = [
target_kernels = [
knl,
AxisTargetDerivative(0, knl),
]
expn = expn_class(knl, order=order)
from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P
p2e = P2EFromSingleBox(ctx, expn, out_kernels)
e2p = E2PFromSingleBox(ctx, expn, out_kernels)
p2p = P2P(ctx, out_kernels, exclude_self=False)
from sumpy import P2P, E2PFromSingleBox, P2EFromSingleBox
p2e = P2EFromSingleBox(actx.context, expn, kernels=[knl])
e2p = E2PFromSingleBox(actx.context, expn, kernels=target_kernels)
p2p = P2P(actx.context, target_kernels, exclude_self=False)
from pytools.convergence import EOCRecorder
eoc_rec_pot = EOCRecorder()
@@ -182,67 +316,70 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
else:
h_values = [1/2, 1/3, 1/5]
center = np.array([2, 1], np.float64)
sources = (0.7*(-0.5+np.random.rand(knl.dim, nsources).astype(np.float64))
+ center[:, np.newaxis])
rng = np.random.default_rng(19)
center = np.array([2, 1, 0][:knl.dim], np.float64)
sources = actx.from_numpy(
0.7 * (-0.5 + rng.random((knl.dim, nsources), dtype=np.float64))
+ center[:, np.newaxis])
strengths = np.ones(nsources, dtype=np.float64) * (1/nsources)
strengths = actx.from_numpy(np.ones(nsources, dtype=np.float64) / nsources)
source_boxes = np.array([0], dtype=np.int32)
box_source_starts = np.array([0], dtype=np.int32)
box_source_counts_nonchild = np.array([nsources], dtype=np.int32)
source_boxes = actx.from_numpy(np.array([0], dtype=np.int32))
box_source_starts = actx.from_numpy(np.array([0], dtype=np.int32))
box_source_counts_nonchild = (
actx.from_numpy(np.array([nsources], dtype=np.int32)))
extra_source_kwargs = extra_kwargs.copy()
if with_source_derivative:
if isinstance(knl, DirectionalSourceDerivative):
alpha = np.linspace(0, 2*np.pi, nsources, dtype=np.float64)
dir_vec = np.vstack([np.cos(alpha), np.sin(alpha)])
extra_source_kwargs["dir_vec"] = dir_vec
extra_source_kwargs["dir_vec"] = actx.from_numpy(dir_vec)
from sumpy.visualization import FieldPlotter
for h in h_values:
if issubclass(expn_class, LocalExpansionBase):
loc_center = np.array([5.5, 0.0]) + center
loc_center = np.array([5.5, 0.0, 0.0][:knl.dim]) + center
centers = np.array(loc_center, dtype=np.float64).reshape(knl.dim, 1)
fp = FieldPlotter(loc_center, extent=h, npoints=res)
else:
eval_center = np.array([1/h, 0.0]) + center
eval_center = np.array([1/h, 0.0, 0.0][:knl.dim]) + center
fp = FieldPlotter(eval_center, extent=0.1, npoints=res)
centers = (
np.array([0.0, 0.0], dtype=np.float64).reshape(knl.dim, 1)
+ center[:, np.newaxis])
centers = (np.array([0.0, 0.0, 0.0][:knl.dim],
dtype=np.float64).reshape(knl.dim, 1)
+ center[:, np.newaxis])
targets = fp.points
centers = actx.from_numpy(centers)
targets = actx.from_numpy(make_obj_array(fp.points))
rscale = 0.5 # pick something non-1
# {{{ apply p2e
evt, (mpoles,) = p2e(queue,
_evt, (mpoles,) = p2e(actx.queue,
source_boxes=source_boxes,
box_source_starts=box_source_starts,
box_source_counts_nonchild=box_source_counts_nonchild,
centers=centers,
sources=sources,
strengths=strengths,
strengths=(strengths,),
nboxes=1,
tgt_base_ibox=0,
rscale=rscale,
#flags="print_hl_cl",
out_host=True, **extra_source_kwargs)
**extra_source_kwargs)
# }}}
# {{{ apply e2p
ntargets = targets.shape[-1]
ntargets = fp.points.shape[-1]
box_target_starts = np.array([0], dtype=np.int32)
box_target_counts_nonchild = np.array([ntargets], dtype=np.int32)
box_target_starts = actx.from_numpy(np.array([0], dtype=np.int32))
box_target_counts_nonchild = (
actx.from_numpy(np.array([ntargets], dtype=np.int32)))
evt, (pot, grad_x, ) = e2p(
queue,
_evt, (pot, grad_x, ) = e2p(
actx.queue,
src_expansions=mpoles,
src_base_ibox=0,
target_boxes=source_boxes,
@@ -251,19 +388,20 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
centers=centers,
targets=targets,
rscale=rscale,
#flags="print_hl_cl",
out_host=True, **extra_kwargs)
**extra_kwargs)
pot = actx.to_numpy(pot)
grad_x = actx.to_numpy(grad_x)
# }}}
# {{{ compute (direct) reference solution
evt, (pot_direct, grad_x_direct, ) = p2p(
queue,
_evt, (pot_direct, grad_x_direct, ) = p2p(
actx.queue,
targets, sources, (strengths,),
out_host=True,
**extra_source_kwargs)
pot_direct = actx.to_numpy(pot_direct)
grad_x_direct = actx.to_numpy(grad_x_direct)
err_pot = la.norm((pot - pot_direct)/res**2)
err_grad_x = la.norm((grad_x - grad_x_direct)/res**2)
@@ -297,11 +435,11 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
eoc_rec_pot.add_data_point(h, err_pot)
eoc_rec_grad_x.add_data_point(h, err_grad_x)
print(expn_class, knl, order)
print("POTENTIAL:")
print(eoc_rec_pot)
print("X TARGET DERIVATIVE:")
print(eoc_rec_grad_x)
logger.info("expn_cls %s knl %s order %d", expn_class, knl, order)
logger.info("POTENTIAL:")
logger.info("%s", eoc_rec_pot)
logger.info("X TARGET DERIVATIVE:")
logger.info("%s", eoc_rec_grad_x)
tgt_order = order + 1
if issubclass(expn_class, LocalExpansionBase):
@@ -322,6 +460,10 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
slack += 1
grad_slack += 2
if isinstance(base_knl, DirectionalSourceDerivative):
slack += 1
grad_slack += 2
if isinstance(base_knl, HelmholtzKernel):
if base_knl.allow_evanescent:
slack += 0.5
@@ -334,40 +476,60 @@ def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative):
assert eoc_rec_pot.order_estimate() > tgt_order - slack
assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack
@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [
(LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(LaplaceKernel(2), LaplaceConformingVolumeTaylorLocalExpansion,
LaplaceConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), HelmholtzConformingVolumeTaylorLocalExpansion,
HelmholtzConformingVolumeTaylorMultipoleExpansion),
(HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion)
# }}}
# {{{ test_translations
@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class, use_fft", [
(LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion,
False),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, True),
(LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion,
False),
(LaplaceKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(LaplaceKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, True),
(HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion,
False),
(HelmholtzKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion,
False),
(HelmholtzKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
(HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, False),
(StokesletKernel(2, 0, 0), VolumeTaylorLocalExpansion,
VolumeTaylorMultipoleExpansion, False),
(StokesletKernel(2, 0, 0), LinearPDEConformingVolumeTaylorLocalExpansion,
LinearPDEConformingVolumeTaylorMultipoleExpansion, False),
])
def test_translations(ctx_getter, knl, local_expn_class, mpole_expn_class):
logging.basicConfig(level=logging.INFO)
def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class,
use_fft, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
from sympy.core.cache import clear_cache
clear_cache()
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
np.random.seed(17)
actx = actx_factory()
res = 20
nsources = 15
out_kernels = [knl]
extra_kwargs = {}
if isinstance(knl, HelmholtzKernel):
extra_kwargs["k"] = 0.05
if isinstance(knl, StokesletKernel):
extra_kwargs["mu"] = 0.05
# Just to make sure things also work away from the origin
origin = np.array([2, 1], np.float64)
sources = (0.7*(-0.5+np.random.rand(knl.dim, nsources).astype(np.float64))
+ origin[:, np.newaxis])
rng = np.random.default_rng(18)
origin = np.array([2, 1, 0][:knl.dim], np.float64)
sources = (
0.7 * (-0.5 + rng.random((knl.dim, nsources), dtype=np.float64))
+ origin[:, np.newaxis])
strengths = np.ones(nsources, dtype=np.float64) * (1/nsources)
pconv_verifier_p2m2p = PConvergenceVerifier()
@@ -377,18 +539,20 @@ def test_translations(ctx_getter, knl, local_expn_class, mpole_expn_class):
from sumpy.visualization import FieldPlotter
eval_offset = np.array([5.5, 0.0])
eval_offset = np.array([5.5, 0.0, 0][:knl.dim])
fp = FieldPlotter(eval_offset + origin, extent=0.3, npoints=res)
targets = fp.points
centers = (np.array(
[
# box 0: particles, first mpole here
[0, 0],
[0, 0, 0][:knl.dim],
# box 1: second mpole here
np.array([-0.2, 0.1], np.float64),
np.array([-0.2, 0.1, 0][:knl.dim], np.float64),
# box 2: first local here
eval_offset + np.array([0.3, -0.2], np.float64),
eval_offset + np.array([0.3, -0.2, 0][:knl.dim], np.float64),
# box 3: second local and eval here
eval_offset
@@ -397,228 +561,287 @@ def test_translations(ctx_getter, knl, local_expn_class, mpole_expn_class):
del eval_offset
from sumpy.expansion import VolumeTaylorExpansionBase
if isinstance(knl, HelmholtzKernel) and \
issubclass(local_expn_class, VolumeTaylorExpansionBase):
# FIXME: Embarrassing--but we run out of memory for higher orders.
orders = [2, 3]
else:
if knl.dim == 2: # noqa: SIM108
orders = [2, 3, 4]
nboxes = centers.shape[-1]
def eval_at(e2p, source_box_nr, rscale):
e2p_target_boxes = np.array([source_box_nr], dtype=np.int32)
# These are indexed by global box numbers.
e2p_box_target_starts = np.array([0, 0, 0, 0], dtype=np.int32)
e2p_box_target_counts_nonchild = np.array([0, 0, 0, 0],
dtype=np.int32)
e2p_box_target_counts_nonchild[source_box_nr] = ntargets
evt, (pot,) = e2p(
queue,
src_expansions=mpoles,
src_base_ibox=0,
target_boxes=e2p_target_boxes,
box_target_starts=e2p_box_target_starts,
box_target_counts_nonchild=e2p_box_target_counts_nonchild,
centers=centers,
targets=targets,
rscale=rscale,
out_host=True, **extra_kwargs
)
else:
orders = [3, 4, 5]
return pot
if use_fft:
m2l_factory = FFTM2LTranslationClassFactory()
else:
m2l_factory = NonFFTM2LTranslationClassFactory()
m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)()
toy_ctx = t.ToyContext(
actx.context,
kernel=knl,
local_expn_class=partial(local_expn_class,
m2l_translation=m2l_translation),
mpole_expn_class=mpole_expn_class,
extra_kernel_kwargs=extra_kwargs,
)
p = t.PointSources(toy_ctx, sources, weights=strengths)
p2p = p.eval(targets)
m1_rscale = 0.5
m2_rscale = 0.25
l1_rscale = 0.5
l2_rscale = 0.25
for order in orders:
m_expn = mpole_expn_class(knl, order=order)
l_expn = local_expn_class(knl, order=order)
print(centers[:, 0].shape)
p2m = t.multipole_expand(p, centers[:, 0], order, m1_rscale)
p2m2p = p2m.eval(targets)
err = la.norm((p2m2p - p2p) / res**2)
err = err / (la.norm(p2p) / res**2)
pconv_verifier_p2m2p.add_data_point(order, err)
from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR
p2m = P2EFromSingleBox(ctx, m_expn)
m2m = E2EFromCSR(ctx, m_expn, m_expn)
m2p = E2PFromSingleBox(ctx, m_expn, out_kernels)
m2l = E2EFromCSR(ctx, m_expn, l_expn)
l2l = E2EFromCSR(ctx, l_expn, l_expn)
l2p = E2PFromSingleBox(ctx, l_expn, out_kernels)
p2p = P2P(ctx, out_kernels, exclude_self=False)
p2m2m = t.multipole_expand(p2m, centers[:, 1], order, m2_rscale)
p2m2m2p = p2m2m.eval(targets)
err = la.norm((p2m2m2p - p2p)/res**2)
err = err / (la.norm(p2p) / res**2)
pconv_verifier_p2m2m2p.add_data_point(order, err)
fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res)
targets = fp.points
p2m2m2l = t.local_expand(p2m2m, centers[:, 2], order, l1_rscale)
p2m2m2l2p = p2m2m2l.eval(targets)
err = la.norm((p2m2m2l2p - p2p)/res**2)
err = err / (la.norm(p2p) / res**2)
pconv_verifier_p2m2m2l2p.add_data_point(order, err)
# {{{ compute (direct) reference solution
p2m2m2l2l = t.local_expand(p2m2m2l, centers[:, 3], order, l2_rscale)
p2m2m2l2l2p = p2m2m2l2l.eval(targets)
err = la.norm((p2m2m2l2l2p - p2p)/res**2)
err = err / (la.norm(p2p) / res**2)
pconv_verifier_full.add_data_point(order, err)
evt, (pot_direct,) = p2p(
queue,
targets, sources, (strengths,),
out_host=True, **extra_kwargs)
for name, verifier in [
("p2m2p", pconv_verifier_p2m2p),
("p2m2m2p", pconv_verifier_p2m2m2p),
("p2m2m2l2p", pconv_verifier_p2m2m2l2p),
("full", pconv_verifier_full),
]:
logger.info(30*"-")
logger.info("name: %s", name)
logger.info(30*"-")
logger.info("result: %s", verifier)
logger.info(30*"-")
# }}}
verifier()
m1_rscale = 0.5
m2_rscale = 0.25
l1_rscale = 0.5
l2_rscale = 0.25
# }}}
# {{{ apply P2M
p2m_source_boxes = np.array([0], dtype=np.int32)
# {{{ test_m2m_and_l2l_exprs_simpler
# These are indexed by global box numbers.
p2m_box_source_starts = np.array([0, 0, 0, 0], dtype=np.int32)
p2m_box_source_counts_nonchild = np.array([nsources, 0, 0, 0],
dtype=np.int32)
@pytest.mark.parametrize("order", [4])
@pytest.mark.parametrize(("base_knl", "local_expn_class", "mpole_expn_class"), [
(LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion),
])
@pytest.mark.parametrize("with_source_derivative", [
False,
True
])
def test_m2m_and_l2l_exprs_simpler(base_knl, local_expn_class, mpole_expn_class,
order, with_source_derivative):
extra_kwargs = {}
if isinstance(base_knl, HelmholtzKernel):
if base_knl.allow_evanescent:
extra_kwargs["k"] = 0.2 * (0.707 + 0.707j)
else:
extra_kwargs["k"] = 0.2
if isinstance(base_knl, StokesletKernel):
extra_kwargs["mu"] = 0.2
evt, (mpoles,) = p2m(queue,
source_boxes=p2m_source_boxes,
box_source_starts=p2m_box_source_starts,
box_source_counts_nonchild=p2m_box_source_counts_nonchild,
centers=centers,
sources=sources,
strengths=strengths,
nboxes=nboxes,
rscale=m1_rscale,
if with_source_derivative:
knl = DirectionalSourceDerivative(base_knl, "dir_vec")
else:
knl = base_knl
tgt_base_ibox=0,
mpole_expn = mpole_expn_class(knl, order=order)
local_expn = local_expn_class(knl, order=order)
#flags="print_hl_wrapper",
out_host=True, **extra_kwargs)
dvec = sym.make_sym_vector("d", knl.dim)
src_coeff_exprs = [sym.Symbol(f"src_coeff{i}") for i in range(len(mpole_expn))]
# }}}
src_rscale = 3
tgt_rscale = 2
ntargets = targets.shape[-1]
faster_m2m = mpole_expn.translate_from(mpole_expn, src_coeff_exprs, src_rscale,
dvec, tgt_rscale)
slower_m2m = mpole_expn.translate_from(mpole_expn, src_coeff_exprs, src_rscale,
dvec, tgt_rscale, _fast_version=False)
pot = eval_at(m2p, 0, m1_rscale)
for expr1, expr2 in zip(faster_m2m, slower_m2m, strict=True):
assert float(sym.doit(expr1 - expr2).expand()) == 0.0
err = la.norm((pot - pot_direct)/res**2)
err = err / (la.norm(pot_direct) / res**2)
faster_l2l = local_expn.translate_from(local_expn, src_coeff_exprs, src_rscale,
dvec, tgt_rscale)
slower_l2l = local_expn.translate_from(local_expn, src_coeff_exprs, src_rscale,
dvec, tgt_rscale, _fast_version=False)
for expr1, expr2 in zip(faster_l2l, slower_l2l, strict=True):
assert float(sym.doit(expr1 - expr2).expand()) == 0.0
pconv_verifier_p2m2p.add_data_point(order, err)
# }}}
# {{{ apply M2M
m2m_target_boxes = np.array([1], dtype=np.int32)
m2m_src_box_starts = np.array([0, 1], dtype=np.int32)
m2m_src_box_lists = np.array([0], dtype=np.int32)
# {{{ test toeplitz
evt, (mpoles,) = m2m(queue,
src_expansions=mpoles,
src_base_ibox=0,
tgt_base_ibox=0,
ntgt_level_boxes=mpoles.shape[0],
def _m2l_translate_simple(tgt_expansion, src_expansion, src_coeff_exprs, src_rscale,
dvec, tgt_rscale):
if not tgt_expansion.use_rscale:
src_rscale = 1
tgt_rscale = 1
target_boxes=m2m_target_boxes,
from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansionBase
if not isinstance(src_expansion, VolumeTaylorMultipoleExpansionBase):
return 1
src_box_starts=m2m_src_box_starts,
src_box_lists=m2m_src_box_lists,
centers=centers,
# We know the general form of the multipole expansion is:
#
# coeff0 * diff(kernel, mi0) + coeff1 * diff(kernel, mi1) + ...
#
# To get the local expansion coefficients, we take derivatives of
# the multipole expansion.
taker = src_expansion.kernel.get_derivative_taker(dvec, src_rscale, sac=None)
src_rscale=m1_rscale,
tgt_rscale=m2_rscale,
from sumpy.tools import add_mi
#flags="print_hl_cl",
out_host=True, **extra_kwargs)
result = []
for deriv in tgt_expansion.get_coefficient_identifiers():
local_result = []
for coeff, term in zip(
src_coeff_exprs,
src_expansion.get_coefficient_identifiers(), strict=True):
# }}}
kernel_deriv = taker.diff(add_mi(deriv, term)) / src_rscale**sum(deriv)
pot = eval_at(m2p, 1, m2_rscale)
local_result.append(
coeff * kernel_deriv * tgt_rscale**sum(deriv))
result.append(sym.Add(*local_result))
err = la.norm((pot - pot_direct)/res**2)
err = err / (la.norm(pot_direct) / res**2)
return result
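
# In symbols, the loop above assembles (roughly, with G the kernel and d
# the multipole-to-local separation vector):
#     L_beta = (tgt_rscale**|beta| / src_rscale**|beta|)
#              * sum_alpha c_alpha * D**(alpha + beta) G(d)
# The multi-index bookkeeping is componentwise addition, e.g. (a sketch;
# add_mi comes from sumpy.tools, as used above):
#     add_mi((1, 0), (0, 2)) == (1, 2)
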
pconv_verifier_p2m2m2p.add_data_point(order, err)
# {{{ apply M2L
def test_m2l_toeplitz():
dim = 3
knl = LaplaceKernel(dim)
m2l_target_boxes = np.array([2], dtype=np.int32)
m2l_src_box_starts = np.array([0, 1], dtype=np.int32)
m2l_src_box_lists = np.array([1], dtype=np.int32)
local_expn_class = LinearPDEConformingVolumeTaylorLocalExpansion
mpole_expn_class = LinearPDEConformingVolumeTaylorMultipoleExpansion
m2l_factory = NonFFTM2LTranslationClassFactory()
m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)()
evt, (mpoles,) = m2l(queue,
src_expansions=mpoles,
src_base_ibox=0,
tgt_base_ibox=0,
ntgt_level_boxes=mpoles.shape[0],
local_expn = local_expn_class(knl, order=5, m2l_translation=m2l_translation)
mpole_expn = mpole_expn_class(knl, order=5)
target_boxes=m2l_target_boxes,
src_box_starts=m2l_src_box_starts,
src_box_lists=m2l_src_box_lists,
centers=centers,
dvec = sym.make_sym_vector("d", dim)
src_rscale=m2_rscale,
tgt_rscale=l1_rscale,
rng = np.random.default_rng(44)
src_coeff_exprs = list(1 + rng.standard_normal(len(mpole_expn)))
src_rscale = 2.0
tgt_rscale = 1.0
#flags="print_hl_cl",
out_host=True, **extra_kwargs)
expected_output = _m2l_translate_simple(
local_expn, mpole_expn, src_coeff_exprs,
src_rscale, dvec, tgt_rscale)
actual_output = local_expn.translate_from(
mpole_expn, src_coeff_exprs,
src_rscale, dvec, tgt_rscale, sac=None)
# }}}
replace_dict = {d: rng.random() for d in dvec}
for sym_a, sym_b in zip(expected_output, actual_output, strict=True):
num_a = sym_a.xreplace(replace_dict)
num_b = sym_b.xreplace(replace_dict)
pot = eval_at(l2p, 2, l1_rscale)
assert abs(num_a - num_b)/abs(num_a) < 1e-10
err = la.norm((pot - pot_direct)/res**2)
err = err / (la.norm(pot_direct) / res**2)
# }}}
pconv_verifier_p2m2m2l2p.add_data_point(order, err)
# {{{ apply L2L
# {{{ test_m2m_compressed
l2l_target_boxes = np.array([3], dtype=np.int32)
l2l_src_box_starts = np.array([0, 1], dtype=np.int32)
l2l_src_box_lists = np.array([2], dtype=np.int32)
@pytest.mark.parametrize("dim", [2, 3])
@pytest.mark.parametrize("order", [2, 4, 6])
def test_m2m_compressed_error_helmholtz(actx_factory, dim, order):
from sumpy import toys
evt, (mpoles,) = l2l(queue,
src_expansions=mpoles,
src_base_ibox=0,
tgt_base_ibox=0,
ntgt_level_boxes=mpoles.shape[0],
actx = actx_factory()
target_boxes=l2l_target_boxes,
src_box_starts=l2l_src_box_starts,
src_box_lists=l2l_src_box_lists,
centers=centers,
knl = HelmholtzKernel(dim)
extra_kernel_kwargs = {"k": 5}
src_rscale=l1_rscale,
tgt_rscale=l2_rscale,
mpole_expn_classes = [LinearPDEConformingVolumeTaylorMultipoleExpansion,
VolumeTaylorMultipoleExpansion]
local_expn_classes = [LinearPDEConformingVolumeTaylorLocalExpansion,
VolumeTaylorLocalExpansion]
#flags="print_hl_wrapper",
out_host=True, **extra_kwargs)
eval_center = np.array([0.1, 0.0, 0.0][:dim]).reshape(dim, 1)
mpole_center = np.array([0.0, 0.0, 0.0][:dim]).reshape(dim, 1)
# }}}
ntargets_per_dim = 10
nsources_per_dim = 10
pot = eval_at(l2p, 3, l2_rscale)
sources_grid = np.meshgrid(*[np.linspace(0, 1, nsources_per_dim)
for _ in range(dim)])
sources_grid = np.ndarray.flatten(np.array(sources_grid)).reshape(dim, -1)
err = la.norm((pot - pot_direct)/res**2)
err = err / (la.norm(pot_direct) / res**2)
targets_grid = np.meshgrid(*[np.linspace(0, 1, ntargets_per_dim)
for _ in range(dim)])
targets_grid = np.ndarray.flatten(np.array(targets_grid)).reshape(dim, -1)
pconv_verifier_full.add_data_point(order, err)
targets = eval_center - 0.1*(targets_grid - 0.5)
for name, verifier in [
("p2m2p", pconv_verifier_p2m2p),
("p2m2m2p", pconv_verifier_p2m2m2p),
("p2m2m2l2p", pconv_verifier_p2m2m2l2p),
("full", pconv_verifier_full),
]:
print(30*"-")
print(name)
print(30*"-")
print(verifier)
print(30*"-")
verifier()
from pytools.convergence import EOCRecorder
eoc_rec = EOCRecorder()
h_values = 2.0**np.arange(-7, -3)
for h in h_values:
sources = (h*(-0.5+sources_grid.astype(np.float64)) + mpole_center)
second_center = mpole_center + h/2
furthest_source = np.max(np.abs(sources - mpole_center))
m2m_vals = [0, 0]
for i, (mpole_expn_class, local_expn_class) in \
enumerate(zip(mpole_expn_classes, local_expn_classes, strict=True)):
tctx = toys.ToyContext(
actx.context,
knl,
extra_kernel_kwargs=extra_kernel_kwargs,
local_expn_class=local_expn_class,
mpole_expn_class=mpole_expn_class,
)
pt_src = toys.PointSources(
tctx,
sources,
np.ones(sources.shape[-1])
)
mexp = toys.multipole_expand(pt_src,
center=mpole_center.reshape(dim),
order=order,
rscale=h)
mexp2 = toys.multipole_expand(mexp,
center=second_center.reshape(dim),
order=order,
rscale=h)
m2m_vals[i] = mexp2.eval(targets)
err = np.linalg.norm(m2m_vals[1] - m2m_vals[0]) \
/ np.linalg.norm(m2m_vals[1])
eoc_rec.add_data_point(furthest_source, err)
logger.info("\n%s", eoc_rec)
assert eoc_rec.order_estimate() >= order + 1
# }}}
# You can test individual routines by typing
# $ python test_kernels.py 'test_p2p(cl.create_some_context)'
# $ python test_kernels.py 'test_p2p(_acf, True)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2018 Alexandru Fikl"
@@ -22,247 +23,252 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import sys
import numpy as np
import numpy.linalg as la
import pytest
import pyopencl as cl
import pyopencl.array # noqa
from arraycontext import pytest_generate_tests_for_array_contexts
from sumpy.tools import vector_to_device
from sumpy.tools import MatrixBlockIndexRanges
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
import pytest
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
import logging
logger = logging.getLogger(__name__)
try:
import faulthandler
except ImportError:
pass
else:
faulthandler.enable()
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
def _build_geometry(queue, n, mode, target_radius=1.0):
# parametrize circle
t = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False)
unit_circle = np.exp(1j * t)
unit_circle = np.array([unit_circle.real, unit_circle.imag])
def _build_geometry(actx, ntargets, nsources, mode, target_radius=1.0):
# source points
t = np.linspace(0.0, 2.0 * np.pi, nsources, endpoint=False)
sources = np.array([np.cos(t), np.sin(t)])
# create density
# density
sigma = np.cos(mode * t)
# create sources and targets
h = 2.0 * np.pi / n
targets = target_radius * unit_circle
sources = unit_circle
# target points
t = np.linspace(0.0, 2.0 * np.pi, ntargets, endpoint=False)
targets = target_radius * np.array([np.cos(t), np.sin(t)])
# target centers and expansion radii
h = 2.0 * np.pi * target_radius / ntargets
radius = 7.0 * h
centers = unit_circle * (1.0 - radius)
expansion_radii = radius * np.ones(n)
return (cl.array.to_device(queue, targets),
cl.array.to_device(queue, sources),
vector_to_device(queue, centers),
cl.array.to_device(queue, expansion_radii),
cl.array.to_device(queue, sigma))
def _build_block_index(queue, nnodes, nblks, factor):
indices = np.arange(0, nnodes)
ranges = np.arange(0, nnodes + 1, nnodes // nblks)
if abs(factor - 1.0) < 1.0e-14:
ranges_ = ranges
indices_ = indices
centers = (1.0 - radius) * targets
expansion_radii = np.full(ntargets, radius)
return (actx.from_numpy(targets),
actx.from_numpy(sources),
actx.from_numpy(centers),
actx.from_numpy(expansion_radii),
actx.from_numpy(sigma))
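
# Geometry sketch for the helper above: sources and targets sit on circles
# (the targets scaled by target_radius), sigma is a single Fourier mode on
# the source circle, QBX centers are pulled inside to (1 - radius) times
# the target points, and every target shares an expansion radius of seven
# mesh widths (this file's convention, not a universal choice).
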
def _build_subset_indices(actx, ntargets, nsources, factor):
tgtindices = np.arange(0, ntargets)
srcindices = np.arange(0, nsources)
rng = np.random.default_rng()
if abs(factor - 1.0) > 1.0e-14:
tgtindices = rng.choice(tgtindices,
size=int(factor * ntargets), replace=False)
srcindices = rng.choice(srcindices,
size=int(factor * nsources), replace=False)
else:
indices_ = np.empty(ranges.shape[0] - 1, dtype=np.object)
for i in range(ranges.shape[0] - 1):
iidx = indices[np.s_[ranges[i]:ranges[i + 1]]]
indices_[i] = np.sort(np.random.choice(iidx,
size=int(factor * len(iidx)), replace=False))
rng.shuffle(tgtindices)
rng.shuffle(srcindices)
ranges_ = np.cumsum([0] + [r.shape[0] for r in indices_])
indices_ = np.hstack(indices_)
tgtindices, srcindices = np.meshgrid(tgtindices, srcindices)
return (
actx.freeze(actx.from_numpy(tgtindices.ravel())),
actx.freeze(actx.from_numpy(srcindices.ravel())))
from sumpy.tools import BlockIndexRanges
return BlockIndexRanges(queue.context,
cl.array.to_device(queue, indices_).with_queue(None),
cl.array.to_device(queue, ranges_).with_queue(None))
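
# Index convention produced by _build_subset_indices (a sketch): meshgrid
# plus ravel enumerates every (target, source) pair, so entry k of a
# subset result corresponds to the dense matrix entry mat[tgt[k], src[k]]
# -- which is exactly what the asserts below compare against.
def _subset_pairs_demo():
    tgt, src = np.meshgrid(np.array([0, 2]), np.array([1, 3]))
    pairs = sorted(zip(tgt.ravel(), src.ravel()))
    assert pairs == [(0, 1), (0, 3), (2, 1), (2, 3)]
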
# {{{ test_qbx_direct
@pytest.mark.parametrize('factor', [1.0, 0.6])
@pytest.mark.parametrize('lpot_id', [1, 2])
def test_qbx_direct(ctx_getter, factor, lpot_id):
logging.basicConfig(level=logging.INFO)
@pytest.mark.parametrize("factor", [1.0, 0.6])
@pytest.mark.parametrize("lpot_id", [1, 2])
def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx = actx_factory()
ndim = 2
nblks = 10
order = 12
mode_nr = 25
from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative
from sumpy.kernel import DirectionalSourceDerivative, LaplaceKernel
if lpot_id == 1:
knl = LaplaceKernel(ndim)
base_knl = LaplaceKernel(ndim)
knl = base_knl
elif lpot_id == 2:
knl = LaplaceKernel(ndim)
knl = DirectionalSourceDerivative(knl, dir_vec_name="dsource_vec")
base_knl = LaplaceKernel(ndim)
knl = DirectionalSourceDerivative(base_knl, dir_vec_name="dsource_vec")
else:
raise ValueError("unknow lpot_id")
raise ValueError(f"unknown lpot_id: {lpot_id}")
from sumpy.expansion.local import LineTaylorLocalExpansion
lknl = LineTaylorLocalExpansion(knl, order)
expn = LineTaylorLocalExpansion(knl, order)
from sumpy.qbx import LayerPotential
lpot = LayerPotential(ctx, [lknl])
lpot = LayerPotential(actx.context, expansion=expn, source_kernels=(knl,),
target_kernels=(base_knl,))
from sumpy.qbx import LayerPotentialMatrixGenerator
mat_gen = LayerPotentialMatrixGenerator(ctx, [lknl])
mat_gen = LayerPotentialMatrixGenerator(actx.context,
expansion=expn,
source_kernels=(knl,),
target_kernels=(base_knl,))
from sumpy.qbx import LayerPotentialMatrixBlockGenerator
blk_gen = LayerPotentialMatrixBlockGenerator(ctx, [lknl])
from sumpy.qbx import LayerPotentialMatrixSubsetGenerator
blk_gen = LayerPotentialMatrixSubsetGenerator(actx.context,
expansion=expn,
source_kernels=(knl,),
target_kernels=(base_knl,))
for n in [200, 300, 400]:
targets, sources, centers, expansion_radii, sigma = \
_build_geometry(queue, n, mode_nr, target_radius=1.2)
_build_geometry(actx, n, n, mode_nr, target_radius=1.2)
h = 2 * np.pi / n
strengths = (sigma * h,)
tgtindices = _build_block_index(queue, n, nblks, factor)
srcindices = _build_block_index(queue, n, nblks, factor)
index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
tgtindices, srcindices = _build_subset_indices(actx,
ntargets=n, nsources=n, factor=factor)
extra_kwargs = {}
if lpot_id == 2:
extra_kwargs["dsource_vec"] = \
vector_to_device(queue, np.ones((ndim, n)))
from pytools.obj_array import make_obj_array
extra_kwargs["dsource_vec"] = (
actx.from_numpy(make_obj_array(np.ones((ndim, n))))
)
_, (result_lpot,) = lpot(queue,
_, (result_lpot,) = lpot(actx.queue,
targets=targets,
sources=sources,
centers=centers,
expansion_radii=expansion_radii,
strengths=strengths, **extra_kwargs)
result_lpot = result_lpot.get()
result_lpot = actx.to_numpy(result_lpot)
_, (mat,) = mat_gen(queue,
_, (mat,) = mat_gen(actx.queue,
targets=targets,
sources=sources,
centers=centers,
expansion_radii=expansion_radii, **extra_kwargs)
mat = mat.get()
result_mat = mat.dot(strengths[0].get())
mat = actx.to_numpy(mat)
result_mat = mat @ actx.to_numpy(strengths[0])
_, (blk,) = blk_gen(queue,
_, (blk,) = blk_gen(actx.queue,
targets=targets,
sources=sources,
centers=centers,
expansion_radii=expansion_radii,
index_set=index_set, **extra_kwargs)
blk = blk.get()
tgtindices=tgtindices,
srcindices=srcindices, **extra_kwargs)
blk = actx.to_numpy(blk)
rowindices = index_set.linear_row_indices.get(queue)
colindices = index_set.linear_col_indices.get(queue)
tgtindices = actx.to_numpy(tgtindices)
srcindices = actx.to_numpy(srcindices)
eps = 1.0e-10 * la.norm(result_lpot)
assert la.norm(result_mat - result_lpot) < eps
assert la.norm(blk - mat[rowindices, colindices]) < eps
assert la.norm(blk - mat[tgtindices, srcindices]) < eps
# }}}
# {{{ test_p2p_direct
@pytest.mark.parametrize("exclude_self", [True, False])
@pytest.mark.parametrize("factor", [1.0, 0.6])
@pytest.mark.parametrize('lpot_id', [1, 2])
def test_p2p_direct(ctx_getter, exclude_self, factor, lpot_id):
logging.basicConfig(level=logging.INFO)
@pytest.mark.parametrize("lpot_id", [1, 2])
def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False):
if visualize:
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx = actx_factory()
ndim = 2
nblks = 10
mode_nr = 25
from sumpy.kernel import LaplaceKernel, DirectionalSourceDerivative
from sumpy.kernel import DirectionalSourceDerivative, LaplaceKernel
if lpot_id == 1:
lknl = LaplaceKernel(ndim)
elif lpot_id == 2:
lknl = LaplaceKernel(ndim)
lknl = DirectionalSourceDerivative(lknl, dir_vec_name="dsource_vec")
else:
raise ValueError("unknow lpot_id")
raise ValueError(f"unknown lpot_id: '{lpot_id}'")
from sumpy.p2p import P2P
lpot = P2P(ctx, [lknl], exclude_self=exclude_self)
lpot = P2P(actx.context, [lknl], exclude_self=exclude_self)
from sumpy.p2p import P2PMatrixGenerator
mat_gen = P2PMatrixGenerator(ctx, [lknl], exclude_self=exclude_self)
mat_gen = P2PMatrixGenerator(actx.context, [lknl], exclude_self=exclude_self)
from sumpy.p2p import P2PMatrixBlockGenerator
blk_gen = P2PMatrixBlockGenerator(ctx, [lknl], exclude_self=exclude_self)
from sumpy.p2p import P2PMatrixSubsetGenerator
blk_gen = P2PMatrixSubsetGenerator(
actx.context, [lknl], exclude_self=exclude_self)
for n in [200, 300, 400]:
targets, sources, _, _, sigma = \
_build_geometry(queue, n, mode_nr, target_radius=1.2)
targets, sources, _, _, sigma = (
_build_geometry(actx, n, n, mode_nr, target_radius=1.2))
h = 2 * np.pi / n
strengths = (sigma * h,)
tgtindices = _build_block_index(queue, n, nblks, factor)
srcindices = _build_block_index(queue, n, nblks, factor)
index_set = MatrixBlockIndexRanges(ctx, tgtindices, srcindices)
tgtindices, srcindices = _build_subset_indices(actx,
ntargets=n, nsources=n, factor=factor)
extra_kwargs = {}
if exclude_self:
extra_kwargs["target_to_source"] = \
cl.array.arange(queue, 0, n, dtype=np.int)
extra_kwargs["target_to_source"] = (
actx.from_numpy(np.arange(n, dtype=np.int32))
)
if lpot_id == 2:
extra_kwargs["dsource_vec"] = \
vector_to_device(queue, np.ones((ndim, n)))
from pytools.obj_array import make_obj_array
extra_kwargs["dsource_vec"] = (
actx.from_numpy(make_obj_array(np.ones((ndim, n)))))
_, (result_lpot,) = lpot(queue,
_, (result_lpot,) = lpot(actx.queue,
targets=targets,
sources=sources,
strength=strengths, **extra_kwargs)
result_lpot = result_lpot.get()
result_lpot = actx.to_numpy(result_lpot)
_, (mat,) = mat_gen(queue,
_, (mat,) = mat_gen(actx.queue,
targets=targets,
sources=sources, **extra_kwargs)
mat = mat.get()
result_mat = mat.dot(strengths[0].get())
mat = actx.to_numpy(mat)
result_mat = mat @ actx.to_numpy(strengths[0])
_, (blk,) = blk_gen(queue,
_, (blk,) = blk_gen(actx.queue,
targets=targets,
sources=sources,
index_set=index_set, **extra_kwargs)
blk = blk.get()
tgtindices=tgtindices,
srcindices=srcindices, **extra_kwargs)
blk = actx.to_numpy(blk)
tgtindices = actx.to_numpy(tgtindices)
srcindices = actx.to_numpy(srcindices)
eps = 1.0e-10 * la.norm(result_lpot)
assert la.norm(result_mat - result_lpot) < eps
assert la.norm(blk - mat[tgtindices, srcindices]) < eps
index_set = index_set.get(queue)
for i in range(index_set.nblocks):
assert la.norm(index_set.block_take(blk, i)
- index_set.take(mat, i)) < eps
# }}}
# You can test individual routines by typing
# $ python test_kernels.py 'test_p2p(cl.create_some_context)'
# $ python test_matrixgen.py 'test_p2p_direct(_acf, True, 1.0, 1, visualize=True)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2017 Andreas Kloeckner"
@@ -22,64 +23,121 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import numpy as np
import numpy.linalg as la
import logging
import sys
import sumpy.toys as t
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any
import numpy as np
import numpy.linalg as la
import pytest
import pyopencl as cl # noqa: F401
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
from pytools import Record
from sumpy.kernel import (LaplaceKernel, HelmholtzKernel,
BiharmonicKernel, YukawaKernel)
from arraycontext import pytest_generate_tests_for_array_contexts
import sumpy.symbolic as sym
import sumpy.toys as t
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
from sumpy.expansion import (
FullExpansionTermsWrangler,
LinearPDEBasedExpansionTermsWrangler,
)
from sumpy.expansion.diff_op import (
as_scalar_pde,
concat,
curl,
diff,
divergence,
gradient,
laplacian,
make_identity_diff_op,
)
from sumpy.kernel import (
BiharmonicKernel,
ElasticityKernel,
ExpressionKernel,
HelmholtzKernel,
LaplaceKernel,
LineOfCompressionKernel,
StokesletKernel,
StressletKernel,
YukawaKernel,
)
# {{{ pde check for kernels
class BiharmonicKernelInfo:
def __init__(self, dim):
self.kernel = BiharmonicKernel(dim)
self.extra_kwargs = {}
logger = logging.getLogger(__name__)
@staticmethod
def pde_func(cp, pot):
return cp.laplace(cp.laplace(pot))
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
nderivs = 4
# {{{ pde check for kernels
class YukawaKernelInfo:
def __init__(self, dim, lam):
self.kernel = YukawaKernel(dim)
self.lam = lam
self.extra_kwargs = {"lam": lam}
class KernelInfo:
def __init__(self, kernel, **kwargs):
self.kernel = kernel
self.extra_kwargs = kwargs
diff_op = self.kernel.get_pde_as_diff_op()
assert len(diff_op.eqs) == 1
eq = diff_op.eqs[0]
self.eq = eq
def pde_func(self, cp, pot):
return cp.laplace(pot) - self.lam**2*pot
subs_dict = {sym.Symbol(k): v for k, v in self.extra_kwargs.items()}
result = 0
for ident, coeff in self.eq.items():
lresult = pot
for axis, nderivs in enumerate(ident.mi):
lresult = cp.diff(axis, lresult, nderivs)
result += lresult*float(sym.sympify(coeff).xreplace(subs_dict))
return result
nderivs = 2
@property
def nderivs(self):
return max(sum(ident.mi) for ident in self.eq)
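# A sympy-only sketch (independent of sumpy) of what pde_func above computes: a
# PDE stored as {multi_index: coeff} is applied to a potential by taking
# mi[axis] derivatives along each axis and summing the weighted terms.
#
# import sympy as sp
#
# x, y = sp.symbols("x y")
# pot = sp.exp(x) * sp.sin(y)           # any smooth test potential
# pde = {(2, 0): 1, (0, 2): 1}          # the 2D Laplacian as {multi_index: coeff}
# result = sum(c * sp.diff(pot, x, mi[0], y, mi[1]) for mi, c in pde.items())
# assert sp.simplify(result) == 0       # exp(x)*sin(y) is harmonic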
@pytest.mark.parametrize("knl_info", [
BiharmonicKernelInfo(2),
BiharmonicKernelInfo(3),
YukawaKernelInfo(2, 5),
KernelInfo(BiharmonicKernel(2)),
KernelInfo(BiharmonicKernel(3)),
KernelInfo(YukawaKernel(2), lam=5),
KernelInfo(YukawaKernel(3), lam=5),
KernelInfo(LaplaceKernel(2)),
KernelInfo(LaplaceKernel(3)),
KernelInfo(HelmholtzKernel(2), k=5),
KernelInfo(HelmholtzKernel(3), k=5),
KernelInfo(StokesletKernel(2, 0, 1), mu=5),
KernelInfo(StokesletKernel(2, 1, 1), mu=5),
KernelInfo(StokesletKernel(3, 0, 1), mu=5),
KernelInfo(StokesletKernel(3, 1, 1), mu=5),
KernelInfo(StressletKernel(2, 0, 0, 0), mu=5),
KernelInfo(StressletKernel(2, 0, 0, 1), mu=5),
KernelInfo(StressletKernel(3, 0, 0, 0), mu=5),
KernelInfo(StressletKernel(3, 0, 0, 1), mu=5),
KernelInfo(StressletKernel(3, 0, 1, 2), mu=5),
KernelInfo(ElasticityKernel(2, 0, 1), mu=5, nu=0.2),
KernelInfo(ElasticityKernel(2, 0, 0), mu=5, nu=0.2),
KernelInfo(ElasticityKernel(3, 0, 1), mu=5, nu=0.2),
KernelInfo(ElasticityKernel(3, 0, 0), mu=5, nu=0.2),
KernelInfo(LineOfCompressionKernel(3, 0), mu=5, nu=0.2),
KernelInfo(LineOfCompressionKernel(3, 1), mu=5, nu=0.2),
])
def test_pde_check_kernels(ctx_factory, knl_info, order=5):
def test_pde_check_kernels(actx_factory, knl_info, order=5):
actx = actx_factory()
dim = knl_info.kernel.dim
tctx = t.ToyContext(ctx_factory(), knl_info.kernel,
tctx = t.ToyContext(actx.context, knl_info.kernel,
extra_source_kwargs=knl_info.extra_kwargs)
rng = np.random.default_rng(42)
pt_src = t.PointSources(
tctx,
np.random.rand(dim, 50) - 0.5,
rng.random(size=(dim, 50)) - 0.5,
np.ones(50))
from pytools.convergence import EOCRecorder
from sumpy.point_calculus import CalculusPatch
eoc_rec = EOCRecorder()
@@ -92,17 +150,20 @@ def test_pde_check_kernels(ctx_factory, knl_info, order=5):
err = la.norm(pde)
eoc_rec.add_data_point(h, err)
print(eoc_rec)
logger.info("eoc:\n%s", eoc_rec)
assert eoc_rec.order_estimate() > order - knl_info.nderivs + 1 - 0.1
# }}}
# {{{ test_pde_check
@pytest.mark.parametrize("dim", [1, 2, 3])
def test_pde_check(dim, order=4):
from sumpy.point_calculus import CalculusPatch
from pytools.convergence import EOCRecorder
from sumpy.point_calculus import CalculusPatch
for iaxis in range(dim):
eoc_rec = EOCRecorder()
for h in [0.1, 0.01, 0.001]:
@@ -113,15 +174,19 @@ def test_pde_check(dim, order=4):
err = la.norm(df_num-df_true)
eoc_rec.add_data_point(h, err)
print(eoc_rec)
logger.info("eoc:\n%s", eoc_rec)
assert eoc_rec.order_estimate() > order-2-0.1
# }}}
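# A numpy-only sketch (independent of CalculusPatch) of the kind of convergence
# check above: a centered difference has O(h^2) error, so the slope of a
# log-log fit of error against h approaches 2.
#
# import numpy as np
#
# hs = np.array([0.1, 0.05, 0.025])
# errs = [abs((np.sin(0.3 + h) - np.sin(0.3 - h)) / (2*h) - np.cos(0.3))
#         for h in hs]
# order = np.polyfit(np.log(hs), np.log(errs), deg=1)[0]
# assert abs(order - 2) < 0.1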
# {{{ test_order_finder
@dataclass(frozen=True)
class FakeTree:
def __init__(self, dimensions, root_extent, stick_out_factor):
self.dimensions = dimensions
self.root_extent = root_extent
self.stick_out_factor = stick_out_factor
dimensions: int
root_extent: float
stick_out_factor: float
@pytest.mark.parametrize("knl", [
@@ -136,7 +201,7 @@ def test_order_finder(knl):
orders = [
ofind(knl, frozenset([("k", 5)]), tree, level)
for level in range(30)]
print(orders)
logger.info("orders: %s", orders)
# Order should not increase with level
assert (np.diff(orders) <= 0).all()
@@ -155,11 +220,13 @@ def test_fmmlib_order_finder(knl):
orders = [
ofind(knl, frozenset([("k", 5)]), tree, level)
for level in range(30)]
print(orders)
logger.info("orders: %s", orders)
# Order should not increase with level
assert (np.diff(orders) <= 0).all()
# }}}
# {{{ expansion toys p2e2e2p test cases
@@ -168,39 +235,21 @@ def approx_convergence_factor(orders, errors):
return np.exp(poly[0])
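# A numpy illustration (synthetic data) of approx_convergence_factor above: if
# err ~ C * f**order, fitting log(err) against order gives log(f) as the
# slope, and np.exp(poly[0]) recovers the factor f.
#
# import numpy as np
#
# orders = np.array([3, 4, 5])
# errors = 0.7 * 0.25**orders                       # manufactured factor 0.25
# poly = np.polyfit(orders, np.log(errors), deg=1)  # poly[0] is the slope
# assert abs(np.exp(poly[0]) - 0.25) < 1e-12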
class P2E2E2PTestCase(Record):
@dataclass(frozen=True)
class P2E2E2PTestCase:
source: np.ndarray
target: np.ndarray
center1: np.ndarray
center2: np.ndarray
expansion1: Callable[..., Any]
expansion2: Callable[..., Any]
conv_factor: str
m2l_use_fft: bool = False
@property
def dim(self):
return len(self.source)
@staticmethod
def eval(expr, source, center1, center2, target):
from pymbolic import parse, evaluate
context = {
"s": source,
"c1": center1,
"c2": center2,
"t": target,
"norm": la.norm}
return evaluate(parse(expr), context)
def __init__(self,
source, center1, center2, target, expansion1, expansion2, conv_factor):
if isinstance(conv_factor, str):
conv_factor = self.eval(conv_factor, source, center1, center2, target)
Record.__init__(self,
source=source,
center1=center1,
center2=center2,
target=target,
expansion1=expansion1,
expansion2=expansion2,
conv_factor=conv_factor)
P2E2E2P_TEST_CASES = (
# local to local, 3D
@@ -232,34 +281,55 @@ P2E2E2P_TEST_CASES = (
expansion1=t.multipole_expand,
expansion2=t.local_expand,
conv_factor="norm(t-c2)/(norm(c2-c1)-norm(c1-s))"),
# multipole to local, 3D with FFT
P2E2E2PTestCase(
source=np.array([-2., 2., 1.]),
center1=np.array([-2., 5., 3.]),
center2=np.array([0., 0., 0.]),
target=np.array([0., 0., -1]),
expansion1=t.multipole_expand,
expansion2=t.local_expand,
m2l_use_fft=True,
conv_factor="norm(t-c2)/(norm(c2-c1)-norm(c1-s))"),
)
# }}}
# {{{ test_toy_p2e2e2p
ORDERS_P2E2E2P = (3, 4, 5)
RTOL_P2E2E2P = 1e-2
@pytest.mark.parametrize("case", P2E2E2P_TEST_CASES)
def test_toy_p2e2e2p(ctx_getter, case):
def test_toy_p2e2e2p(actx_factory, case):
dim = case.dim
src = case.source.reshape(dim, -1)
tgt = case.target.reshape(dim, -1)
if not 0 <= case.conv_factor <= 1:
from pymbolic import evaluate, parse
case_conv_factor = evaluate(parse(case.conv_factor), {
"s": case.source,
"c1": case.center1,
"c2": case.center2,
"t": case.target,
"norm": la.norm,
})
if not 0 <= case_conv_factor <= 1:
raise ValueError(
"convergence factor not in valid range: %e" % case.conv_factor)
f"convergence factor not in valid range: {case_conv_factor}")
from sumpy.expansion.local import VolumeTaylorLocalExpansion
from sumpy.expansion.multipole import VolumeTaylorMultipoleExpansion
from sumpy.expansion import VolumeTaylorExpansionFactory
cl_ctx = ctx_getter()
ctx = t.ToyContext(cl_ctx,
actx = actx_factory()
ctx = t.ToyContext(actx.context,
LaplaceKernel(dim),
VolumeTaylorMultipoleExpansion,
VolumeTaylorLocalExpansion)
expansion_factory=VolumeTaylorExpansionFactory(),
m2l_use_fft=case.m2l_use_fft)
errors = []
@@ -273,18 +343,257 @@ def test_toy_p2e2e2p(ctx_getter, case):
errors.append(np.abs(pot_actual - pot_p2e2e2p))
conv_factor = approx_convergence_factor(1 + np.array(ORDERS_P2E2E2P), errors)
assert conv_factor <= min(1, case.conv_factor * (1 + RTOL_P2E2E2P)), \
(conv_factor, case.conv_factor * (1 + RTOL_P2E2E2P))
assert conv_factor <= min(1, case_conv_factor * (1 + RTOL_P2E2E2P)), \
(conv_factor, case_conv_factor * (1 + RTOL_P2E2E2P))
# }}}
# {{{ test_cse_matvec
def test_cse_matvec():
from sumpy.expansion import CSEMatVecOperator
input_coeffs = [
[(0, 2)],
[],
[(1, 1)],
[(1, 9)],
]
output_coeffs = [
[],
[(0, 3)],
[],
[(2, 7), (1, 5)],
]
op = CSEMatVecOperator(input_coeffs, output_coeffs, shape=(4, 2))
m = np.array([[2, 0], [6, 0], [0, 1], [30, 16]])
rng = np.random.default_rng(42)
vec = rng.random(2)
expected_result = m @ vec
actual_result = op.matvec(vec)
assert np.allclose(expected_result, actual_result)
vec = rng.random(4)
expected_result = m.T @ vec
actual_result = op.transpose_matvec(vec)
assert np.allclose(expected_result, actual_result)
# }}}
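# A minimal numpy sketch (hypothetical helper, not the sumpy implementation) of
# the CSE matvec scheme tested above: row i of the result combines raw input
# entries (input_coeffs[i]) with previously computed rows (output_coeffs[i]),
# so shared subexpressions are evaluated only once.
#
# import numpy as np
#
# def cse_matvec(input_coeffs, output_coeffs, vec):
#     out = []
#     for in_terms, out_terms in zip(input_coeffs, output_coeffs):
#         row = sum(c * vec[j] for j, c in in_terms)
#         row += sum(c * out[j] for j, c in out_terms)
#         out.append(row)
#     return np.array(out, dtype=float)
#
# # reproduces m @ vec for the matrix m above
# vec = np.array([1.0, 2.0])
# assert np.allclose(
#     cse_matvec([[(0, 2)], [], [(1, 1)], [(1, 9)]],
#                [[], [(0, 3)], [], [(2, 7), (1, 5)]], vec),
#     np.array([[2, 0], [6, 0], [0, 1], [30, 16]]) @ vec)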
# {{{ test_diff_op_stokes
def test_diff_op_stokes():
from sumpy.symbolic import Function, symbols
diff_op = make_identity_diff_op(3, 4)
u = diff_op[:3]
p = diff_op[3]
pde = concat(laplacian(u) - gradient(p), divergence(u))
actual_output = pde.to_sym()
x, y, z = syms = symbols("x0, x1, x2")
funcs = symbols("f0, f1, f2, f3", cls=Function)
u, v, w, p = (f(*syms) for f in funcs)
eq1 = u.diff(x, x) + u.diff(y, y) + u.diff(z, z) - p.diff(x)
eq2 = v.diff(x, x) + v.diff(y, y) + v.diff(z, z) - p.diff(y)
eq3 = w.diff(x, x) + w.diff(y, y) + w.diff(z, z) - p.diff(z)
eq4 = u.diff(x) + v.diff(y) + w.diff(z)
expected_output = [eq1, eq2, eq3, eq4]
assert expected_output == actual_output
# }}}
# {{{ test_as_scalar_pde_stokes
def test_as_scalar_pde_stokes():
diff_op = make_identity_diff_op(3, 4)
u = diff_op[:3]
p = diff_op[3]
pde = concat(laplacian(u) - gradient(p), divergence(u))
# the velocity components in Stokes flow should satisfy the biharmonic equation
for i in range(3):
logger.info("pde\n%s", as_scalar_pde(pde, i))
logger.info("\n%s", laplacian(laplacian(u[i])))
assert as_scalar_pde(pde, i) == laplacian(laplacian(u[0]))
# pressure should satisfy Laplace
assert as_scalar_pde(pde, 3) == laplacian(u[0])
# }}}
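# A sympy-only sketch (independent of sumpy) of the reasoning behind the
# pressure assertion above: taking the divergence of the momentum equation
# lap(u) - grad(p) = 0 and using div(u) = 0 leaves lap(p) = 0.
#
# import sympy as sp
#
# x, y, z = syms3 = sp.symbols("x y z")
# u = [sp.Function(f"u{i}")(*syms3) for i in range(3)]
# p = sp.Function("p")(*syms3)
#
# def lap(f):
#     return sum(sp.diff(f, s, 2) for s in syms3)
#
# momentum = [lap(u[i]) - sp.diff(p, s) for i, s in enumerate(syms3)]
# div_u = sum(sp.diff(u[i], s) for i, s in enumerate(syms3))
# div_momentum = sum(sp.diff(momentum[i], s) for i, s in enumerate(syms3))
# # div(momentum) = lap(div u) - lap(p), so div u = 0 forces lap(p) = 0
# assert sp.expand(div_momentum - (lap(div_u) - lap(p))) == 0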
# {{{ test_as_scalar_pde_maxwell
def test_as_scalar_pde_maxwell():
from sumpy.symbolic import symbols
op = make_identity_diff_op(3, 6, time_dependent=True)
E = op[:3] # noqa: N806
B = op[3:] # noqa: N806
mu, epsilon = symbols("mu, epsilon")
t = (0, 0, 0, 1)
pde = concat(curl(E) + diff(B, t), curl(B) - mu*epsilon*diff(E, t),
divergence(E), divergence(B))
as_scalar_pde(pde, 3)
for i in range(6):
assert as_scalar_pde(pde, i) == \
laplacian(op[0]) - mu*epsilon*diff(diff(op[0], t), t)
# }}}
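# A sympy-only sketch (independent of sumpy) of the identity behind the Maxwell
# assertion above: curl(curl F) = grad(div F) - lap(F), so with div E = 0 and
# curl B = mu*epsilon*dE/dt every field component satisfies the scalar wave
# equation lap(E) = mu*epsilon*d2E/dt2.
#
# import sympy as sp
#
# x, y, z = syms3 = sp.symbols("x y z")
# F = [sp.Function(f"F{i}")(*syms3) for i in range(3)]
#
# def curl3(v):
#     return [sp.diff(v[2], y) - sp.diff(v[1], z),
#             sp.diff(v[0], z) - sp.diff(v[2], x),
#             sp.diff(v[1], x) - sp.diff(v[0], y)]
#
# def lap(f):
#     return sum(sp.diff(f, s, 2) for s in syms3)
#
# div_f = sum(sp.diff(F[i], s) for i, s in enumerate(syms3))
# cc = curl3(curl3(F))
# for i, s in enumerate(syms3):
#     assert sp.expand(cc[i] - (sp.diff(div_f, s) - lap(F[i]))) == 0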
# {{{ test_as_scalar_pde_elasticity
def test_as_scalar_pde_elasticity():
# Ref: https://doi.org/10.1006/jcph.1996.0102
diff_op = make_identity_diff_op(2, 5)
sigma_x = diff_op[0]
sigma_y = diff_op[1]
tau = diff_op[2]
u = diff_op[3]
v = diff_op[4]
# Use numeric values as the expressions grow exponentially large otherwise
from sumpy.symbolic import symbols
lam, mu = symbols("lam, mu")
x = (1, 0)
y = (0, 1)
exprs = [
diff(sigma_x, x) + diff(tau, y),
diff(tau, x) + diff(sigma_y, y),
sigma_x - (lam + 2*mu)*diff(u, x) - lam*diff(v, y),
sigma_y - (lam + 2*mu)*diff(v, y) - lam*diff(u, x),
tau - mu*(diff(u, y) + diff(v, x)),
]
pde = concat(*exprs)
assert pde.order == 1
for i in range(5):
scalar_pde = as_scalar_pde(pde, i)
assert scalar_pde == laplacian(laplacian(diff_op[0]))
assert scalar_pde.order == 4
# }}}
# {{{ test_elasticity_new
def test_elasticity_new():
from pickle import dumps, loads
stokes_knl = StokesletKernel(3, 0, 1, "mu1", 0.5)
stokes_knl2 = ElasticityKernel(3, 0, 1, "mu1", 0.5)
elasticity_knl = ElasticityKernel(3, 0, 1, "mu1", "nu")
elasticity_helper_knl = LineOfCompressionKernel(3, 0, "mu1", "nu")
assert isinstance(stokes_knl2, StokesletKernel)
assert stokes_knl == stokes_knl2
assert loads(dumps(stokes_knl)) == stokes_knl
for knl in [elasticity_knl, elasticity_helper_knl]:
assert not isinstance(knl, StokesletKernel)
assert loads(dumps(knl)) == knl
# }}}
# {{{ test_weird_kernel
w = make_identity_diff_op(2)
pdes = [
diff(w, (1, 1)) + diff(w, (2, 0)),
diff(w, (1, 1)) + diff(w, (0, 2)),
]
@pytest.mark.parametrize("pde", pdes)
def test_weird_kernel(pde):
class MyKernel(ExpressionKernel):
def __init__(self):
super().__init__(dim=2, expression=1, global_scaling_const=1,
is_complex_valued=False)
def get_pde_as_diff_op(self):
return pde
from functools import reduce
from operator import mul
from sumpy.expansion import LinearPDEConformingVolumeTaylorExpansion
knl = MyKernel()
order = 10
expn = LinearPDEConformingVolumeTaylorExpansion(kernel=knl,
order=order, use_rscale=False)
coeffs = expn.get_coefficient_identifiers()
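# map(max, *coeffs) zips the retained multi-indices and takes the per-axis
# maximum; reduce(mul, ...) multiplies these maxima, giving the size of the
# coefficient box that is compared against the expected FFT size below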
fft_size = reduce(mul, map(max, *coeffs), 1)
assert fft_size == order
# }}}
# {{{ test_get_storage_index
class StorageIndexTestKernel(ExpressionKernel):
def __init__(self, dim, max_mi):
super().__init__(dim=dim, expression=1, global_scaling_const=1,
is_complex_valued=False)
self._max_mi = max_mi
def get_pde_as_diff_op(self):
w = make_identity_diff_op(self.dim)
pde = diff(w, tuple(self._max_mi))
return pde
@pytest.mark.parametrize("order", [6])
@pytest.mark.parametrize("knl", [
LaplaceKernel(2),
LaplaceKernel(3),
StorageIndexTestKernel(2, (3, 0)),
StorageIndexTestKernel(2, (0, 3)),
StorageIndexTestKernel(3, (3, 0, 0)),
StorageIndexTestKernel(3, (0, 3, 0)),
StorageIndexTestKernel(3, (0, 0, 3)),
BiharmonicKernel(2),
BiharmonicKernel(3),
])
@pytest.mark.parametrize("compressed", (True, False))
def test_get_storage_index(order, knl, compressed):
dim = knl.dim
if compressed:
wrangler = LinearPDEBasedExpansionTermsWrangler(order, dim, knl=knl)
else:
wrangler = FullExpansionTermsWrangler(order, dim)
for i, mi in enumerate(wrangler.get_coefficient_identifiers()):
assert i == wrangler.get_storage_index(mi)
# }}}
# You can test individual routines by typing
# $ python test_misc.py 'test_p2p(cl.create_some_context)'
# $ python test_misc.py 'test_pde_check_kernels(_acf,
# KernelInfo(HelmholtzKernel(2), k=5), order=5)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import division, absolute_import, print_function
from __future__ import annotations
__copyright__ = "Copyright (C) 2017 Matt Wala"
@@ -22,31 +23,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import numpy as np
import logging
import sys
import pyopencl as cl
from pyopencl.tools import ( # noqa
pytest_generate_tests_for_pyopencl as pytest_generate_tests)
import numpy as np
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
from sumpy.expansion.local import LineTaylorLocalExpansion, VolumeTaylorLocalExpansion
import logging
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
try:
import faulthandler
except ImportError:
pass
else:
faulthandler.enable()
# {{{ test_direct_qbx_vs_eigval
def test_direct(ctx_getter):
# This evaluates a single layer potential on a circle.
logging.basicConfig(level=logging.INFO)
@pytest.mark.parametrize("expn_class", [
LineTaylorLocalExpansion,
VolumeTaylorLocalExpansion,
])
def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False):
"""This evaluates a single layer potential on a circle using a known
eigenvalue/eigenvector combination.
"""
if visualize:
logging.basicConfig(level=logging.INFO)
ctx = ctx_getter()
queue = cl.CommandQueue(ctx)
actx = actx_factory()
from sumpy.kernel import LaplaceKernel
lknl = LaplaceKernel(2)
@@ -54,9 +63,11 @@ def test_direct(ctx_getter):
order = 12
from sumpy.qbx import LayerPotential
from sumpy.expansion.local import LineTaylorLocalExpansion
lpot = LayerPotential(ctx, [LineTaylorLocalExpansion(lknl, order)])
lpot = LayerPotential(actx.context,
expansion=expn_class(lknl, order),
target_kernels=(lknl,),
source_kernels=(lknl,))
mode_nr = 25
@@ -76,34 +87,118 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False):
h = 2 * np.pi / n
targets = unit_circle
sources = unit_circle
targets = actx.from_numpy(unit_circle)
sources = actx.from_numpy(unit_circle)
radius = 7 * h
centers = unit_circle * (1 - radius)
expansion_radii = np.ones(n) * radius
centers = actx.from_numpy((1 - radius) * unit_circle)
expansion_radii = actx.from_numpy(radius * np.ones(n))
strengths = (actx.from_numpy(sigma * h),)
strengths = (sigma * h,)
evt, (result_qbx,) = lpot(queue, targets, sources, centers, strengths,
_evt, (result_qbx,) = lpot(
actx.queue,
targets, sources, centers, strengths,
expansion_radii=expansion_radii)
result_qbx = actx.to_numpy(result_qbx)
eocrec.add_data_point(h, np.max(np.abs(result_ref - result_qbx)))
print(eocrec)
logger.info("eoc:\n%s", eocrec)
slack = 1.5
assert eocrec.order_estimate() > order - slack
# }}}
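# A numpy-only sketch (independent of sumpy and QBX) of the eigenfunction fact
# the test above relies on: the Laplace single layer on the unit circle maps
# cos(n*theta) to cos(n*theta)/(2n). Evaluating at targets strictly inside the
# circle avoids the kernel singularity, where the potential of cos(n*theta) is
# r**n * cos(n*theta)/(2n) and the trapezoidal rule converges spectrally.
#
# import numpy as np
#
# n_mode, n_pts, r = 4, 400, 0.5
# t = np.linspace(0, 2*np.pi, n_pts, endpoint=False)
# src = np.exp(1j*t)                    # unit-circle sources
# tgt = r * np.exp(1j*t)                # targets inside the circle
# sigma = np.cos(n_mode * t)
# h = 2*np.pi / n_pts
# # G(x, y) = -log|x - y| / (2*pi) for the 2D Laplace kernel
# pot = (-np.log(np.abs(tgt[:, None] - src[None, :])) / (2*np.pi)) @ (sigma * h)
# ref = r**n_mode / (2*n_mode) * np.cos(n_mode * t)
# assert np.max(np.abs(pot - ref)) < 1e-10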
# {{{ test_direct_qbx_vs_eigval_with_tgt_deriv
@pytest.mark.parametrize("expn_class", [
LineTaylorLocalExpansion,
VolumeTaylorLocalExpansion,
])
def test_direct_qbx_vs_eigval_with_tgt_deriv(
actx_factory, expn_class, visualize=False):
"""This evaluates a single layer potential on a circle using a known
eigenvalue/eigenvector combination.
"""
if visualize:
logging.basicConfig(level=logging.INFO)
actx = actx_factory()
from sumpy.kernel import AxisTargetDerivative, LaplaceKernel
lknl = LaplaceKernel(2)
order = 8
from sumpy.qbx import LayerPotential
lpot_dx = LayerPotential(actx.context, expansion=expn_class(lknl, order),
target_kernels=(AxisTargetDerivative(0, lknl),), source_kernels=(lknl,))
lpot_dy = LayerPotential(actx.context, expansion=expn_class(lknl, order),
target_kernels=(AxisTargetDerivative(1, lknl),), source_kernels=(lknl,))
mode_nr = 15
from pytools.convergence import EOCRecorder
eocrec = EOCRecorder()
for n in [200, 300, 400]:
t = np.linspace(0, 2 * np.pi, n, endpoint=False)
unit_circle = np.exp(1j * t)
unit_circle = np.array([unit_circle.real, unit_circle.imag])
sigma = np.cos(mode_nr * t)
# eigval = 1/(2*mode_nr)
eigval = 0.5
result_ref = eigval * sigma
h = 2 * np.pi / n
targets = actx.from_numpy(unit_circle)
sources = actx.from_numpy(unit_circle)
radius = 7 * h
centers = actx.from_numpy((1 - radius) * unit_circle)
expansion_radii = actx.from_numpy(radius * np.ones(n))
strengths = (actx.from_numpy(sigma * h),)
_evt, (result_qbx_dx,) = lpot_dx(
actx.queue,
targets, sources, centers, strengths,
expansion_radii=expansion_radii)
_evt, (result_qbx_dy,) = lpot_dy(
actx.queue,
targets, sources, centers, strengths,
expansion_radii=expansion_radii)
result_qbx_dx = actx.to_numpy(result_qbx_dx)
result_qbx_dy = actx.to_numpy(result_qbx_dy)
normals = unit_circle
result_qbx = normals[0] * result_qbx_dx + normals[1] * result_qbx_dy
eocrec.add_data_point(h, np.max(np.abs(result_ref - result_qbx)))
if expn_class is not LineTaylorLocalExpansion:
logger.info("eoc:\n%s", eocrec)
slack = 1.5
assert eocrec.order_estimate() > order - slack
# }}}
# You can test individual routines by typing
# $ python test_kernels.py 'test_p2p(cl.create_some_context)'
# $ python test_qbx.py 'test_direct_qbx_vs_eigval(_acf, LineTaylorLocalExpansion)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
from pytest import main
main([__file__])
pytest.main([__file__])
# vim: fdm=marker
from __future__ import annotations
__copyright__ = "Copyright (C) 2020 Isuru Fernando"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import logging
import sys
import numpy as np
import pytest
from arraycontext import pytest_generate_tests_for_array_contexts
import sumpy.symbolic as sym
from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401
from sumpy.tools import (
fft,
fft_toeplitz_upper_triangular,
loopy_fft,
matvec_toeplitz_upper_triangular,
)
logger = logging.getLogger(__name__)
pytest_generate_tests = pytest_generate_tests_for_array_contexts([
PytestPyOpenCLArrayContextFactory,
])
# {{{ test_matvec_fft
def test_matvec_fft():
k = 5
rng = np.random.default_rng(42)
v = rng.random(k)
x = rng.random(k)
fft = fft_toeplitz_upper_triangular(v, x)
matvec = matvec_toeplitz_upper_triangular(v, x)
for i in range(k):
assert abs(fft[i] - matvec[i]) < 1e-14
# }}}
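# A numpy-only sketch (not the sumpy implementation) of the idea being tested:
# multiplying by an upper-triangular Toeplitz matrix U[i, j] = v[j - i] for
# j >= i is a linear convolution of v with the reversed vector, so zero-padded
# FFTs compute the product in O(k log k).
#
# import numpy as np
#
# def toeplitz_ut_matvec(v, x):
#     # direct O(k^2) reference
#     k = len(v)
#     return np.array([sum(v[j - i] * x[j] for j in range(i, k))
#                      for i in range(k)])
#
# def toeplitz_ut_matvec_fft(v, x):
#     # same product via one zero-padded FFT convolution
#     k = len(v)
#     n = 2 * k
#     conv = np.fft.ifft(np.fft.fft(v, n) * np.fft.fft(x[::-1], n)).real
#     return conv[:k][::-1]
#
# rng = np.random.default_rng(42)
# v, x = rng.random(5), rng.random(5)
# assert np.allclose(toeplitz_ut_matvec(v, x), toeplitz_ut_matvec_fft(v, x))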
# {{{ test_matvec_fft_small_floats
def test_matvec_fft_small_floats():
k = 5
v = sym.make_sym_vector("v", k)
x = sym.make_sym_vector("x", k)
fft = fft_toeplitz_upper_triangular(v, x)
for expr in fft:
for f in expr.atoms(sym.Float):
if f == 0:
continue
assert abs(f) > 1e-10
# }}}
# {{{ test_fft
@pytest.mark.parametrize("size", [1, 2, 7, 10, 30, 210])
def test_fft(actx_factory, size):
actx = actx_factory()
inp = np.arange(size, dtype=np.complex64)
inp_dev = actx.from_numpy(inp)
out = fft(inp)
fft_func = loopy_fft(inp.shape, inverse=False, complex_dtype=inp.dtype.type)
_evt, (out_dev,) = fft_func(actx.queue, y=inp_dev)
assert np.allclose(actx.to_numpy(out_dev), out)
# }}}
# You can test individual routines by typing
# $ python test_tools.py 'test_fft(_acf, 30)'
if __name__ == "__main__":
if len(sys.argv) > 1:
exec(sys.argv[1])
else:
pytest.main([__file__])
# vim: fdm=marker