diff --git a/examples/layerpot-qbmx.py b/examples/layerpot-qbmx.py new file mode 100644 index 0000000000000000000000000000000000000000..72b4f888c5d0927da27031ad726a4d03d7714363 --- /dev/null +++ b/examples/layerpot-qbmx.py @@ -0,0 +1,126 @@ +from __future__ import division, absolute_import + +enable_mayavi = 0 +if enable_mayavi: + from mayavi import mlab # noqa + +import numpy as np +import pyopencl as cl +from sumpy.visualization import FieldPlotter +from sumpy.kernel import one_kernel_2d, LaplaceKernel, HelmholtzKernel # noqa + +from pytential import bind, sym + +import faulthandler +from six.moves import range +faulthandler.enable() + +import logging +logger = logging.getLogger(__name__) + +cl_ctx = cl.create_some_context() +queue = cl.CommandQueue(cl_ctx) + +target_order = 16 +qbx_order = 3 +nelements = 60 +mode_nr = 3 + +k = 0 +if k: + kernel = HelmholtzKernel(2) + kernel_kwargs = {"k": sym.var("k")} +else: + kernel = LaplaceKernel(2) + kernel_kwargs = {} +#kernel = OneKernel() + +from meshmode.mesh.generation import ( # noqa + make_curve_mesh, starfish, ellipse, drop) +mesh = make_curve_mesh( + #lambda t: ellipse(1, t), + starfish, + np.linspace(0, 1, nelements+1), + target_order) + +from pytential.qbx import QBMXLayerPotentialSource +from meshmode.discretization import Discretization +from meshmode.discretization.poly_element import \ + InterpolatoryQuadratureSimplexGroupFactory + +pre_density_discr = Discretization( + cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) + +qbx, _ = QBMXLayerPotentialSource( + pre_density_discr, 4*target_order, qbx_order, + fmm_order=qbx_order+3).with_refinement() + +density_discr = qbx.density_discr + +nodes = density_discr.nodes().with_queue(queue) + +angle = cl.clmath.atan2(nodes[1], nodes[0]) + +d = sym.Derivative() +#op = d.nabla[0] * d(sym.S(kernel, sym.var("sigma"))) +op = sym.D(kernel, sym.var("sigma"), **kernel_kwargs, qbx_forced_limit=1) +#op = sym.S(kernel, sym.var("sigma"), **kernel_kwargs) + +sigma = cl.clmath.cos(mode_nr*angle) +if 0: + sigma = 0*angle + from random import randrange + for i in range(5): + sigma[randrange(len(sigma))] = 1 + +if isinstance(kernel, HelmholtzKernel): + sigma = sigma.astype(np.complex128) + +bound_bdry_op = bind(qbx, op) +#mlab.figure(bgcolor=(1, 1, 1)) +if 1: + fplot = FieldPlotter(np.zeros(2), extent=5, npoints=1000) + from pytential.target import PointsTarget + + targets_dev = cl.array.to_device(queue, fplot.points) + fld_in_vol = bind( + (qbx, PointsTarget(targets_dev)), + op)(queue, sigma=sigma, k=k).get() + + if enable_mayavi: + fplot.show_scalar_in_mayavi(fld_in_vol.real, max_val=5) + else: + fplot.write_vtk_file( + "potential.vts", + [ + ("potential", fld_in_vol) + ] + ) + +if 0: + def apply_op(density): + return bound_bdry_op( + queue, sigma=cl.array.to_device(queue, density), k=k).get() + + from sumpy.tools import build_matrix + n = len(sigma) + mat = build_matrix(apply_op, dtype=np.float64, shape=(n, n)) + + import matplotlib.pyplot as pt + pt.imshow(mat) + pt.colorbar() + pt.show() + +if enable_mayavi: + # {{{ plot boundary field + + fld_on_bdry = bound_bdry_op(queue, sigma=sigma, k=k).get() + + nodes_host = density_discr.nodes().get(queue=queue) + mlab.points3d(nodes_host[0], nodes_host[1], fld_on_bdry.real, scale_factor=0.03) + + # }}} + +if enable_mayavi: + mlab.colorbar() + mlab.show() diff --git a/pytential/qbx/__init__.py b/pytential/qbx/__init__.py index a18196f401ce355cd5c42f2df0416cf5bc08eb88..f5f68a905684ef1c8cbbe91b073842529cacae28 100644 --- a/pytential/qbx/__init__.py +++ b/pytential/qbx/__init__.py @@ -38,9 +38,10 @@ import pyopencl as cl import logging logger = logging.getLogger(__name__) - __doc__ = """ +.. autoclass:: QBXLayerPotentialSourceBase .. autoclass:: QBXLayerPotentialSource +.. autoclass:: QBMXLayerPotentialSource .. autoclass:: QBXTargetAssociationFailedException """ @@ -120,10 +121,10 @@ def get_multipole_expansion_class(base_kernel): return VolumeTaylorMultipoleExpansion -# {{{ QBX layer potential source +# {{{ base class for layer potential sources -class QBXLayerPotentialSource(LayerPotentialSource): - """A source discretization for a QBX layer potential. +class QBXLayerPotentialSourceBase(LayerPotentialSource): + """A source discretization for a QBX-like layer potential. .. attribute :: density_discr .. attribute :: qbx_order @@ -191,7 +192,7 @@ class QBXLayerPotentialSource(LayerPotentialSource): ): # FIXME Could/should share wrangler and geometry kernels # if no relevant changes have been made. - return QBXLayerPotentialSource( + return self.__class__( density_discr=density_discr or self.density_discr, fine_order=( fine_order if fine_order is not None else self.fine_order), @@ -249,7 +250,7 @@ class QBXLayerPotentialSource(LayerPotentialSource): from pytential.qbx.refinement import QBXLayerPotentialSourceRefiner refiner = QBXLayerPotentialSourceRefiner(self.cl_context) from meshmode.discretization.poly_element import ( - InterpolatoryQuadratureSimplexGroupFactory) + InterpolatoryQuadratureSimplexGroupFactory) if target_order is None: target_order = self.density_discr.groups[0].order lpot, connection = refiner(self, @@ -316,12 +317,15 @@ class QBXLayerPotentialSource(LayerPotentialSource): return tuple(panels[d, :] for d in range(mesh.ambient_dim)) @memoize_method - def panel_sizes(self, last_dim_length="nsources"): + def panel_sizes(self, last_dim_length="nsources", discr_type="coarse"): assert last_dim_length in ("nsources", "ncenters", "npanels") + assert discr_type in ("coarse", "fine") # To get the panel size this does the equivalent of (∫ 1 ds)**(1/dim). # FIXME: Kernel optimizations - discr = self.density_discr + discr = (self.density_discr + if discr_type == "coarse" + else self.fine_density_discr) if last_dim_length == "nsources" or last_dim_length == "ncenters": knl = lp.make_kernel( @@ -373,17 +377,50 @@ class QBXLayerPotentialSource(LayerPotentialSource): src1=panel_sizes, src2=panel_sizes) return panel_sizes.with_queue(None) - @memoize_method - def centers(self, sign): - adim = self.density_discr.ambient_dim - dim = self.density_discr.dim + def _centers_impl(self, sign, discr_type="coarse"): + discr = (self.density_discr + if discr_type == "coarse" + else self.fine_density_discr) + + adim = discr.ambient_dim + dim = discr.dim from pytential import sym, bind with cl.CommandQueue(self.cl_context) as queue: - nodes = bind(self.density_discr, sym.nodes(adim))(queue) - normals = bind(self.density_discr, sym.normal(adim, dim=dim))(queue) - panel_sizes = self.panel_sizes().with_queue(queue) - return (nodes + normals * sign * panel_sizes / 2).as_vector(np.object) + nodes = bind(discr, sym.nodes(adim))(queue) + normals = bind(discr, sym.normal(adim, dim=dim))(queue) + #panel_sizes = self.panel_sizes(discr_type=discr_type).with_queue(queue) + + discr = (self.density_discr + if discr_type == "coarse" + else self.fine_density_discr) + + area_elements = bind( + discr, + sym.area_element(ambient_dim=discr.ambient_dim, dim=discr.dim) + )(queue) + avg_area_el = (cl.array.sum(area_elements)/len(area_elements)).get()[()] + + centers = ( + (nodes + normals * sign * avg_area_el) + .as_vector(np.object)) + + if 0: + from meshmode.discretization.visualization import draw_curve + draw_curve(discr) + import matplotlib.pyplot as plt + plt.plot(centers[0].get(), centers[1].get(), 'o') + plt.show() + + return centers + + @memoize_method + def centers(self, sign): + return self._centers_impl(sign) + + @memoize_method + def fine_centers(self, sign): + return self._centers_impl(sign, discr_type="fine") @memoize_method def weights_and_area_elements(self): @@ -404,14 +441,6 @@ class QBXLayerPotentialSource(LayerPotentialSource): return (area_element.with_queue(queue)*qweight).with_queue(None) - def preprocess_optemplate(self, name, discretizations, expr): - """ - :arg name: The symbolic name for *self*, which the preprocessor - should use to find which expressions it is allowed to modify. - """ - from pytential.symbolic.mappers import QBXPreprocessor - return QBXPreprocessor(name, discretizations)(expr) - def op_group_features(self, expr): from sumpy.kernel import AxisTargetDerivativeRemover result = ( @@ -429,8 +458,8 @@ class QBXLayerPotentialSource(LayerPotentialSource): if not self.refined_for_global_qbx: from warnings import warn warn( - "Executing global QBX without refinement. " - "This is unlikely to work.") + "Executing global QBX without refinement. " + "This is unlikely to work.") def evaluate_wrapper(expr): value = evaluate(expr) @@ -443,6 +472,83 @@ class QBXLayerPotentialSource(LayerPotentialSource): return func(queue, insn, bound_expr, evaluate_wrapper) + def exec_layer_potential_insn_fmm(self): + raise NotImplementedError() + + def exec_layer_potential_insn_direct(self): + raise NotImplementedError() + + # {{{ helpers for setting up FMM + + def get_base_kernel(self, kernels): + base_kernel = None + + from sumpy.kernel import AxisTargetDerivativeRemover + for knl in kernels: + candidate_base_kernel = AxisTargetDerivativeRemover()(knl) + + if base_kernel is None: + base_kernel = candidate_base_kernel + else: + assert base_kernel == candidate_base_kernel + + return base_kernel + + def get_value_dtype(self, base_kernel, strengths): + if base_kernel.is_complex_valued or strengths.dtype.kind == "c": + return self.complex_dtype + return self.real_dtype + + def get_extra_kwargs_dictionaries( + self, queue, evaluate, insn, user_source_ids, out_kernels): + # This contains things like the Helmholtz parameter k or + # the normal directions for double layers. + + def reorder_sources(source_array): + if isinstance(source_array, cl.array.Array): + return (source_array + .with_queue(queue) + [user_source_ids] + .with_queue(None)) + else: + return source_array + + kernel_extra_kwargs = {} + source_extra_kwargs = {} + + from sumpy.tools import gather_arguments, gather_source_arguments + from pytools.obj_array import with_object_array_or_scalar + for func, var_dict in [ + (gather_arguments, kernel_extra_kwargs), + (gather_source_arguments, source_extra_kwargs), + ]: + for arg in func(out_kernels): + var_dict[arg.name] = with_object_array_or_scalar( + reorder_sources, + evaluate(insn.kernel_arguments[arg.name])) + + return source_extra_kwargs, kernel_extra_kwargs + + def preprocess_optemplate(self, name, discretizations, expr): + """ + :arg name: The symbolic name for *self*, which the preprocessor + should use to find which expressions it is allowed to modify. + """ + from pytential.symbolic.mappers import QBXPreprocessor + return QBXPreprocessor(name, discretizations)(expr) + + # }}} + +# }}} + + +# {{{ QBX layer potential source + +class QBXLayerPotentialSource(QBXLayerPotentialSourceBase): + + def __init__(self, *args, **kwargs): + QBXLayerPotentialSourceBase.__init__(self, *args, **kwargs) + @property @memoize_method def qbx_fmm_code_getter(self): @@ -450,7 +556,6 @@ class QBXLayerPotentialSource(LayerPotentialSource): return QBXFMMGeometryCodeGetter(self.cl_context, self.ambient_dim, debug=self.debug) - @memoize_method def qbx_fmm_geometry_data(self, target_discrs_and_qbx_sides): """ :arg target_discrs_and_qbx_sides: @@ -532,54 +637,16 @@ class QBXLayerPotentialSource(LayerPotentialSource): # {{{ get expansion wrangler - base_kernel = None - out_kernels = [] - - from sumpy.kernel import AxisTargetDerivativeRemover - for knl in insn.kernels: - candidate_base_kernel = AxisTargetDerivativeRemover()(knl) - - if base_kernel is None: - base_kernel = candidate_base_kernel - else: - assert base_kernel == candidate_base_kernel - - out_kernels = tuple(knl for knl in insn.kernels) - - if base_kernel.is_complex_valued or strengths.dtype.kind == "c": - value_dtype = self.complex_dtype - else: - value_dtype = self.real_dtype - - # {{{ build extra_kwargs dictionaries - - # This contains things like the Helmholtz parameter k or - # the normal directions for double layers. - - def reorder_sources(source_array): - if isinstance(source_array, cl.array.Array): - return (source_array - .with_queue(queue) - [geo_data.tree().user_source_ids] - .with_queue(None)) - else: - return source_array - - kernel_extra_kwargs = {} - source_extra_kwargs = {} - - from sumpy.tools import gather_arguments, gather_source_arguments - from pytools.obj_array import with_object_array_or_scalar - for func, var_dict in [ - (gather_arguments, kernel_extra_kwargs), - (gather_source_arguments, source_extra_kwargs), - ]: - for arg in func(out_kernels): - var_dict[arg.name] = with_object_array_or_scalar( - reorder_sources, - evaluate(insn.kernel_arguments[arg.name])) - - # }}} + base_kernel = self.get_base_kernel(insn.kernels) + out_kernels = tuple(insn.kernels) + value_dtype = self.get_value_dtype(base_kernel, strengths) + source_extra_kwargs, kernel_extra_kwargs = ( + self.get_extra_kwargs_dictionaries( + queue, + evaluate, + insn, + geo_data.tree().user_source_ids, + out_kernels)) wrangler = self.expansion_wrangler_code_container( base_kernel, out_kernels).get_wrangler( @@ -606,8 +673,8 @@ class QBXLayerPotentialSource(LayerPotentialSource): # {{{ execute global QBX - from pytential.qbx.fmm import drive_fmm - all_potentials_on_every_tgt = drive_fmm(wrangler, strengths) + from pytential.qbx.fmm import drive_qbx_fmm + all_potentials_on_every_tgt = drive_qbx_fmm(wrangler, strengths) # }}} @@ -801,9 +868,152 @@ class QBXLayerPotentialSource(LayerPotentialSource): # }}} +# {{{ QBMX layer potential source + +class QBMXLayerPotentialSource(QBXLayerPotentialSourceBase): + + def __init__(self, *args, **kwargs): + QBXLayerPotentialSourceBase.__init__(self, *args, **kwargs) + + # {{{ fmm-based execution + + @property + @memoize_method + def qbx_fmm_code_getter(self): + from pytential.qbx.geometry import QBXFMMGeometryCodeGetter + return QBXFMMGeometryCodeGetter(self.cl_context, self.ambient_dim, + debug=self.debug) + + def qbmx_fmm_geometry_data(self, target_discrs, center_side): + """ + :arg target_discrs: a list of *discr*, + where *discr* is a + :class:`meshmode.discretization.Discretization` + or + :class:`pytential.target.TargetBase` + instance + """ + from pytential.qbx.geometry import QBMXFMMGeometryData + + return QBMXFMMGeometryData(self.qbx_fmm_code_getter, + self, center_side, target_discrs, + debug=self.debug) + + @memoize_method + def expansion_wrangler_code_container(self, base_kernel, out_kernels): + mpole_expn_class = get_multipole_expansion_class(base_kernel) + local_expn_class = get_local_expansion_class(base_kernel) + + from functools import partial + fmm_mpole_factory = partial(mpole_expn_class, base_kernel) + fmm_local_factory = partial(local_expn_class, base_kernel) + qbx_mpole_factory = partial(mpole_expn_class, base_kernel) + + from pytential.qbx.fmm import \ + QBMXExpansionWranglerCodeContainer + return QBMXExpansionWranglerCodeContainer( + self.cl_context, + fmm_mpole_factory, fmm_local_factory, qbx_mpole_factory, + out_kernels) + + def exec_compute_potential_insn_fmm(self, queue, insn, bound_expr, evaluate): + from collections import defaultdict + # maps (interior/exterior) -> list of target discretizations + side_to_targets = defaultdict(list) + # maps (interior/exterior) -> list of outputs for the given discretization + side_to_outputs = defaultdict(list) + # maps (tgt_name, limit) -> list of outputs for the given discretization + tgt_name_and_side_to_outputs = defaultdict(list) + + for o in insn.outputs: + qbx_forced_limit = o.qbx_forced_limit + if qbx_forced_limit is None: + qbx_forced_limit = 0 + + if qbx_forced_limit == 0: + raise NotImplementedError( + "Target side (interior/exterior) " + "must be specified to use QBMX - " + "this needs to be fixed eventually.") + + side = 1 if qbx_forced_limit > 0 else -1 + + key = (o.target_name, o.qbx_forced_limit) + processed = key in tgt_name_and_side_to_outputs + + tgt_name_and_side_to_outputs[key].append(o) + + if processed: + continue + + target_discr = bound_expr.places[o.target_name] + if isinstance(target_discr, LayerPotentialSource): + target_discr = target_discr.density_discr + + side_to_targets[side].append(target_discr) + side_to_outputs[side].append(tgt_name_and_side_to_outputs[key]) + + strengths = (evaluate(insn.density).with_queue(queue) + * self.weights_and_area_elements()) + + result = [] + + for tgt_side in (-1, 1): + relevant_target_discrs = side_to_targets[tgt_side] + + if len(relevant_target_discrs) == 0: + continue + + geo_data = self.qbmx_fmm_geometry_data( + relevant_target_discrs, center_side=-tgt_side) + + # {{{ get wrangler + + base_kernel = self.get_base_kernel(insn.kernels) + out_kernels = tuple(insn.kernels) + value_dtype = self.get_value_dtype(base_kernel, strengths) + source_extra_kwargs, kernel_extra_kwargs = ( + self.get_extra_kwargs_dictionaries( + queue, + evaluate, + insn, + geo_data.tree().user_source_ids, + out_kernels)) + + wrangler = self.expansion_wrangler_code_container( + base_kernel, out_kernels).get_wrangler( + queue, geo_data, value_dtype, + self.qbx_order, + self.fmm_level_to_order, + source_extra_kwargs=source_extra_kwargs, + kernel_extra_kwargs=kernel_extra_kwargs) + + # }}} + + from pytential.qbx.fmm import drive_qbmx_fmm + all_potentials_on_tgt_side = drive_qbmx_fmm(wrangler, strengths) + + for tgt_number, tgt_output_set in (enumerate(side_to_outputs[tgt_side])): + + tgt_slice = slice(*geo_data.target_info().target_discr_starts[ + tgt_number:tgt_number+2]) + + for o in tgt_output_set: + result.append( + (o.name, + all_potentials_on_tgt_side[o.kernel_index][tgt_slice])) + + return result, [] + +# }}} + +# }}} + + __all__ = ( QBXLayerPotentialSource, QBXTargetAssociationFailedException, + QBMXLayerPotentialSource ) # vim: fdm=marker diff --git a/pytential/qbx/fmm.py b/pytential/qbx/fmm.py index e3508b7089d2875c12a77079e763201300b5a30a..46880c0d5604b0e1ccf8bfe2f0cde9d578969d51 100644 --- a/pytential/qbx/fmm.py +++ b/pytential/qbx/fmm.py @@ -30,7 +30,9 @@ import pyopencl.array # noqa from sumpy.fmm import SumpyExpansionWranglerCodeContainer, SumpyExpansionWrangler from pytools import memoize_method -from pytential.qbx.interactions import P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P +from pytential.qbx.interactions import ( + P2QBXLFromCSR, M2QBXL, L2QBXL, QBXL2P, P2QBXM, QBXM2M, QBXM2PFromCSR, + QBXM2LFromCSR) import logging logger = logging.getLogger(__name__) @@ -38,10 +40,13 @@ logger = logging.getLogger(__name__) __doc__ = """ .. autoclass:: QBXExpansionWranglerCodeContainer +.. autoclass:: QBMXExpansionWranglerCodeContainer .. autoclass:: QBXExpansionWrangler +.. autoclass:: QBMXExpansionWrangler -.. autofunction:: drive_fmm +.. autofunction:: drive_qbx_fmm +.. autofunction:: drive_qbmx_fmm """ @@ -126,10 +131,10 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), # {{{ data vector utilities - def potential_zeros(self): - """This ought to be called ``non_qbx_potential_zeros``, but since + def output_zeros(self): + """This ought to be called ``non_qbx_output_zeros``, but since it has to override the superclass's behavior to integrate seamlessly, - it needs to be called just :meth:`potential_zeros`. + it needs to be called just :meth:`output_zeros`. """ nqbtl = self.geo_data.non_qbx_box_target_lists() @@ -142,10 +147,10 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), dtype=self.dtype) for k in self.code.out_kernels]) - def full_potential_zeros(self): + def full_output_zeros(self): # The superclass generates a full field of zeros, for all # (not just non-QBX) targets. - return SumpyExpansionWrangler.potential_zeros(self) + return SumpyExpansionWrangler.output_zeros(self) def qbx_local_expansion_zeros(self): order = self.qbx_order @@ -311,7 +316,7 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), return qbx_expansions def eval_qbx_expansions(self, qbx_expansions): - pot = self.full_potential_zeros() + pot = self.full_output_zeros() geo_data = self.geo_data if len(geo_data.global_qbx_centers()) == 0: @@ -342,12 +347,257 @@ QBXFMMGeometryData.non_qbx_box_target_lists`), # }}} + +class QBMXExpansionWranglerCodeContainer(SumpyExpansionWranglerCodeContainer): + def __init__(self, cl_context, + multipole_expansion_factory, local_expansion_factory, + qbx_multipole_expansion_factory, out_kernels): + SumpyExpansionWranglerCodeContainer.__init__(self, + cl_context, multipole_expansion_factory, local_expansion_factory, + out_kernels) + + self.qbx_multipole_expansion_factory = qbx_multipole_expansion_factory + + @memoize_method + def qbx_multipole_expansion(self, order): + return self.qbx_multipole_expansion_factory(order) + + @memoize_method + def p2qbxm(self, order): + return P2QBXM(self.cl_context, + self.qbx_multipole_expansion(order)) + + @memoize_method + def qbxm2m(self, source_order, target_order): + return QBXM2M(self.cl_context, + self.qbx_multipole_expansion_factory(source_order), + self.multipole_expansion_factory(target_order)) + + @memoize_method + def qbxm2p(self, order): + return QBXM2PFromCSR(self.cl_context, + self.qbx_multipole_expansion(order), + self.out_kernels) + + @memoize_method + def qbxm2l(self, source_order, target_order): + return QBXM2LFromCSR(self.cl_context, + self.qbx_multipole_expansion_factory(source_order), + self.local_expansion_factory(target_order)) + + def get_wrangler(self, queue, geo_data, dtype, + qbx_order, fmm_level_to_order, + source_extra_kwargs={}, + kernel_extra_kwargs=None): + return QBMXExpansionWrangler(self, queue, geo_data, + dtype, + qbx_order, fmm_level_to_order, + source_extra_kwargs, + kernel_extra_kwargs) + + +class QBMXExpansionWrangler(SumpyExpansionWrangler): + """A specialized implementation of the + :class:`boxtree.fmm.ExpansionWranglerInterface` for the QBMX FMM. + + .. rubric:: QBMX-specific methods + + .. automethod:: form_global_qbx_multipoles + + .. automethod:: translate_qbx_multipole_to_box_multipole + + .. automethod:: eval_qbx_multipoles_direct + """ + + def __init__(self, code_container, queue, geo_data, dtype, + qbx_order, fmm_level_to_order, + source_extra_kwargs, kernel_extra_kwargs): + SumpyExpansionWrangler.__init__(self, + code_container, queue, geo_data.tree(), + dtype, fmm_level_to_order, source_extra_kwargs, kernel_extra_kwargs) + + self.qbx_order = qbx_order + self.geo_data = geo_data + + # {{{ data vector utilities + + def qbx_multipole_expansion_zeros(self): + order = self.qbx_order + qbx_m_expn = self.code.qbx_multipole_expansion(order) + + return cl.array.zeros( + self.queue, + (self.geo_data.center_info().ncenters, + len(qbx_m_expn)), + dtype=self.dtype) + + def reorder_sources(self, source_array): + return (source_array + .with_queue(self.queue) + [self.tree.user_source_ids] + .with_queue(None)) + + # }}} + + # {{{ source/target dispatch + + def box_source_list_kwargs(self): + return dict( + box_source_starts=self.tree.box_source_starts, + box_source_counts_nonchild=( + self.tree.box_source_counts_nonchild), + sources=self.tree.sources) + + def box_target_list_kwargs(self): + return dict( + box_target_starts=self.tree.box_target_starts, + box_target_counts_nonchild=( + self.tree.box_target_counts_nonchild), + targets=self.tree.targets) + + # }}} + + # {{{ qbx-related + + def form_global_qbx_multipoles(self, src_weights): + multipole_exps = self.qbx_multipole_expansion_zeros() + + geo_data = self.geo_data + kwargs = self.extra_kwargs.copy() + p2qbxm = self.code.p2qbxm(self.qbx_order) + reordered_sources = geo_data.lpot_sources_in_tree_order() + + evt, (result,) = p2qbxm( + self.queue, + qbx_centers=self.tree.sources, + sources=reordered_sources, + strengths=src_weights, + qbx_expansions=multipole_exps, + + **kwargs) + + assert multipole_exps is result + result.add_event(evt) + + return result + + def translate_qbx_multipoles_to_box_multipole( + self, level_start_source_box_nrs, source_boxes, qbx_expansions): + tree = self.geo_data.tree() + mpoles = self.multipole_expansion_zeros() + + wait_for = mpoles.events + + kwargs = self.kernel_extra_kwargs.copy() + kwargs.update(self.box_source_list_kwargs()) + + for lev in range(tree.nlevels): + qbxm2m = self.code.qbxm2m( + self.qbx_order, + self.level_orders[lev]) + + start, stop = level_start_source_box_nrs[lev:lev+2] + if start == stop: + continue + + level_start_ibox, mpoles_view = \ + self.multipole_expansions_view(mpoles, lev) + + evt, (mpoles_res,) = qbxm2m( + self.queue, + source_boxes=source_boxes[start:stop], + centers=self.tree.box_centers, + qbx_expansions=qbx_expansions, + src_expansions=mpoles_view, + src_base_ibox=level_start_ibox, + + wait_for=wait_for, + + **kwargs) + + wait_for = [evt] + assert mpoles_res is mpoles_view + + mpoles.add_event(evt) + + return mpoles + + def eval_qbx_multipoles_direct(self, target_boxes, source_box_starts, + source_box_lists, qbx_multipoles): + pot = self.output_zeros() + + kwargs = self.extra_kwargs.copy() + kwargs.update(self.box_source_list_kwargs()) + kwargs.update(self.box_target_list_kwargs()) + + evt, pot_res = self.code.qbxm2p(self.qbx_order)(self.queue, + target_boxes=target_boxes, + source_box_starts=source_box_starts, + source_box_lists=source_box_lists, + src_expansions=qbx_multipoles, + + result=pot, + + **kwargs) + + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i + pot_i.add_event(evt) + + return pot + + def form_locals_from_qbx_multipoles(self, + level_start_target_or_target_parent_box_nrs, + target_or_target_parent_boxes, starts, lists, qbx_multipoles): + local_exps = self.local_expansion_zeros() + + kwargs = self.extra_kwargs.copy() + + kwargs.update(self.box_source_list_kwargs()) + + events = [] + for lev in range(self.tree.nlevels): + start, stop = \ + level_start_target_or_target_parent_box_nrs[lev:lev+2] + if start == stop: + continue + + qbxm2l = self.code.qbxm2l(self.qbx_order, self.level_orders[lev]) + + target_level_start_ibox, target_local_exps_view = \ + self.multipole_expansions_view(local_exps, lev) + + evt, (result,) = qbxm2l( + self.level_queues[lev], + target_boxes=target_or_target_parent_boxes[start:stop], + source_box_starts=starts[start:stop+1], + source_box_lists=lists, + centers=self.tree.box_centers, + src_expansions=qbx_multipoles, + + tgt_expansions=target_local_exps_view, + tgt_base_ibox=target_level_start_ibox, + + wait_for=events, + + **kwargs) + + assert result is target_local_exps_view + events.append(evt) + + cl.enqueue_marker(self.queue, wait_for=events) + self.queue.finish() + + return local_exps + + # }}} + # }}} -# {{{ FMM top-level +# {{{ qbx fmm top-level -def drive_fmm(expansion_wrangler, src_weights): +def drive_qbx_fmm(expansion_wrangler, src_weights): """Top-level driver routine for the QBX fast multipole calculation. :arg geo_data: A :class:`QBXFMMGeometryData` instance. @@ -529,6 +779,172 @@ def drive_fmm(expansion_wrangler, src_weights): # }}} +# {{{ qbmx fmm top-level + +def drive_qbmx_fmm(expansion_wrangler, src_weights): + """Top-level driver routine for the QBMX fast multipole calculation. + + :arg expansion_wrangler: A :class:`QBMXExpansionWrangler` + :arg src_weights: Source 'density/weights/charges'. + Passed unmodified to *expansion_wrangler*. + + Returns the potentials computed by *expansion_wrangler*. + + See also :func:`boxtree.fmm.drive_fmm`. + """ + wrangler = expansion_wrangler + + geo_data = wrangler.geo_data + traversal = geo_data.traversal() + + # Interface guidelines: Attributes of the tree are assumed to be known + # to the expansion wrangler and should not be passed. + + logger.debug("start qbmx fmm") + + logger.debug("reorder source weights") + src_weights = wrangler.reorder_sources(src_weights) + + # {{{ "Step 2.1:" Construct local multipoles + + logger.debug("construct qbx multipoles") + + qbx_mpole_exps = wrangler.form_global_qbx_multipoles( + src_weights) + + # Henceforth, qbx_mpole_exps get treated as the sources for the FMM. + del src_weights + + logger.debug("construct box multipoles") + + mpole_exps = wrangler.translate_qbx_multipoles_to_box_multipole( + traversal.level_start_source_box_nrs, + traversal.source_boxes, + qbx_mpole_exps) + + # }}} + + # {{{ "Step 2.2:" Propagate multipoles upward + + logger.debug("propagate multipoles upward") + wrangler.coarsen_multipoles( + traversal.level_start_source_parent_box_nrs, + traversal.source_parent_boxes, + mpole_exps) + + # mpole_exps is called Phi in [1] + + # }}} + + # {{{ "Stage 3:" Direct evaluation from neighbor source boxes ("list 1") + + logger.debug("direct evaluation from neighbor source boxes ('list 1')") + potentials = wrangler.eval_qbx_multipoles_direct( + traversal.target_boxes, + traversal.neighbor_source_boxes_starts, + traversal.neighbor_source_boxes_lists, + qbx_mpole_exps) + + # these potentials are called alpha in [1] + + # }}} + + # {{{ "Stage 4:" translate separated siblings' ("list 2") mpoles to local + + logger.debug("translate separated siblings' ('list 2') mpoles to local") + local_exps = wrangler.multipole_to_local( + traversal.level_start_target_or_target_parent_box_nrs, + traversal.target_or_target_parent_boxes, + traversal.sep_siblings_starts, + traversal.sep_siblings_lists, + mpole_exps) + + # local_exps represents both Gamma and Delta in [1] + + # }}} + + # {{{ "Stage 5:" evaluate sep. smaller mpoles ("list 3") at particles + + logger.debug("evaluate sep. smaller mpoles at particles ('list 3 far')") + + # (the point of aiming this stage at particles is specifically to keep its + # contribution *out* of the downward-propagating local expansions) + + potentials = potentials + wrangler.eval_multipoles( + traversal.level_start_target_box_nrs, + traversal.target_boxes, + traversal.sep_smaller_by_level, + mpole_exps) + + # these potentials are called beta in [1] + + if traversal.sep_close_smaller_starts is not None: + logger.debug("evaluate separated close smaller interactions directly " + "('list 3 close')") + + potentials = potentials + wrangler.eval_qbx_multipoles_direct( + traversal.target_boxes, + traversal.sep_close_smaller_starts, + traversal.sep_close_smaller_lists, + qbx_mpole_exps) + + # }}} + + # {{{ "Stage 6:" form locals for separated bigger mpoles ("list 4") + + logger.debug("form locals for separated bigger mpoles ('list 4 far')") + + local_exps = local_exps + wrangler.form_locals_from_qbx_multipoles( + traversal.level_start_target_or_target_parent_box_nrs, + traversal.target_or_target_parent_boxes, + traversal.sep_bigger_starts, + traversal.sep_bigger_lists, + qbx_mpole_exps) + + if traversal.sep_close_bigger_starts is not None: + logger.debug("evaluate separated close bigger interactions directly " + "('list 4 close')") + + potentials = potentials + wrangler.eval_qbx_multipoles_direct( + traversal.target_or_target_parent_boxes, + traversal.sep_close_bigger_starts, + traversal.sep_close_bigger_lists, + qbx_mpole_exps) + + # }}} + + # {{{ "Stage 7:" propagate local_exps downward + + logger.debug("propagate local_exps downward") + + wrangler.refine_locals( + traversal.level_start_target_or_target_parent_box_nrs, + traversal.target_or_target_parent_boxes, + local_exps) + + # }}} + + # {{{ "Stage 8:" evaluate locals + + logger.debug("evaluate locals") + + potentials = potentials + wrangler.eval_locals( + traversal.level_start_target_box_nrs, + traversal.target_boxes, + local_exps) + + # }}} + + logger.debug("reorder potentials") + result = wrangler.reorder_potentials(potentials) + + logger.info("qbmx fmm complete") + + return result + +# }}} + + def write_performance_model(outf, geo_data): from pymbolic import var p_fmm = var("p_fmm") diff --git a/pytential/qbx/geometry.py b/pytential/qbx/geometry.py index ce93e851d61741af3951096d3f8e131c76b221fc..95698d4ae6a1e02cc9c0fb345ab0a683cd3c5729 100644 --- a/pytential/qbx/geometry.py +++ b/pytential/qbx/geometry.py @@ -393,7 +393,7 @@ class QBXFMMGeometryCodeGetter(object): # }}} -# {{{ geometry data +# {{{ target/source/center info class TargetInfo(DeviceDataRecord): """Describes the internal structure of the QBX FMM's list of :attr:`targets`. @@ -419,6 +419,17 @@ class TargetInfo(DeviceDataRecord): """ +class SourceInfo(DeviceDataRecord): + """Describes the internal structure of the QBX FMM's list of :attr:`sources`. + + .. attribute:: sources + + Shape: ``[dim,nsources]`` + + .. attribute:: nsources + """ + + class CenterInfo(DeviceDataRecord): """Information on location of QBX centers. Returned from :meth:`QBXFMMGeometryData.center_info`. @@ -444,6 +455,95 @@ class CenterInfo(DeviceDataRecord): def ncenters(self): return len(self.radii) +# }}} + + +# {{{ base class for geometry data + +class QBXFMMGeometryDataBase(object): + + # {{{ tree + + @memoize_method + def tree(self): + """Build and return a :class:`boxtree.tree.Tree` + for this source with these targets. + + |cached| + """ + + code_getter = self.code_getter + target_info = self.target_info() + source_info = self.source_info() + + with cl.CommandQueue(self.cl_context) as queue: + nparticles = source_info.nsources + target_info.ntargets + + refine_weights = cl.array.zeros(queue, nparticles, dtype=np.int32) + refine_weights[:source_info.nsources] = 1 + refine_weights.finish() + + # NOTE: max_leaf_refine_weight has an impact on accuracy. + # For instance, if a panel contains 64*4 = 256 nodes, then + # a box with 128 sources will contain at most half a + # panel, meaning that its width will be on the order h/2, + # which means many QBX disks (diameter h) will be forced + # to cross boxes. + # So we set max_leaf_refine weight comfortably large + # to avoid having too many disks overlap more than one box. + # + # FIXME: Should investigate this further. + tree, _ = code_getter.build_tree(queue, + particles=source_info.sources, + targets=target_info.targets, + max_leaf_refine_weight=256, + refine_weights=refine_weights, + debug=self.debug, + kind="adaptive-level-restricted") + + return tree + + # }}} + + # {{{ traversal + + @memoize_method + def traversal(self): + """Return a :class:`boxtree.traversal.FMMTraversalInfo` with merged + close lists. + (See :meth:`boxtree.traversal.FMMTraversalInfo.merge_close_lists`) + |cached| + """ + + with cl.CommandQueue(self.cl_context) as queue: + trav, _ = self.code_getter.build_traversal(queue, self.tree(), + debug=self.debug) + + return trav + + # }}} + + def center_info(self): + raise NotImplementedError() + + def target_info(self): + raise NotImplementedError() + + def source_info(self): + raise NotImplementedError() + + @property + def cl_context(self): + return self.lpot_source.cl_context + + @property + def coord_dtype(self): + return self.lpot_source.fine_density_discr.nodes().dtype + +# }}} + + +# {{{ geometry data (qbx) class CenterToTargetList(DeviceDataRecord): """A lookup table of targets covered by each QBX disk. Indexed by global @@ -460,9 +560,10 @@ class CenterToTargetList(DeviceDataRecord): Lists of targets in tree order. Use with :attr:`starts`. """ + pass -class QBXFMMGeometryData(object): +class QBXFMMGeometryData(QBXFMMGeometryDataBase): """ .. rubric :: Attributes @@ -553,13 +654,15 @@ class QBXFMMGeometryData(object): self.target_stick_out_factor = target_stick_out_factor self.debug = debug - @property - def cl_context(self): - return self.lpot_source.cl_context + # {{{ source info - @property - def coord_dtype(self): - return self.lpot_source.fine_density_discr.nodes().dtype + @memoize_method + def source_info(self): + return SourceInfo( + sources=self.lpot_source.fine_density_discr.nodes(), + nsources=self.lpot_source.fine_density_discr.nnodes) + + # }}} # {{{ center info @@ -675,64 +778,6 @@ class QBXFMMGeometryData(object): # }}} - # {{{ tree - - @memoize_method - def tree(self): - """Build and return a :class:`boxtree.tree.TreeWithLinkedPointSources` - for this source with these targets. - - |cached| - """ - - code_getter = self.code_getter - lpot_src = self.lpot_source - target_info = self.target_info() - - with cl.CommandQueue(self.cl_context) as queue: - nsources = lpot_src.fine_density_discr.nnodes - nparticles = nsources + target_info.ntargets - - refine_weights = cl.array.zeros(queue, nparticles, dtype=np.int32) - refine_weights[:nsources] = 1 - refine_weights.finish() - - # NOTE: max_leaf_refine_weight has an impact on accuracy. - # For instance, if a panel contains 64*4 = 256 nodes, then - # a box with 128 sources will contain at most half a - # panel, meaning that its width will be on the order h/2, - # which means many QBX disks (diameter h) will be forced - # to cross boxes. - # So we set max_leaf_refine weight comfortably large - # to avoid having too many disks overlap more than one box. - # - # FIXME: Should investigate this further. - tree, _ = code_getter.build_tree(queue, - particles=lpot_src.fine_density_discr.nodes(), - targets=target_info.targets, - max_leaf_refine_weight=256, - refine_weights=refine_weights, - debug=self.debug, - kind="adaptive-level-restricted") - - return tree - - # }}} - - @memoize_method - def traversal(self): - """Return a :class:`boxtree.traversal.FMMTraversalInfo` with merged - close lists. - (See :meth:`boxtree.traversal.FMMTraversalInfo.merge_close_lists`) - |cached| - """ - - with cl.CommandQueue(self.cl_context) as queue: - trav, _ = self.code_getter.build_traversal(queue, self.tree(), - debug=self.debug) - - return trav - def leaf_to_center_lookup(self): """Return a :class:`boxtree.area_query.LeavesToBallsLookup` to look up which which QBX disks overlap each leaf box. @@ -1078,4 +1123,99 @@ class QBXFMMGeometryData(object): # }}} + +# {{{ geometry data (qbmx) + +class QBMXFMMGeometryData(QBXFMMGeometryDataBase): + """ + .. automethod:: source_info() + .. automethod:: center_info() + .. automethod:: target_info() + .. automethod:: tree() + .. automethod:: traversal() + """ + + def __init__(self, code_getter, lpot_source, center_side, target_discrs, debug): + self.code_getter = code_getter + self.lpot_source = lpot_source + self.center_side = center_side + self.target_discrs = target_discrs + self.debug = debug + + @memoize_method + def lpot_sources_in_tree_order(self): + tree = self.tree() + lpot_src = self.lpot_source + + with cl.CommandQueue(self.cl_context) as queue: + source_nodes = lpot_src.fine_density_discr.nodes().with_queue(queue) + user_source_ids = tree.user_source_ids + + from pytools.obj_array import make_obj_array + sources = make_obj_array([ + source_nodes[i][user_source_ids].with_queue(None) + for i in range(lpot_src.ambient_dim)]) + + return sources + + @memoize_method + def source_info(self): + # For QBMX, the centers play the role of sources in the FMM. + center_info = self.center_info() + return SourceInfo( + sources=center_info.centers, + nsources=center_info.ncenters) + + @memoize_method + def center_info(self): + """ Return a :class:`CenterInfo`. |cached| + """ + + lpot_source = self.lpot_source + + with cl.CommandQueue(self.cl_context) as queue: + centers = lpot_source.fine_centers(self.center_side) + sides = cl.array.empty(queue, len(centers[0]), dtype=np.int32) + sides.fill(self.center_side) + sides.finish() + radii = lpot_source.panel_sizes("nsources", "fine").with_queue(queue) / 2 + + return CenterInfo( + sides=sides, + radii=radii, + centers=centers).with_queue(None) + + @memoize_method + def target_info(self): + code_getter = self.code_getter + lpot_src = self.lpot_source + + with cl.CommandQueue(self.cl_context) as queue: + target_discr_starts = [] + + ntargets = 0 + for target_discr in self.target_discrs: + target_discr_starts.append(ntargets) + ntargets += target_discr.nnodes + + target_discr_starts.append(ntargets) + + targets = cl.array.empty( + self.cl_context, (lpot_src.ambient_dim, ntargets), + self.coord_dtype) + + for start, target_discr in zip( + target_discr_starts, self.target_discrs): + code_getter.copy_targets_kernel()( + queue, + targets=targets[:, start:start+target_discr.nnodes], + points=target_discr.nodes()) + + return TargetInfo( + targets=targets, + target_discr_starts=target_discr_starts, + ntargets=ntargets).with_queue(None) + +# }}} + # vim: foldmethod=marker:filetype=pyopencl diff --git a/pytential/qbx/interactions.py b/pytential/qbx/interactions.py index 38271cdf96623cbd226b11835e118c52940294e6..71f51246d5a7d5be999c4f72e8b4cfaac63a9207 100644 --- a/pytential/qbx/interactions.py +++ b/pytential/qbx/interactions.py @@ -32,7 +32,7 @@ from sumpy.e2e import E2EBase from sumpy.e2p import E2PBase -# {{{ form qbx expansions from points +# {{{ form qbx expansions (local, multipole) from points class P2QBXLFromCSR(P2EBase): default_name = "p2qbxl_from_csr" @@ -110,7 +110,6 @@ class P2QBXLFromCSR(P2EBase): return loopy_knl - @memoize_method def get_optimized_kernel(self): # FIXME knl = self.get_kernel() @@ -120,6 +119,147 @@ class P2QBXLFromCSR(P2EBase): def __call__(self, queue, **kwargs): return self.get_cached_optimized_kernel()(queue, **kwargs) + +class P2QBXM(P2EBase): + default_name = "p2qbxm" + + def get_kernel(self): + ncoeffs = len(self.expansion) + + from sumpy.tools import gather_loopy_source_arguments + arguments = ( + [ + lp.GlobalArg("sources", None, shape="dim, nsources", + dim_tags="sep,c", offset=lp.auto), + lp.GlobalArg("strengths", None, shape="nsources"), + lp.GlobalArg("qbx_centers", None, shape="dim, nsources", + dim_tags="sep,c"), + lp.GlobalArg("qbx_expansions", None, + shape=("nsources", ncoeffs)), + lp.ValueArg("nsources", np.int32), + "..." + ] + gather_loopy_source_arguments([self.expansion])) + + loopy_knl = lp.make_kernel( + [ + "{[isrc]: 0<=isrc center[idim] = qbx_centers[idim, isrc] + <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim} + <> strength = strengths[isrc] + """] + self.get_loopy_instructions() + [""" + """] + [""" + qbx_expansions[isrc, {i}] = strength * coeff{i} \ + {{id_prefix=write_expn}} + """.format(i=i) for i in range(ncoeffs)] + [""" + end + """], + arguments, + name=self.name, assumptions="nsources>=1", + silenced_warnings="write_race(write_expn*);temp_shape_fallback", + default_offset=lp.auto) + + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) + loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "isrc", 16, outer_tag="g.0", inner_tag="l.0") + return knl + + def __call__(self, queue, **kwargs): + return self.get_cached_optimized_kernel()(queue, **kwargs) + +# }}} + + +# {{{ qbx multipole to multipole + +class QBXM2M(E2EBase): + default_name = "qbxm2m" + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + + from sumpy.tools import gather_loopy_arguments + loopy_knl = lp.make_kernel( + [ + "{[isrc_box]: 0<=isrc_box src_ibox = source_boxes[isrc_box] {id=read_src_ibox} + <> isrc_start = box_source_starts[src_ibox] + <> isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] + <> center[idim] = centers[idim, src_ibox] {dup=idim} + + for isrc + <> d[idim] = center[idim] - sources[idim, isrc] {dup=idim} + + """] + [""" + <> src_coeff{i} = qbx_expansions[isrc, {i}] \ + {{dep=read_src_ibox}} + + """.format(i=i) for i in range(ncoeff_src)] + [ + ] + self.get_translation_loopy_insns() + [""" + end + """] + [""" + + src_expansions[src_ibox - src_base_ibox, {i}] = \ + simul_reduce(sum, isrc, coeff{i}) \ + {{id_prefix=write_expn}} + """.format(i=i) for i in range(ncoeff_tgt)] + [""" + + end + """], + [ + lp.GlobalArg("source_boxes", np.int32, shape=("nsrc_boxes"), + offset=lp.auto), + lp.GlobalArg("sources", None, shape="dim, nsources", + dim_tags="sep,c"), + lp.GlobalArg("centers", None, shape="dim, nbox_centers"), + lp.ValueArg("src_base_ibox", np.int32), + lp.GlobalArg("src_expansions", None, + shape=("nsrc_level_boxes", ncoeff_tgt), offset=lp.auto), + lp.GlobalArg("qbx_expansions", None, + shape=("nsources", ncoeff_src)), + lp.GlobalArg("box_source_starts,box_source_counts_nonchild", + None, shape=None), + lp.ValueArg("nsources,nbox_centers,nsrc_level_boxes", np.int32), + "..." + ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), + name=self.name, assumptions="nsrc_boxes>=1", + silenced_warnings="write_race(write_expn*);temp_shape_fallback") + + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + + return loopy_knl + + @memoize_method + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "isrc_box", 16, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + return self.get_cached_optimized_kernel()(queue, **kwargs) + # }}} @@ -382,6 +522,217 @@ class QBXL2P(E2PBase): def __call__(self, queue, **kwargs): return self.get_cached_optimized_kernel()(queue, **kwargs) + +class QBXM2PFromCSR(E2PBase): + default_name = "qbxm2p_from_csr" + + def get_kernel(self): + ncoeffs = len(self.expansion) + loopy_insns, result_names = self.get_loopy_insns_and_result_names() + + from sumpy.tools import gather_loopy_source_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + <> itgt_start = box_target_starts[tgt_ibox] + <> itgt_end = itgt_start+box_target_counts_nonchild[tgt_ibox] + + <> isrc_box_start = source_box_starts[itgt_box] + <> isrc_box_end = source_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = source_box_lists[isrc_box] + <> isrc_start = box_source_starts[src_ibox] + <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] + + for itgt + <> tgt[idim] = targets[idim, itgt] + + for isrc + """] + [""" + + <> coeff{coeffidx} = \ + src_expansions[isrc, {coeffidx}] + + """.format(coeffidx=i) for i in range(ncoeffs)] + [ + """ + + <> src[idim] = sources[idim, isrc] {dup=idim} + <> b[idim] = tgt[idim] - src[idim] {dup=idim} + + """] + loopy_insns + [""" + end """] + [""" + result[{resultidx}, itgt] = result[{resultidx}, itgt] + \ + kernel_scaling * simul_reduce(sum, isrc, + result_{resultidx}_p) {{id_prefix=write_result}} + """.format(resultidx=i) for i in range(len(result_names))] + [""" + end + end + end + """], + [ + lp.GlobalArg("box_target_starts,box_target_counts_nonchild," + "box_source_starts,box_source_counts_nonchild,", + None, shape=None), + lp.GlobalArg("source_box_starts, source_box_lists,", + None, shape=None), + lp.GlobalArg("result", None, + shape="nkernels,ntargets", dim_tags="sep,c"), + lp.GlobalArg("targets", None, + shape="dim,ntargets", dim_tags="sep,c"), + lp.GlobalArg("sources", None, + shape="dim,nsources", dim_tags="sep,c"), + lp.GlobalArg("src_expansions", None, + shape="nsources,ncoeffs"), + lp.ValueArg("nsources", np.int32), + lp.ValueArg("ntargets", np.int32), + lp.ValueArg("ncoeffs", np.int32), + "...", + ] + gather_loopy_source_arguments(self.kernels), + name=self.name, assumptions="ntgt_boxes>=1", + silenced_warnings="temp_shape_fallback") + + loopy_knl = lp.fix_parameters( + loopy_knl, + dim=self.dim, + nkernels=len(self.kernels)) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + + for knl in self.kernels: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + + return loopy_knl + + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + knl = self.get_cached_optimized_kernel() + + return knl(queue, **kwargs) + +# }}} + + +# {{{ QBXM2L + +class QBXM2LFromCSR(E2EBase): + default_name = "qbxm2l_from_csr" + + def get_kernel(self): + ncoeff_src = len(self.src_expansion) + ncoeff_tgt = len(self.tgt_expansion) + + from sumpy.tools import gather_loopy_source_arguments + loopy_knl = lp.make_kernel( + [ + "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] + <> isrc_box_start = source_box_starts[itgt_box] + <> isrc_box_end = source_box_starts[itgt_box+1] + + for isrc_box + <> src_ibox = source_box_lists[isrc_box] {id=read_src_ibox} + <> isrc_start = box_source_starts[src_ibox] + <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox] + + for isrc + """] + [""" + + <> src_coeff{coeffidx} = \ + src_expansions[isrc, {coeffidx}] \ + {{dep=read_src_ibox}} + + """.format(coeffidx=i) for i in range(ncoeff_src)] + [ + """ + + <> d[idim] = \ + centers[idim, tgt_ibox] - sources[idim, isrc] \ + {dup=idim} + + """] + self.get_translation_loopy_insns() + [""" + end """] + [""" + tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \ + tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + \ + simul_reduce(sum, isrc, coeff{i}) \ + {{id_prefix=write_result}} + """.format(i=i) for i in range(ncoeff_tgt)] + [ + """ + end + end + """], + [ + lp.GlobalArg( + "box_source_starts,box_source_counts_nonchild,", + None, shape=None), + lp.GlobalArg("source_box_starts, source_box_lists,", + None, shape=None, offset=lp.auto), + lp.GlobalArg("tgt_expansions", None, + shape=("nlevel_targets", ncoeff_tgt), + offset=lp.auto), + lp.GlobalArg("src_expansions", None, + shape=("nsources", ncoeff_src)), + lp.GlobalArg("centers", None, + shape="dim,naligned_centers"), + lp.GlobalArg("target_boxes", None, shape="ntgt_boxes", + offset=lp.auto), + lp.GlobalArg("sources", None, + shape="dim,nsources", dim_tags="sep,c"), + lp.ValueArg("tgt_base_ibox", np.int32), + lp.ValueArg("nsources", np.int32), + lp.ValueArg("naligned_centers", np.int32), + lp.ValueArg("nlevel_targets", np.int32), + "...", + ] + gather_loopy_source_arguments( + [self.src_expansion, self.tgt_expansion]), + name=self.name, assumptions="ntgt_boxes>=1", + silenced_warnings="temp_shape_fallback;write_race(write_result*)") + + loopy_knl = lp.fix_parameters( + loopy_knl, + dim=self.dim) + + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + + for expn in [self.src_expansion, self.tgt_expansion]: + loopy_knl = expn.prepare_loopy_kernel(loopy_knl) + + return loopy_knl + + def get_optimized_kernel(self): + # FIXME + knl = self.get_kernel() + knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") + return knl + + def __call__(self, queue, **kwargs): + knl = self.get_cached_optimized_kernel() + + return knl(queue, **kwargs) + # }}} # vim: foldmethod=marker diff --git a/test/test_layer_pot.py b/test/test_layer_pot.py index d89615ec5b4a29f1fb617e166a8bc5f5e7c7a0c5..93a38465306d6ec54e4c73d0ad72dd229ee479d8 100644 --- a/test/test_layer_pot.py +++ b/test/test_layer_pot.py @@ -729,9 +729,9 @@ def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k) zero_op_table = { "green": - sym.S(k_sym, dn_u_sym, qbx_forced_limit=-1, **knl_kwargs) - - sym.D(k_sym, u_sym, qbx_forced_limit="avg", **knl_kwargs) - - 0.5*u_sym, + sym.S(k_sym, dn_u_sym, qbx_forced_limit=1, **knl_kwargs) + - sym.D(k_sym, u_sym, qbx_forced_limit=1, **knl_kwargs), + #- 0.5*u_sym "green_grad": d1.resolve(d1.dnabla(2) * d1(sym.S(k_sym, dn_u_sym, @@ -764,12 +764,12 @@ def test_identities(ctx_getter, zero_op_name, curve_name, curve_f, qbx_order, k) from meshmode.discretization import Discretization from meshmode.discretization.poly_element import \ InterpolatoryQuadratureSimplexGroupFactory - from pytential.qbx import QBXLayerPotentialSource + from pytential.qbx import QBMXLayerPotentialSource pre_density_discr = Discretization( cl_ctx, mesh, InterpolatoryQuadratureSimplexGroupFactory(target_order)) - qbx, _ = QBXLayerPotentialSource( + qbx, _ = QBMXLayerPotentialSource( pre_density_discr, 4*target_order, qbx_order, fmm_order=qbx_order + 15).with_refinement() density_discr = qbx.density_discr