diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 0e218024c690b2d8fd1c8c34983a2434fcbf20a9..c66f9c02254e70de6c8f60aec52882a83125e5eb 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -791,8 +791,8 @@ class M2LPostprocessLocal(E2EBase): for itgt_box [itgt_coeff]: tgt_expansions[itgt_box, itgt_coeff] = \ m2l_postprocess_inner( - tgt_rscale, src_rscale, + tgt_rscale, [isrc_coeff]: tgt_expansions_before_postprocessing[ \ itgt_box, isrc_coeff], ) diff --git a/sumpy/toys.py b/sumpy/toys.py index a794ff1529691de4f8ab81fc8a5f4aeee730671a..c71f6107eafb8916228dd30451515a493d8d223f 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -102,7 +102,7 @@ class ToyContext: local_expn_class=None, expansion_factory=None, extra_source_kwargs=None, - extra_kernel_kwargs=None): + extra_kernel_kwargs=None, m2l_use_fft=None): self.cl_context = cl_context self.queue = cl.CommandQueue(self.cl_context) self.kernel = kernel @@ -116,15 +116,22 @@ class ToyContext: mpole_expn_class = \ expansion_factory.get_multipole_expansion_class(kernel) if local_expn_class is None: - from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory + from sumpy.expansion.m2l import (NonFFTM2LTranslationClassFactory, + FFTM2LTranslationClassFactory) + if m2l_use_fft: + m2l_translation_class_factory = FFTM2LTranslationClassFactory() + else: + m2l_translation_class_factory = NonFFTM2LTranslationClassFactory() local_expn_class = \ expansion_factory.get_local_expansion_class(kernel) - m2l_translation_class_factory = NonFFTM2LTranslationClassFactory() m2l_translation_class = \ m2l_translation_class_factory.get_m2l_translation_class( kernel, local_expn_class) local_expn_class = partial(local_expn_class, m2l_translation=m2l_translation_class()) + elif m2l_use_fft is not None: + raise ValueError("local_expn_class and m2l_use_fft are both supplied. " + "Use only one of these arguments") self.mpole_expn_class = mpole_expn_class self.local_expn_class = local_expn_class @@ -181,10 +188,51 @@ class ToyContext: self.mpole_expn_class(self.no_target_deriv_kernel, from_order), self.mpole_expn_class(self.no_target_deriv_kernel, to_order)) + @memoize_method + def use_translation_classes_dependent_data(self): + l_expn = self.local_expn_class(self.no_target_deriv_kernel, 2) + return l_expn.m2l_translation.use_preprocessing + + @memoize_method + def use_fft(self): + l_expn = self.local_expn_class(self.no_target_deriv_kernel, 2) + return l_expn.m2l_translation.use_fft + @memoize_method def get_m2l(self, from_order, to_order): - from sumpy import E2EFromCSR - return E2EFromCSR(self.cl_context, + from sumpy import E2EFromCSR, M2LUsingTranslationClassesDependentData + if self.use_translation_classes_dependent_data(): + m2l_class = M2LUsingTranslationClassesDependentData + else: + m2l_class = E2EFromCSR + return m2l_class(self.cl_context, + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) + + @memoize_method + def get_m2l_translation_class_dependent_data_kernel(self, from_order, to_order): + from sumpy import M2LGenerateTranslationClassesDependentData + return M2LGenerateTranslationClassesDependentData(self.cl_context, + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) + + @memoize_method + def get_m2l_expansion_size(self, from_order, to_order): + m_expn = self.mpole_expn_class(self.no_target_deriv_kernel, from_order) + l_expn = self.local_expn_class(self.no_target_deriv_kernel, to_order) + return l_expn.m2l_translation.preprocess_multipole_nexprs(l_expn, m_expn) + + @memoize_method + def get_m2l_preprocess_mpole_kernel(self, from_order, to_order): + from sumpy import M2LPreprocessMultipole + return M2LPreprocessMultipole(self.cl_context, + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) + + @memoize_method + def get_m2l_postprocess_local_kernel(self, from_order, to_order): + from sumpy import M2LPostprocessLocal + return M2LPostprocessLocal(self.cl_context, self.mpole_expn_class(self.no_target_deriv_kernel, from_order), self.local_expn_class(self.no_target_deriv_kernel, to_order)) @@ -267,7 +315,8 @@ def _e2p(psource, targets, e2p): return pot.get(queue) -def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs): +def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs, + extra_kernel_kwargs): toy_ctx = psource.toy_ctx queue = toy_ctx.queue @@ -290,32 +339,133 @@ def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs): ], dtype=np.float64).T.copy() ) + coeffs = cl.array.to_device(queue, np.array([psource.coeffs])) + args = { + "queue": toy_ctx.queue, + "src_expansions": coeffs, + "src_base_ibox": 0, + "tgt_base_ibox": 0, + "ntgt_level_boxes": 2, + "target_boxes": target_boxes, + "src_box_starts": src_box_starts, + "src_box_lists": src_box_lists, + "centers": centers, + "src_rscale": psource.rscale, + "tgt_rscale": to_rscale, + **extra_kernel_kwargs, + **toy_ctx.extra_kernel_kwargs, + } + + evt, (to_coeffs,) = e2e(**args) - evt, (to_coeffs,) = e2e( - toy_ctx.queue, - src_expansions=coeffs, - src_base_ibox=0, - tgt_base_ibox=0, - ntgt_level_boxes=2, + return expn_class( + toy_ctx, to_center, to_rscale, to_order, to_coeffs[1].get(queue), + derived_from=psource, **expn_kwargs) - target_boxes=target_boxes, - src_box_starts=src_box_starts, - src_box_lists=src_box_lists, - centers=centers, +def _m2l(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs, + translation_classes_kwargs): + toy_ctx = psource.toy_ctx + queue = toy_ctx.queue - src_rscale=psource.rscale, - tgt_rscale=to_rscale, - **toy_ctx.extra_kernel_kwargs) + coeffs = cl.array.to_device(queue, np.array([psource.coeffs])) - return expn_class( - toy_ctx, to_center, to_rscale, to_order, to_coeffs[1].get(queue), + m2l_use_translation_classes_dependent_data = \ + toy_ctx.use_translation_classes_dependent_data() + + if m2l_use_translation_classes_dependent_data: + data_kernel = translation_classes_kwargs["data_kernel"] + preprocess_kernel = translation_classes_kwargs["preprocess_kernel"] + postprocess_kernel = translation_classes_kwargs["postprocess_kernel"] + expn_size = translation_classes_kwargs["m2l_expn_size"] + + # Preprocess the mpole expansion + preprocessed_src_expansions = cl.array.zeros(queue, (1, expn_size), + dtype=np.complex128) + evt, _ = preprocess_kernel(queue, + src_expansions=coeffs, + preprocessed_src_expansions=preprocessed_src_expansions, + src_rscale=np.float64(psource.rscale), + **toy_ctx.extra_kernel_kwargs) + + if toy_ctx.use_fft: + from sumpy.tools import (run_opencl_fft, get_opencl_fft_app, + get_native_event) + fft_app = get_opencl_fft_app(queue, (expn_size,), + dtype=preprocessed_src_expansions.dtype, inverse=False) + ifft_app = get_opencl_fft_app(queue, (expn_size,), + dtype=preprocessed_src_expansions.dtype, inverse=True) + + evt, preprocessed_src_expansions = run_opencl_fft(fft_app, queue, + preprocessed_src_expansions, inverse=False, wait_for=[evt]) + + # Compute translation classes data + m2l_translation_classes_lists = cl.array.to_device(queue, + np.array([0], dtype=np.int32)) + dist = np.array(to_center - psource.center, dtype=np.float64) + dim = toy_ctx.kernel.dim + m2l_translation_vectors = cl.array.to_device(queue, dist.reshape(dim, 1)) + m2l_translation_classes_dependent_data = cl.array.zeros(queue, + (1, expn_size), dtype=np.complex128) + + evt, _ = data_kernel( + queue, + src_rscale=np.float64(psource.rscale), + ntranslation_classes=1, + translation_classes_level_start=0, + m2l_translation_vectors=m2l_translation_vectors, + m2l_translation_classes_dependent_data=( + m2l_translation_classes_dependent_data), + ntranslation_vectors=1, + wait_for=[get_native_event(evt)], + **toy_ctx.extra_kernel_kwargs) + + if toy_ctx.use_fft: + evt, m2l_translation_classes_dependent_data = run_opencl_fft(fft_app, + queue, m2l_translation_classes_dependent_data, inverse=False, + wait_for=[evt]) + + ret = _e2e(psource, to_center, to_rscale, to_order, + e2e, expn_class, expn_kwargs, + { + "src_expansions": preprocessed_src_expansions, + "m2l_translation_classes_lists": m2l_translation_classes_lists, + "m2l_translation_classes_dependent_data": ( + m2l_translation_classes_dependent_data), + "translation_classes_level_start": 0, + } + ) + + # Postprocess the local expansion + local_before = cl.array.to_device(queue, np.array([ret.coeffs])) + to_coeffs = cl.array.zeros(queue, (1, len(data_kernel.tgt_expansion)), + dtype=coeffs.dtype) + + if toy_ctx.use_fft: + evt, local_before = run_opencl_fft(ifft_app, queue, + local_before, inverse=True, wait_for=[get_native_event(evt)]) + + evt, _ = postprocess_kernel(queue=queue, + tgt_expansions_before_postprocessing=local_before, + tgt_expansions=to_coeffs, + src_rscale=np.float64(psource.rscale), + tgt_rscale=np.float64(to_rscale), + wait_for=[get_native_event(evt)], + **toy_ctx.extra_kernel_kwargs) + + return expn_class( + toy_ctx, to_center, to_rscale, to_order, to_coeffs.get(queue)[0], derived_from=psource, **expn_kwargs) + else: + ret = _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, + expn_kwargs, {}) -# }}} + return ret +# }}} + # {{{ potential source classes class PotentialSource: @@ -596,7 +746,7 @@ def multipole_expand(psource: PotentialSource, return _e2e(psource, center, rscale, order, psource.toy_ctx.get_m2m(psource.order, order), - MultipoleExpansion, expn_kwargs) + MultipoleExpansion, expn_kwargs, {}) else: raise TypeError(f"do not know how to expand '{type(psource).__name__}'") @@ -617,9 +767,28 @@ def local_expand( if order is None: order = psource.order - return _e2e(psource, center, rscale, order, - psource.toy_ctx.get_m2l(psource.order, order), - LocalExpansion, expn_kwargs) + toy_ctx = psource.toy_ctx + translation_classes_kwargs = {} + m2l_use_translation_classes_dependent_data = \ + toy_ctx.use_translation_classes_dependent_data() + + if m2l_use_translation_classes_dependent_data: + data_kernel = toy_ctx.get_m2l_translation_class_dependent_data_kernel( + psource.order, order) + preprocess_kernel = toy_ctx.get_m2l_preprocess_mpole_kernel( + psource.order, order) + postprocess_kernel = toy_ctx.get_m2l_postprocess_local_kernel( + psource.order, order) + translation_classes_kwargs["data_kernel"] = data_kernel + translation_classes_kwargs["preprocess_kernel"] = preprocess_kernel + translation_classes_kwargs["postprocess_kernel"] = postprocess_kernel + translation_classes_kwargs["m2l_expn_size"] = \ + toy_ctx.get_m2l_expansion_size(psource.order, order) + + return _m2l(psource, center, rscale, order, + toy_ctx.get_m2l(psource.order, order), + LocalExpansion, expn_kwargs, + translation_classes_kwargs) elif isinstance(psource, LocalExpansion): if order is None: @@ -627,7 +796,7 @@ def local_expand( return _e2e(psource, center, rscale, order, psource.toy_ctx.get_l2l(psource.order, order), - LocalExpansion, expn_kwargs) + LocalExpansion, expn_kwargs, {}) else: raise TypeError(f"do not know how to expand '{type(psource).__name__}'") diff --git a/test/test_kernels.py b/test/test_kernels.py index d1768fd7c5040615a993f1ebb105b9bb1498c5b3..8f0a98ef21e6308ff5de6252318b6f7c840dc7fe 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -43,7 +43,8 @@ from sumpy.expansion.local import ( VolumeTaylorLocalExpansion, H2DLocalExpansion, LinearPDEConformingVolumeTaylorLocalExpansion) -from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory +from sumpy.expansion.m2l import (NonFFTM2LTranslationClassFactory, + FFTM2LTranslationClassFactory) from sumpy.kernel import ( LaplaceKernel, HelmholtzKernel, @@ -473,27 +474,35 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative # {{{ test_translations -@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class", [ - (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), +@pytest.mark.parametrize("knl, local_expn_class, mpole_expn_class, use_fft", [ + (LaplaceKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion, - LinearPDEConformingVolumeTaylorMultipoleExpansion), - (LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + LinearPDEConformingVolumeTaylorMultipoleExpansion, False), + (LaplaceKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion, + LinearPDEConformingVolumeTaylorMultipoleExpansion, True), + (LaplaceKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (LaplaceKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion, - LinearPDEConformingVolumeTaylorMultipoleExpansion), - (HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + LinearPDEConformingVolumeTaylorMultipoleExpansion, False), + (LaplaceKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion, + LinearPDEConformingVolumeTaylorMultipoleExpansion, True), + (HelmholtzKernel(2), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (HelmholtzKernel(2), LinearPDEConformingVolumeTaylorLocalExpansion, - LinearPDEConformingVolumeTaylorMultipoleExpansion), - (HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion), + LinearPDEConformingVolumeTaylorMultipoleExpansion, False), + (HelmholtzKernel(3), VolumeTaylorLocalExpansion, VolumeTaylorMultipoleExpansion, + False), (HelmholtzKernel(3), LinearPDEConformingVolumeTaylorLocalExpansion, - LinearPDEConformingVolumeTaylorMultipoleExpansion), - (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion), + LinearPDEConformingVolumeTaylorMultipoleExpansion, False), + (HelmholtzKernel(2), H2DLocalExpansion, H2DMultipoleExpansion, False), (StokesletKernel(2, 0, 0), VolumeTaylorLocalExpansion, - VolumeTaylorMultipoleExpansion), + VolumeTaylorMultipoleExpansion, False), (StokesletKernel(2, 0, 0), LinearPDEConformingVolumeTaylorLocalExpansion, - LinearPDEConformingVolumeTaylorMultipoleExpansion), + LinearPDEConformingVolumeTaylorMultipoleExpansion, False), ]) def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, - visualize=False): + use_fft, visualize=False): if visualize: logging.basicConfig(level=logging.INFO) @@ -550,7 +559,10 @@ def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, else: orders = [3, 4, 5] - m2l_factory = NonFFTM2LTranslationClassFactory() + if use_fft: + m2l_factory = FFTM2LTranslationClassFactory() + else: + m2l_factory = NonFFTM2LTranslationClassFactory() m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() toy_ctx = t.ToyContext( diff --git a/test/test_misc.py b/test/test_misc.py index 9b72578120e63ee576df99f1e7c849f628f6a62d..98f07da1039c34a740860d5ff5c2045e32660625 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -230,6 +230,7 @@ class P2E2E2PTestCase: expansion1: Callable[..., Any] expansion2: Callable[..., Any] conv_factor: str + m2l_use_fft: bool = False @property def dim(self): @@ -266,6 +267,17 @@ P2E2E2P_TEST_CASES = ( expansion1=t.multipole_expand, expansion2=t.local_expand, conv_factor="norm(t-c2)/(norm(c2-c1)-norm(c1-s))"), + + # multipole to local, 3D with FFT + P2E2E2PTestCase( + source=np.array([-2., 2., 1.]), + center1=np.array([-2., 5., 3.]), + center2=np.array([0., 0., 0.]), + target=np.array([0., 0., -1]), + expansion1=t.multipole_expand, + expansion2=t.local_expand, + m2l_use_fft=True, + conv_factor="norm(t-c2)/(norm(c2-c1)-norm(c1-s))"), ) # }}} @@ -302,7 +314,8 @@ def test_toy_p2e2e2p(actx_factory, case): actx = actx_factory() ctx = t.ToyContext(actx.context, LaplaceKernel(dim), - expansion_factory=VolumeTaylorExpansionFactory()) + expansion_factory=VolumeTaylorExpansionFactory(), + m2l_use_fft=case.m2l_use_fft) errors = []