diff --git a/examples/helmholtz2d.py b/examples/helmholtz2d.py new file mode 100644 index 0000000000000000000000000000000000000000..e0350b2e8f744ac050d3dd040f7c00302de3d9a7 --- /dev/null +++ b/examples/helmholtz2d.py @@ -0,0 +1,435 @@ +""" This example evaluates the volume potential over + [-1,1]^2 with the Helmholtz kernel. +""" + +__copyright__ = "Copyright (C) 2020 Xiaoyu Wei" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import logging +import numpy as np +import pyopencl as cl +from volumential.tools import ScalarFieldExpressionEvaluation as Eval + +import pymbolic as pmbl +from functools import partial + + +logger = logging.getLogger(__name__) + +if 1: + # verbose + logging.basicConfig(level=logging.INFO) +else: + # clean + logging.basicConfig(level=logging.CRITICAL) + + +def main(): + + print("*************************") + print("* Setting up...") + print("*************************") + + dim = 2 + + download_table = False + table_filename = "nft_helmholtz2d.hdf5" + + print("Using table cache:", table_filename) + + q_order = 4 # quadrature order + n_levels = 7 # 2^(n_levels-1) subintervals in 1D + + dtype = np.complex128 + m_order = 20 # multipole order + force_direct_evaluation = False + + print("Multipole order =", m_order) + + k = 3 + alpha = 160 + + from sumpy.kernel import HelmholtzKernel + integral_kernel = HelmholtzKernel(dim) + out_kernels = [integral_kernel] + + # FIXME: specify dtype from the kernel interface + extra_kernel_kwargs = {"k": np.float64(k)} + + x = pmbl.var("x") + y = pmbl.var("y") + expp = pmbl.var("exp") + + norm2 = x ** 2 + y ** 2 + solu_expr = expp(-alpha * norm2) + source_expr = -( + (4 * alpha ** 2 * norm2 - 4 * alpha) * expp(-alpha * norm2) + + k**2 * solu_expr + ) + + logger.info("Source expr: " + str(source_expr)) + logger.info("Solu expr: " + str(solu_expr)) + + # bounding box + a = -1 + b = 1 + root_table_source_extent = 2 + + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + + source_eval = Eval(dim, source_expr, [x, y]) + + # {{{ generate quad points + + import volumential.meshgen as mg + + # Show meshgen info + mg.greet() + + mesh = mg.MeshGen2D(q_order, n_levels, a, b, queue=queue) + mesh.print_info() + q_points = mesh.get_q_points() + q_weights = mesh.get_q_weights() + assert len(q_points) == len(q_weights) + assert q_points.shape[1] == dim + + q_points = np.ascontiguousarray(np.transpose(q_points)) + + from pytools.obj_array import make_obj_array + + q_points = make_obj_array( + [cl.array.to_device(queue, q_points[i]) for i in 
range(dim)])
+
+    q_weights = cl.array.to_device(queue, q_weights)
+    # q_radii = cl.array.to_device(queue, q_radii)
+
+    # }}}
+
+    # {{{ discretize the source field
+
+    source_vals = cl.array.to_device(
+        queue, source_eval(queue, np.array([coords.get() for coords in q_points]))
+    )
+
+    # particle_weight = source_val * q_weight
+
+    # }}} End discretize the source field
+
+    # {{{ build tree and traversals
+
+    from boxtree.tools import AXIS_NAMES
+
+    axis_names = AXIS_NAMES[:dim]
+
+    from pytools import single_valued
+
+    coord_dtype = single_valued(coord.dtype for coord in q_points)
+    from boxtree.bounding_box import make_bounding_box_dtype
+
+    bbox_type, _ = make_bounding_box_dtype(ctx.devices[0], dim, coord_dtype)
+    bbox = np.empty(1, bbox_type)
+    for ax in axis_names:
+        bbox["min_" + ax] = a
+        bbox["max_" + ax] = b
+
+    # tune max_particles_in_box to reconstruct the mesh
+    # TODO: use points from FieldPlotter as target points for better visuals
+    from boxtree import TreeBuilder
+
+    tb = TreeBuilder(ctx)
+    tree, _ = tb(
+        queue,
+        particles=q_points,
+        targets=q_points,
+        bbox=bbox,
+        max_particles_in_box=q_order ** 2 * 4 - 1,
+        kind="adaptive-level-restricted",
+    )
+
+    bbox2 = np.array([[a, b], [a, b]])
+    tree2, _ = tb(
+        queue,
+        particles=q_points,
+        targets=q_points,
+        bbox=bbox2,
+        max_particles_in_box=q_order ** 2 * 4 - 1,
+        kind="adaptive-level-restricted",
+    )
+
+    from boxtree.traversal import FMMTraversalBuilder
+
+    tg = FMMTraversalBuilder(ctx)
+    trav, _ = tg(queue, tree)
+
+    # }}} End build tree and traversals
+
+    # {{{ build near field potential table
+
+    from volumential.table_manager import NearFieldInteractionTableManager
+    import os
+
+    if download_table and (not os.path.isfile(table_filename)):
+        import json
+        with open("table_urls.json", 'r') as fp:
+            urls = json.load(fp)
+
+        print("Downloading table from %s" % urls['Helmholtz2D'])
+        import subprocess
+        subprocess.call(["wget", "-q", urls['Helmholtz2D'], table_filename])
+
+    tm = NearFieldInteractionTableManager(
+        table_filename, root_extent=root_table_source_extent,
+        queue=queue, dtype=dtype)
+
+    assert (abs(
+        int((b - a) / root_table_source_extent) * root_table_source_extent
+        - (b - a)) < 1e-15)
+    nftable = []
+
+    # FIXME: need at least two levels even for uniform trees?
+    for lev in range(tree.nlevels - 1, tree.nlevels + 1):
+        print("Getting table at level", lev)
+        table, _ = tm.get_table(
+            dim, kernel_type="Helmholtz", sumpy_knl=integral_kernel,
+            q_order=q_order, source_box_level=lev,
+            compute_method="DrosteSum", queue=queue,
+            n_brick_quad_points=50, adaptive_level=True,
+            adaptive_quadrature=True, use_symmetry=True,
+            alpha=0.1, nlevels=15,
+            extra_kernel_kwargs=extra_kernel_kwargs
+        )
+        nftable.append(table)
+
+    print("Using table list of length", len(nftable))
+
+    # }}} End build near field potential table
+
+    # {{{ sumpy expansion for the Helmholtz kernel
+
+    from sumpy.expansion import DefaultExpansionFactory
+    expn_factory = DefaultExpansionFactory()
+    local_expn_class = expn_factory.get_local_expansion_class(integral_kernel)
+    mpole_expn_class = expn_factory.get_multipole_expansion_class(integral_kernel)
+
+    exclude_self = True
+
+    from volumential.expansion_wrangler_fpnd import (
+        FPNDExpansionWranglerCodeContainer,
+        FPNDExpansionWrangler
+    )
+
+    wcc = FPNDExpansionWranglerCodeContainer(
+        ctx,
+        partial(mpole_expn_class, integral_kernel),
+        partial(local_expn_class, integral_kernel),
+        out_kernels,
+        exclude_self=exclude_self,
+    )
+
+    if exclude_self:
+        target_to_source = np.arange(tree.ntargets, dtype=np.int32)
+        self_extra_kwargs = {"target_to_source": target_to_source}
+    else:
+        self_extra_kwargs = {}
+
+    # FIXME: unify naming convention: kernel_extra_kwargs vs extra_kernel_kwargs
+    wrangler = FPNDExpansionWrangler(
+        code_container=wcc,
+        queue=queue, tree=tree,
+        near_field_table=nftable,
+        dtype=dtype,
+        fmm_level_to_order=lambda kernel, kernel_args, tree, lev: m_order,
+        quad_order=q_order,
+        self_extra_kwargs=self_extra_kwargs,
+        kernel_extra_kwargs=extra_kernel_kwargs
+    )
+
+    # }}} End sumpy expansion for the Helmholtz kernel
+
+    print("*************************")
+    print("* Performing FMM ...")
+    print("*************************")
+
+    # {{{ conduct fmm computation
+
+    from volumential.volume_fmm import drive_volume_fmm
+
+    import time
+    queue.finish()
+
+    t0 = time.time()
+
+    pot, = drive_volume_fmm(
+        trav,
+        wrangler,
+        source_vals * q_weights,
+        source_vals,
+        direct_evaluation=force_direct_evaluation,
+    )
+    queue.finish()
+
+    t1 = time.time()
+
+    print("Finished in %.2f seconds."
% (t1 - t0)) + print("(%e points per second)" % ( + len(q_weights) / (t1 - t0) + )) + + # }}} End conduct fmm computation + + print("*************************") + print("* Postprocessing ...") + print("*************************") + + # {{{ postprocess and plot + + # print(pot) + + solu_eval = Eval(dim, solu_expr, [x, y]) + + x = q_points[0].get() + y = q_points[1].get() + ze = solu_eval(queue, np.array([x, y])) + zs = pot.get() + + print_error = True + if print_error: + err = np.max(np.abs(ze - zs)) + print("Error =", err) + + # Interpolated surface + if 0: + h = 0.005 + out_x = np.arange(a, b + h, h) + out_y = np.arange(a, b + h, h) + oxx, oyy = np.meshgrid(out_x, out_y) + out_targets = make_obj_array( + [ + cl.array.to_device(queue, oxx.flatten()), + cl.array.to_device(queue, oyy.flatten()), + ] + ) + + from volumential.volume_fmm import interpolate_volume_potential + + # src = source_field([q.get() for q in q_points]) + # src = cl.array.to_device(queue, src) + interp_pot = interpolate_volume_potential(out_targets, trav, wrangler, pot) + opot = interp_pot.get() + + import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import Axes3D + + plt3d = plt.figure() + ax = Axes3D(plt3d) # noqa + surf = ax.plot_surface(oxx, oyy, opot.reshape(oxx.shape)) # noqa + # ax.scatter(x, y, src.get()) + # ax.set_zlim(-0.25, 0.25) + + plt.draw() + plt.show() + + # Boxtree + if 0: + import matplotlib.pyplot as plt + + if dim == 2: + # plt.plot(q_points[0].get(), q_points[1].get(), ".") + pass + + from boxtree.visualization import TreePlotter + + plotter = TreePlotter(tree.get(queue=queue)) + plotter.draw_tree(fill=False, edgecolor="black") + # plotter.draw_box_numbers() + plotter.set_bounding_box() + plt.gca().set_aspect("equal") + + plt.draw() + # plt.show() + plt.savefig("tree.png") + + # Direct p2p + if 0: + print("Performing P2P") + pot_direct, = drive_volume_fmm( + trav, wrangler, source_vals * q_weights, + source_vals, direct_evaluation=True) + zds = pot_direct.get() + zs = pot.get() + + print("P2P-FMM diff =", np.max(np.abs(zs - zds))) + + print("P2P Error =", np.max(np.abs(ze - zds))) + + """ + import matplotlib.pyplot as plt + import matplotlib.cm as cm + x = q_points[0].get() + y = q_points[1].get() + plt.scatter(x, y, c=np.log(abs(zs-zds)) / np.log(10), cmap=cm.jet) + plt.colorbar() + + plt.xlabel("Multipole order = " + str(m_order)) + + plt.draw() + plt.show() + """ + + # Scatter plot + if 0: + import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import Axes3D + + x = q_points[0].get() + y = q_points[1].get() + ze = solu_eval(queue, np.array([x, y])) + zs = pot.get() + + plt3d = plt.figure() + ax = Axes3D(plt3d) + ax.scatter(x, y, zs, s=1) + # ax.scatter(x, y, source_field([q.get() for q in q_points]), s=1) + # import matplotlib.cm as cm + + # ax.scatter(x, y, zs, c=np.log(abs(zs-zds)), cmap=cm.jet) + # plt.gca().set_aspect("equal") + + # ax.set_xlim3d([-1, 1]) + # ax.set_ylim3d([-1, 1]) + # ax.set_zlim3d([np.min(z), np.max(z)]) + # ax.set_zlim3d([-0.002, 0.00]) + + plt.draw() + plt.show() + # plt.savefig("exact.png") + + # }}} End postprocess and plot + + +if __name__ == '__main__': + main() + + +# vim: filetype=pyopencl:foldmethod=marker diff --git a/examples/volume_potential_2d.py b/examples/laplace2d.py similarity index 100% rename from examples/volume_potential_2d.py rename to examples/laplace2d.py diff --git a/examples/volume_potential_3d.py b/examples/laplace3d.py similarity index 100% rename from examples/volume_potential_3d.py rename to examples/laplace3d.py diff --git 
a/volumential/droste.py b/volumential/droste.py index 55716ada303d6c33df9f0625409369a8abfdeae5..45845df53b4c281c023d31e05c1c8c8c4e34c0f8 100644 --- a/volumential/droste.py +++ b/volumential/droste.py @@ -185,8 +185,6 @@ class DrosteBase(KernelCacheWrapper): retain_names=[result_name], complex_dtype=np.complex128, ) - # for i in loopy_insns: - # print(i) return loopy_insns def get_sumpy_kernel_eval_insns(self): @@ -336,7 +334,10 @@ class DrosteBase(KernelCacheWrapper): if "result_dtype" in kwargs: result_dtype = kwargs["result_dtype"] else: - result_dtype = np.float64 + if self.integral_knl.is_complex_valued: + result_dtype = np.complex128 + else: + result_dtype = np.float64 # allocate return arrays if self.dim == 1: @@ -593,7 +594,9 @@ class DrosteBase(KernelCacheWrapper): return mapped_q_points[q_points_ordering] def postprocess_cheb_table(self, cheb_table, cheb_coefs): - nfp_table = np.zeros([self.n_q_points, ] + list(cheb_table.shape[self.dim:])) + nfp_table = np.zeros( + [self.n_q_points, ] + list(cheb_table.shape[self.dim:]), + dtype=cheb_table.dtype) # transform to interpolatory basis functions # NOTE: the reversed order of indices, e.g., @@ -946,7 +949,10 @@ class DrosteReduced(DrosteBase): if "result_dtype" in kwargs: result_dtype = kwargs["result_dtype"] else: - result_dtype = np.float64 + if self.integral_knl.is_complex_valued: + result_dtype = np.complex128 + else: + result_dtype = np.float64 # allocate return arrays if self.dim == 1: @@ -1035,8 +1041,6 @@ class DrosteReduced(DrosteBase): ) ] - # print(ext_ids) - # The idea is that, any member of the hyperoctahedral group # has the decomposition m = f * s, where f is a flip and s # is a swap. @@ -1068,7 +1072,6 @@ class DrosteReduced(DrosteBase): + " % 2 == 0, 1, -1)" ) - # print(', '.join(ext_tgt_ordering)) ext_tgt_ordering.reverse() ext_fun_ordering.reverse() @@ -1304,7 +1307,6 @@ class DrosteReduced(DrosteBase): nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, - **extra_kernel_kwargs ) cheb_table_case = res2["result"] @@ -1312,49 +1314,35 @@ class DrosteReduced(DrosteBase): def build_cheb_table(self, queue, **kwargs): # Build the table using Cheb modes as sources + + if self.integral_knl.is_complex_valued: + dtype = np.complex128 + else: + dtype = np.float64 + if self.dim == 1: cheb_table = ( - np.zeros((self.nfunctions, self.ntgt_points, self.ncases)) + np.nan - ) + np.zeros((self.nfunctions, self.ntgt_points, self.ncases), + dtype) + np.nan) elif self.dim == 2: cheb_table = ( - np.zeros( - ( - self.nfunctions, - self.nfunctions, - self.ntgt_points, - self.ntgt_points, - self.ncases, - ) - ) - + np.nan - ) + np.zeros((self.nfunctions, self.nfunctions, + self.ntgt_points, self.ntgt_points, + self.ncases,), dtype) + np.nan) elif self.dim == 3: cheb_table = ( - np.zeros( - ( - self.nfunctions, - self.nfunctions, - self.nfunctions, - self.ntgt_points, - self.ntgt_points, - self.ntgt_points, - self.ncases, - ) - ) - + np.nan - ) + np.zeros((self.nfunctions, self.nfunctions, self.nfunctions, + self.ntgt_points, self.ntgt_points, self.ntgt_points, + self.ncases,), dtype) + np.nan) else: raise NotImplementedError for base_case_id in range(self.nbcases): - # print(base_case_id) self.current_base_case = base_case_id case_id = self.reduce_by_symmetry.reduced_vec_ids[base_case_id] cheb_table[..., case_id] = self.call_loopy_kernel_case( queue, base_case_id, **kwargs )[..., 0] - # print("Loopy kernel finished") # {{{ expansion by symmetry flippable, swappable_groups = \ @@ -1404,10 +1392,6 @@ class 
DrosteReduced(DrosteBase): .copy() ) - # if case_id == 69: - # print("after swapping:", - # np.linalg.norm(tmp_table_part - cheb_table[...,case_id])) - # apply f, also figure out the rule for sign changes for fid in range(nflippables): if ext_id[0][fid]: @@ -1435,12 +1419,6 @@ class DrosteReduced(DrosteBase): tmp_table_part, (self.dim - 1 - iaxis) + self.dim ) - # if case_id == 69: - # print("after flipping:", - # np.linalg.norm(tmp_table_part - cheb_table[...,case_id])) - # print("Filling by symmetry", - # base_case_vec, "-->", ext_vec, swapped_axes, ext_id) - assert tuple(ext_vec) in self.reduce_by_symmetry.full_vecs ext_case_id = self.reduce_by_symmetry.full_vecs.index(tuple(ext_vec)) cheb_table[..., ext_case_id] = tmp_table_part diff --git a/volumential/expansion_wrangler_fpnd.py b/volumential/expansion_wrangler_fpnd.py index b89c48e56ef8efe4749a75cf4ec8091bd34ac2f1..fe9b059eb497d4cb8add2786b75ab4da2876a5fd 100644 --- a/volumential/expansion_wrangler_fpnd.py +++ b/volumential/expansion_wrangler_fpnd.py @@ -381,19 +381,20 @@ class FPNDSumpyExpansionWrangler( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].data), - ) + ), dtype=self.near_field_table[kname][0].data.dtype ) mode_nmlz_combined = np.zeros( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].mode_normalizers), - ) + ), dtype=self.near_field_table[kname][0].mode_normalizers.dtype ) exterior_mode_nmlz_combined = np.zeros( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].kernel_exterior_normalizers), - ) + ), + dtype=self.near_field_table[kname][0].kernel_exterior_normalizers.dtype ) for lev in range(len(self.near_field_table[kname])): table_data_combined[lev, :] = self.near_field_table[kname][lev].data @@ -994,13 +995,13 @@ class FPNDFMMLibExpansionWrangler( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].data), - ) + ), dtype=self.near_field_table[kname][0].data.dtype ) mode_nmlz_combined = np.zeros( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].mode_normalizers), - ) + ), dtype=self.near_field_table[kname][0].mode_normalizers.dtype ) for lev in range(len(self.near_field_table[kname])): table_data_combined[lev, :] = self.near_field_table[kname][lev].data @@ -1011,7 +1012,8 @@ class FPNDFMMLibExpansionWrangler( ( len(self.near_field_table[kname]), len(self.near_field_table[kname][0].kernel_exterior_normalizers), - ) + ), + dtype=self.near_field_table[kname][0].kernel_exterior_normalizers.dtype ) for lev in range(len(self.near_field_table[kname])): table_data_combined[lev, :] = self.near_field_table[kname][lev].data diff --git a/volumential/list1.py b/volumential/list1.py index ad7d9ca95914bb14045f814ac6448cab69b66aba..e845589c9bc7973dfb82dae2555e884de6070ca4 100644 --- a/volumential/list1.py +++ b/volumential/list1.py @@ -305,6 +305,11 @@ class NearFieldFromCSR(NearFieldEvalBase): def get_kernel(self): + if self.integral_kernel.is_complex_valued: + potential_dtype = np.complex128 + else: + potential_dtype = np.float64 + lpknl = loopy.make_kernel( # NOQA [ "{ [ tbox ] : 0 <= tbox < n_tgt_boxes }", @@ -411,11 +416,12 @@ class NearFieldFromCSR(NearFieldEvalBase): loopy.TemporaryVariable("tgt_table_lev", np.int32), loopy.TemporaryVariable("tgt_table_lev_tmp", np.float64), loopy.ValueArg("encoding_base", np.int32), - loopy.GlobalArg("mode_nmlz", np.float64, "n_tables, n_q_points"), - loopy.GlobalArg("exterior_mode_nmlz", np.float64, - "n_tables, n_q_points"), - loopy.GlobalArg("table_data", np.float64, - 
"n_tables, n_table_entries"), + loopy.GlobalArg("mode_nmlz", potential_dtype, + "n_tables, n_q_points"), + loopy.GlobalArg("exterior_mode_nmlz", potential_dtype, + "n_tables, n_q_points"), + loopy.GlobalArg("table_data", potential_dtype, + "n_tables, n_table_entries"), loopy.GlobalArg("source_boxes", np.int32, "n_source_boxes"), loopy.GlobalArg("box_centers", None, "dim, aligned_nboxes"), loopy.ValueArg("aligned_nboxes", np.int32), @@ -430,9 +436,7 @@ class NearFieldFromCSR(NearFieldEvalBase): lang_version=(2018, 2) ) - # print(lpknl) # lpknl = loopy.set_options(lpknl, write_code=True) - lpknl = loopy.set_options(lpknl, return_dict=True) return lpknl @@ -442,6 +446,7 @@ class NearFieldFromCSR(NearFieldEvalBase): type(self).__name__, self.name, self.kname, + "complex_kernel=" + str(self.integral_kernel.is_complex_valued), "potential_kind=%d" % self.potential_kind, "infer_scaling=" + str(self.extra_kwargs["infer_kernel_scaling"]), "scaling_policy=" + self.codegen_compute_scaling(), diff --git a/volumential/volume_fmm.py b/volumential/volume_fmm.py index cb2848191e0b045a12cb07ef0b34d9095350f089..4504f9fbcde3c6b2d6717af384adeab0f2abe529 100644 --- a/volumential/volume_fmm.py +++ b/volumential/volume_fmm.py @@ -105,8 +105,6 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, ) recorder.add("form_multipoles", timing_future) - # print(max(abs(mpole_exps))) - # }}} # {{{ Propagate multipoles upward @@ -119,8 +117,6 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, ) recorder.add("coarsen_multipoles", timing_future) - # print(max(abs(mpole_exps))) - # mpole_exps is called Phi in [1] # }}} @@ -188,8 +184,13 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, value_dtypes=[wrangler.dtype], ) - p2p_extra_kwargs = dict(wrangler.self_extra_kwargs) - p2p_extra_kwargs.update(wrangler.kernel_extra_kwargs) + if hasattr(wrangler, "self_extra_kwargs"): + p2p_extra_kwargs = dict(wrangler.self_extra_kwargs) + else: + p2p_extra_kwargs = {} + + if hasattr(wrangler, "self_extra_kwargs"): + p2p_extra_kwargs.update(wrangler.kernel_extra_kwargs) evt, (ref_pot,) = p2p( wrangler.queue, @@ -240,8 +241,6 @@ def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, ) recorder.add("multipole_to_local", timing_future) - # print(max(abs(local_exps))) - # local_exps represents both Gamma and Delta in [1] # }}}