diff --git a/sumpy/__init__.py b/sumpy/__init__.py index bb1c7069487881b5ba1836bb4c4759080a8a8640..04bdb31823c9da64b15ded4beffb3d761a2de598 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -22,6 +22,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import os from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR @@ -36,13 +37,18 @@ __all__ = [ "E2EFromCSR", "E2EFromChildren", "E2EFromParent"] -code_cache = PersistentDict("sumpy-code-cache-v4-"+VERSION_TEXT) +code_cache = PersistentDict("sumpy-code-cache-v5-"+VERSION_TEXT) # {{{ cache control CACHING_ENABLED = True +CACHING_ENABLED = ( + "SUMPY_NO_CACHE" not in os.environ + and + "CG_NO_CACHE" not in os.environ) + def set_caching_enabled(flag): """Set whether :mod:`loopy` is allowed to use disk caching for its various @@ -53,7 +59,7 @@ def set_caching_enabled(flag): class CacheMode(object): - """A context manager for setting whether :mod:`loopy` is allowed to use + """A context manager for setting whether :mod:`sumpy` is allowed to use disk caches. """ diff --git a/sumpy/codegen.py b/sumpy/codegen.py index f23822828409eba0e007094e777bf4bc36f2837a..ac746fcc091084f0ea1d324729d2cac9a466284a 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -71,6 +71,26 @@ BESSEL_PREAMBLE = """//CL// #include <pyopencl-bessel-j.cl> #include <pyopencl-bessel-y.cl> #include <pyopencl-bessel-j-complex.cl> + +typedef struct bessel_j_two_result_str +{ + cdouble_t jv, jvp1; +} bessel_j_two_result; + +bessel_j_two_result bessel_jv_two(int v, double z) +{ + bessel_j_two_result result; + result.jv = cdouble_fromreal(bessel_jv(v, z)); + result.jvp1 = cdouble_fromreal(bessel_jv(v+1, z)); + return result; +} + +bessel_j_two_result bessel_jv_two_complex(int v, cdouble_t z) +{ + bessel_j_two_result result; + bessel_j_complex(v, z, &result.jv, &result.jvp1); + return result; +} """ HANKEL_PREAMBLE = """//CL// @@ -81,11 +101,11 @@ typedef struct hank1_01_result_str cdouble_t order0, order1; } hank1_01_result; -hank1_01_result hank1_01(cdouble_t z) +hank1_01_result hank1_01(double z) { hank1_01_result result; - result.order0 = cdouble_new(bessel_j0(z.x), bessel_y0(z.x)); - result.order1 = cdouble_new(bessel_j1(z.x), bessel_y1(z.x)); + result.order0 = cdouble_new(bessel_j0(z), bessel_y0(z)); + result.order1 = cdouble_new(bessel_j1(z), bessel_y1(z)); return result; } @@ -107,9 +127,11 @@ def bessel_preamble_generator(target, seen_dtypes, seen_functions): if any(func.name == "hank1_01" for func in seen_functions): yield ("50-sumpy-hankel", HANKEL_PREAMBLE) require_bessel = True - if require_bessel or any(func.name == "bessel_jv" for func in seen_functions): + if (require_bessel + or any(func.name == "bessel_jv_two" for func in seen_functions)): yield ("40-sumpy-bessel", BESSEL_PREAMBLE) + hank1_01_result_dtype = cl.tools.get_or_register_dtype("hank1_01_result", np.dtype([ ("order0", np.complex128), @@ -117,6 +139,13 @@ hank1_01_result_dtype = cl.tools.get_or_register_dtype("hank1_01_result", ]), ) +bessel_j_two_result_dtype = cl.tools.get_or_register_dtype("bessel_j_two_result", + np.dtype([ + ("jv", np.complex128), + ("jvp1", np.complex128), + ]), + ) + def bessel_mangler(target, identifier, arg_dtypes): """A function "mangler" to make Bessel functions @@ -132,11 +161,21 @@ def bessel_mangler(target, identifier, arg_dtypes): if identifier == "hank1_01": if arg_dtypes[0].kind == "c": identifier = "hank1_01_complex" + return (np.dtype(hank1_01_result_dtype), + identifier, (np.dtype(np.complex128),)) + else: + return (np.dtype(hank1_01_result_dtype), + identifier, (np.dtype(np.float64),)) + + elif identifier == "bessel_jv_two": + if arg_dtypes[1].kind == "c": + identifier = "bessel_jv_two_complex" + return (np.dtype(bessel_j_two_result_dtype), + identifier, (np.dtype(np.int32), np.dtype(np.complex128),)) + else: + return (np.dtype(bessel_j_two_result_dtype), + identifier, (np.dtype(np.int32), np.dtype(np.float64),)) - return (np.dtype(hank1_01_result_dtype), - identifier, (np.dtype(np.complex128),)) - elif identifier == "bessel_jv": - return np.dtype(np.float64), identifier else: return None @@ -150,8 +189,8 @@ class BesselGetter(object): return prim.Variable("hank1_01")(arg) @memoize_method - def bessel_j_impl(self, order, arg): - return prim.Variable("bessel_jv")(order, arg) + def bessel_jv_two(self, order, arg): + return prim.Variable("bessel_jv_two")(order, arg) @memoize_method def hankel_1(self, order, arg): @@ -183,14 +222,14 @@ class BesselGetter(object): def bessel_j(self, order, arg): top_order = self.bessel_j_arg_to_top_order[arg] + bessel_two = ( + prim.CommonSubexpression(self.bessel_jv_two(top_order-1, arg), + "bessel_jv_two_result")) + if order == top_order: - return prim.CommonSubexpression( - self.bessel_j_impl(order, arg), - "bessel_j_%d" % order) + return prim.Lookup(bessel_two, "jvp1") elif order == top_order-1: - return prim.CommonSubexpression( - self.bessel_j_impl(order, arg), - "bessel_j_%d" % order) + return prim.Lookup(bessel_two, "jv") elif order < 0: return (-1)**order*self.bessel_j(-order, arg) else: diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9ea1ef2ac65c668a3346f9ab73963a8d59a2fec9..3138ce19ed403bf02894226dc7baecbde63d7f8e 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -168,16 +168,17 @@ class LayerPotentialBase(KernelComputation): import pyopencl as cl dev = self.context.devices[0] - if dev.type & cl.device_type.CPU: - loopy_knl = lp.split_iname(loopy_knl, "itgt", 16, outer_tag="g.0", - inner_tag="l.0") - loopy_knl = lp.split_iname(loopy_knl, "isrc", 256) - loopy_knl = lp.set_loop_priority(loopy_knl, - ["isrc_outer", "itgt_inner"]) - else: - from warnings import warn - warn("don't know how to tune layer potential computation for '%s'" % dev) - loopy_knl = lp.split_iname(loopy_knl, "itgt", 128, outer_tag="g.0") + if 0: + if dev.type & cl.device_type.CPU: + loopy_knl = lp.split_iname(loopy_knl, "itgt", 16, outer_tag="g.0", + inner_tag="l.0") + loopy_knl = lp.split_iname(loopy_knl, "isrc", 256) + loopy_knl = lp.set_loop_priority(loopy_knl, + ["isrc_outer", "itgt_inner"]) + else: + from warnings import warn + warn("don't know how to tune layer potential computation for '%s'" % dev) + loopy_knl = lp.split_iname(loopy_knl, "itgt", 128, outer_tag="g.0") return loopy_knl @@ -250,6 +251,8 @@ class LayerPotentialMatrixGenerator(LayerPotentialBase): def __call__(self, queue, targets, sources, centers, **kwargs): knl = self.get_optimized_kernel() + knl = lp.set_options(knl, cl_build_options=["-g", "-O0"]) + return knl(queue, src=sources, tgt=targets, center=centers, **kwargs) diff --git a/test/test_kernels.py b/test/test_kernels.py index ad53759e12126f7eca109952075f6dcb18f656bd..c44324d5df2d2c9aab349a7201cee9dc3220b33c 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -88,21 +88,26 @@ def test_p2p(ctx_getter): assert rel_err < 1e-3 -@pytest.mark.parametrize("order", [2, 3, 4, 5]) -@pytest.mark.parametrize(("knl", "expn_class"), [ +@pytest.mark.parametrize("order", [4]) +@pytest.mark.parametrize(("base_knl", "expn_class"), [ (LaplaceKernel(2), VolumeTaylorLocalExpansion), (LaplaceKernel(2), VolumeTaylorMultipoleExpansion), (HelmholtzKernel(2), VolumeTaylorMultipoleExpansion), (HelmholtzKernel(2), VolumeTaylorLocalExpansion), (HelmholtzKernel(2), H2DLocalExpansion), - (HelmholtzKernel(2), H2DMultipoleExpansion) + (HelmholtzKernel(2), H2DMultipoleExpansion), + + (HelmholtzKernel(2, allow_evanescent=True), VolumeTaylorMultipoleExpansion), + (HelmholtzKernel(2, allow_evanescent=True), VolumeTaylorLocalExpansion), + (HelmholtzKernel(2, allow_evanescent=True), H2DLocalExpansion), + (HelmholtzKernel(2, allow_evanescent=True), H2DMultipoleExpansion), ]) @pytest.mark.parametrize("with_source_derivative", [ False, True ]) -def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): +def test_p2e2p(ctx_getter, base_knl, expn_class, order, with_source_derivative): #logging.basicConfig(level=logging.INFO) from sympy.core.cache import clear_cache @@ -117,11 +122,16 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): nsources = 100 extra_kwargs = {} - if isinstance(knl, HelmholtzKernel): - extra_kwargs["k"] = 0.05 + if isinstance(base_knl, HelmholtzKernel): + if base_knl.allow_evanescent: + extra_kwargs["k"] = 0.2 * (0.707 + 0.707j) + else: + extra_kwargs["k"] = 0.2 if with_source_derivative: - knl = DirectionalSourceDerivative(knl, "dir_vec") + knl = DirectionalSourceDerivative(base_knl, "dir_vec") + else: + knl = base_knl out_kernels = [ knl, @@ -277,6 +287,14 @@ def test_p2e2p(ctx_getter, knl, expn_class, order, with_source_derivative): slack += 1 grad_slack += 2 + if isinstance(base_knl, HelmholtzKernel) and base_knl.allow_evanescent: + slack += 0.5 + grad_slack += 0.5 + + if expn_class is VolumeTaylorMultipoleExpansion: + slack += 0.3 + grad_slack += 0.3 + assert eoc_rec_pot.order_estimate() > tgt_order - slack assert eoc_rec_grad_x.order_estimate() > tgt_order_grad - grad_slack