From 375adc07bf4424c3cd2f385de028e63274c2a1e0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 25 Jun 2012 20:59:59 -0400 Subject: [PATCH] Add Bessel Y. --- pyopencl/clmath.py | 15 +- pyopencl/elementwise.py | 10 +- src/cl/pyopencl-airy.cl | 2 +- src/cl/pyopencl-bessel-j.cl | 124 ++++------ src/cl/pyopencl-bessel-y.cl | 435 ++++++++++++++++++++++++++++++++++++ src/cl/pyopencl-cephes.cl | 69 ------ src/cl/pyopencl-eval-tbl.cl | 120 ++++++++++ test/test_clmath.py | 92 +++++--- 8 files changed, 670 insertions(+), 197 deletions(-) create mode 100644 src/cl/pyopencl-bessel-y.cl delete mode 100644 src/cl/pyopencl-cephes.cl create mode 100644 src/cl/pyopencl-eval-tbl.cl diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py index 56b98ec3..f47d71e7 100644 --- a/pyopencl/clmath.py +++ b/pyopencl/clmath.py @@ -157,13 +157,18 @@ trunc = _make_unary_array_func("trunc") @cl_array.elwise_kernel_runner def _bessel_jn(result, sig, exp): - return elementwise.get_bessel_jn_kernel(result.context) + return elementwise.get_bessel_kernel(result.context, "j") + +@cl_array.elwise_kernel_runner +def _bessel_yn(result, sig, exp): + return elementwise.get_bessel_kernel(result.context, "y") def bessel_jn(n, x, queue=None): - """Return a new array of floating point values composed from the - entries of `significand` and `exponent`, paired together as - `result = significand * 2**exponent`. - """ result = x._new_like_me(queue=queue) _bessel_jn(result, n, x) return result + +def bessel_yn(n, x, queue=None): + result = x._new_like_me(queue=queue) + _bessel_yn(result, n, x) + return result diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index 6efd6498..ed755d13 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -643,14 +643,14 @@ def get_ldexp_kernel(context): @context_dependent_memoize -def get_bessel_jn_kernel(context): +def get_bessel_kernel(context, which_func): return get_elwise_kernel(context, "double *z, int ord_n, double *x", - "z[i] = bessel_jn(ord_n, x[i])", - name="bessel_jn_kernel", + "z[i] = bessel_%sn(ord_n, x[i])" % which_func, + name="bessel_%sn_kernel" % which_func, preamble=""" - #include - """) + #include + """ % which_func) @context_dependent_memoize diff --git a/src/cl/pyopencl-airy.cl b/src/cl/pyopencl-airy.cl index 23f1dae3..41d24036 100644 --- a/src/cl/pyopencl-airy.cl +++ b/src/cl/pyopencl-airy.cl @@ -8,7 +8,7 @@ #pragma once -#include +#include __constant const double airy_maxairy = 103.892; diff --git a/src/cl/pyopencl-bessel-j.cl b/src/cl/pyopencl-bessel-j.cl index 229c166f..9801c7a6 100644 --- a/src/cl/pyopencl-bessel-j.cl +++ b/src/cl/pyopencl-bessel-j.cl @@ -19,52 +19,16 @@ #pragma once -#include +#include #include -typedef double T; - -// {{{ evaluate_rational - -T evaluate_rational_backend(__constant const T* num, __constant const T* denom, T z, int count) -{ - T s1, s2; - if(z <= 1) - { - s1 = num[count-1]; - s2 = denom[count-1]; - for(int i = (int)count - 2; i >= 0; --i) - { - s1 *= z; - s2 *= z; - s1 += num[i]; - s2 += denom[i]; - } - } - else - { - z = 1 / z; - s1 = num[0]; - s2 = denom[0]; - for(unsigned i = 1; i < count; ++i) - { - s1 *= z; - s2 *= z; - s1 += num[i]; - s2 += denom[i]; - } - } - return s1 / s2; -} - -// }}} - -#define evaluate_rational(num, denom, z) \ - evaluate_rational_backend(num, denom, z, sizeof(num)/sizeof(T)) +typedef double bessel_j_scalar_type; +// FIXME: T is really a bad name +typedef bessel_j_scalar_type T; // {{{ bessel_j0 -__constant const T bessel_j0_P1[] = { +__constant const bessel_j_scalar_type bessel_j0_P1[] = { -4.1298668500990866786e+11, 2.7282507878605942706e+10, -6.2140700423540120665e+08, @@ -73,7 +37,7 @@ __constant const T bessel_j0_P1[] = { 1.0344222815443188943e+02, -1.2117036164593528341e-01 }; -__constant const T bessel_j0_Q1[] = { +__constant const bessel_j_scalar_type bessel_j0_Q1[] = { 2.3883787996332290397e+12, 2.6328198300859648632e+10, 1.3985097372263433271e+08, @@ -82,7 +46,7 @@ __constant const T bessel_j0_Q1[] = { 1.0, 0.0 }; -__constant const T bessel_j0_P2[] = { +__constant const bessel_j_scalar_type bessel_j0_P2[] = { -1.8319397969392084011e+03, -1.2254078161378989535e+04, -7.2879702464464618998e+03, @@ -92,7 +56,7 @@ __constant const T bessel_j0_P2[] = { 7.4321196680624245801e+02, 4.8591703355916499363e+01 }; -__constant const T bessel_j0_Q2[] = { +__constant const bessel_j_scalar_type bessel_j0_Q2[] = { -3.5783478026152301072e+05, 2.4599102262586308984e+05, -8.4055062591169562211e+04, @@ -102,7 +66,7 @@ __constant const T bessel_j0_Q2[] = { -2.5258076240801555057e+01, 1.0 }; -__constant const T bessel_j0_PC[] = { +__constant const bessel_j_scalar_type bessel_j0_PC[] = { 2.2779090197304684302e+04, 4.1345386639580765797e+04, 2.1170523380864944322e+04, @@ -110,7 +74,7 @@ __constant const T bessel_j0_PC[] = { 1.5376201909008354296e+02, 8.8961548424210455236e-01 }; -__constant const T bessel_j0_QC[] = { +__constant const bessel_j_scalar_type bessel_j0_QC[] = { 2.2779090197304684318e+04, 4.1370412495510416640e+04, 2.1215350561880115730e+04, @@ -118,7 +82,7 @@ __constant const T bessel_j0_QC[] = { 1.5711159858080893649e+02, 1.0 }; -__constant const T bessel_j0_PS[] = { +__constant const bessel_j_scalar_type bessel_j0_PS[] = { -8.9226600200800094098e+01, -1.8591953644342993800e+02, -1.1183429920482737611e+02, @@ -126,7 +90,7 @@ __constant const T bessel_j0_PS[] = { -1.2441026745835638459e+00, -8.8033303048680751817e-03 }; -__constant const T bessel_j0_QS[] = { +__constant const bessel_j_scalar_type bessel_j0_QS[] = { 5.7105024128512061905e+03, 1.1951131543434613647e+04, 7.2642780169211018836e+03, @@ -135,16 +99,16 @@ __constant const T bessel_j0_QS[] = { 1.0 }; -T bessel_j0(T x) +bessel_j_scalar_type bessel_j0(bessel_j_scalar_type x) { - const T x1 = 2.4048255576957727686e+00, + const bessel_j_scalar_type x1 = 2.4048255576957727686e+00, x2 = 5.5200781102863106496e+00, x11 = 6.160e+02, x12 = -1.42444230422723137837e-03, x21 = 1.4130e+03, x22 = 5.46860286310649596604e-04; - T value, factor, r, rc, rs; + bessel_j_scalar_type value, factor, r, rc, rs; if (x < 0) { @@ -156,25 +120,25 @@ T bessel_j0(T x) } if (x <= 4) // x in (0, 4] { - T y = x * x; - r = evaluate_rational(bessel_j0_P1, bessel_j0_Q1, y); + bessel_j_scalar_type y = x * x; + r = boost_evaluate_rational(bessel_j0_P1, bessel_j0_Q1, y); factor = (x + x1) * ((x - x11/256) - x12); value = factor * r; } else if (x <= 8.0) // x in (4, 8] { - T y = 1 - (x * x)/64; - r = evaluate_rational(bessel_j0_P2, bessel_j0_Q2, y); + bessel_j_scalar_type y = 1 - (x * x)/64; + r = boost_evaluate_rational(bessel_j0_P2, bessel_j0_Q2, y); factor = (x + x2) * ((x - x21/256) - x22); value = factor * r; } else // x in (8, \infty) { - T y = 8 / x; - T y2 = y * y; - T z = x - 0.25f * M_PI; - rc = evaluate_rational(bessel_j0_PC, bessel_j0_QC, y2); - rs = evaluate_rational(bessel_j0_PS, bessel_j0_QS, y2); + bessel_j_scalar_type y = 8 / x; + bessel_j_scalar_type y2 = y * y; + bessel_j_scalar_type z = x - 0.25f * M_PI; + rc = boost_evaluate_rational(bessel_j0_PC, bessel_j0_QC, y2); + rs = boost_evaluate_rational(bessel_j0_PS, bessel_j0_QS, y2); factor = sqrt(2 / (x * M_PI)); value = factor * (rc * cos(z) - y * rs * sin(z)); } @@ -186,7 +150,7 @@ T bessel_j0(T x) // {{{ bessel_j1 -__constant const T bessel_j1_P1[] = { +__constant const bessel_j_scalar_type bessel_j1_P1[] = { -1.4258509801366645672e+11, 6.6781041261492395835e+09, -1.1548696764841276794e+08, @@ -195,7 +159,7 @@ __constant const T bessel_j1_P1[] = { 1.0650724020080236441e+01, -1.0767857011487300348e-02 }; -__constant const T bessel_j1_Q1[] = { +__constant const bessel_j_scalar_type bessel_j1_Q1[] = { 4.1868604460820175290e+12, 4.2091902282580133541e+10, 2.0228375140097033958e+08, @@ -204,7 +168,7 @@ __constant const T bessel_j1_Q1[] = { 1.0, 0.0 }; -__constant const T bessel_j1_P2[] = { +__constant const bessel_j_scalar_type bessel_j1_P2[] = { -1.7527881995806511112e+16, 1.6608531731299018674e+15, -3.6658018905416665164e+13, @@ -214,7 +178,7 @@ __constant const T bessel_j1_P2[] = { -7.5023342220781607561e+03, 4.6179191852758252278e+00 }; -__constant const T bessel_j1_Q2[] = { +__constant const bessel_j_scalar_type bessel_j1_Q2[] = { 1.7253905888447681194e+18, 1.7128800897135812012e+16, 8.4899346165481429307e+13, @@ -224,7 +188,7 @@ __constant const T bessel_j1_Q2[] = { 1.3886978985861357615e+03, 1.0 }; -__constant const T bessel_j1_PC[] = { +__constant const bessel_j_scalar_type bessel_j1_PC[] = { -4.4357578167941278571e+06, -9.9422465050776411957e+06, -6.6033732483649391093e+06, @@ -233,7 +197,7 @@ __constant const T bessel_j1_PC[] = { -1.6116166443246101165e+03, 0.0 }; -__constant const T bessel_j1_QC[] = { +__constant const bessel_j_scalar_type bessel_j1_QC[] = { -4.4357578167941278568e+06, -9.9341243899345856590e+06, -6.5853394797230870728e+06, @@ -242,7 +206,7 @@ __constant const T bessel_j1_QC[] = { -1.4550094401904961825e+03, 1.0 }; -__constant const T bessel_j1_PS[] = { +__constant const bessel_j_scalar_type bessel_j1_PS[] = { 3.3220913409857223519e+04, 8.5145160675335701966e+04, 6.6178836581270835179e+04, @@ -251,7 +215,7 @@ __constant const T bessel_j1_PS[] = { 3.5265133846636032186e+01, 0.0 }; -__constant const T bessel_j1_QS[] = { +__constant const bessel_j_scalar_type bessel_j1_QS[] = { 7.0871281941028743574e+05, 1.8194580422439972989e+06, 1.4194606696037208929e+06, @@ -262,16 +226,16 @@ __constant const T bessel_j1_QS[] = { }; -T bessel_j1(T x) +bessel_j_scalar_type bessel_j1(bessel_j_scalar_type x) { - const T x1 = 3.8317059702075123156e+00, + const bessel_j_scalar_type x1 = 3.8317059702075123156e+00, x2 = 7.0155866698156187535e+00, x11 = 9.810e+02, x12 = -3.2527979248768438556e-04, x21 = 1.7960e+03, x22 = -3.8330184381246462950e-05; - T value, factor, r, rc, rs, w; + bessel_j_scalar_type value, factor, r, rc, rs, w; w = fabs(x); if (x == 0) @@ -280,25 +244,25 @@ T bessel_j1(T x) } if (w <= 4) // w in (0, 4] { - T y = x * x; - r = evaluate_rational(bessel_j1_P1, bessel_j1_Q1, y); + bessel_j_scalar_type y = x * x; + r = boost_evaluate_rational(bessel_j1_P1, bessel_j1_Q1, y); factor = w * (w + x1) * ((w - x11/256) - x12); value = factor * r; } else if (w <= 8) // w in (4, 8] { - T y = x * x; - r = evaluate_rational(bessel_j1_P2, bessel_j1_Q2, y); + bessel_j_scalar_type y = x * x; + r = boost_evaluate_rational(bessel_j1_P2, bessel_j1_Q2, y); factor = w * (w + x2) * ((w - x21/256) - x22); value = factor * r; } else // w in (8, \infty) { - T y = 8 / w; - T y2 = y * y; - T z = w - 0.75f * M_PI; - rc = evaluate_rational(bessel_j1_PC, bessel_j1_QC, y2); - rs = evaluate_rational(bessel_j1_PS, bessel_j1_QS, y2); + bessel_j_scalar_type y = 8 / w; + bessel_j_scalar_type y2 = y * y; + bessel_j_scalar_type z = w - 0.75f * M_PI; + rc = boost_evaluate_rational(bessel_j1_PC, bessel_j1_QC, y2); + rs = boost_evaluate_rational(bessel_j1_PS, bessel_j1_QS, y2); factor = sqrt(2 / (w * M_PI)); value = factor * (rc * cos(z) - y * rs * sin(z)); } diff --git a/src/cl/pyopencl-bessel-y.cl b/src/cl/pyopencl-bessel-y.cl new file mode 100644 index 00000000..51eabd94 --- /dev/null +++ b/src/cl/pyopencl-bessel-y.cl @@ -0,0 +1,435 @@ +// Pieced together from Boost C++ and Cephes by +// Andreas Kloeckner (C) 2012 +// +// Pieces from: +// +// Copyright (c) 2006 Xiaogang Zhang, John Maddock +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See +// http://www.boost.org/LICENSE_1_0.txt) +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// What you see here may be used freely, but it comes with no support or +// guarantee. + + + + + +#pragma once + +#include +#include + +typedef double bessel_y_scalar_type; + +// {{{ bessel_y0 + +__constant const bessel_y_scalar_type bessel_y0_P1[] = { + 1.0723538782003176831e+11, + -8.3716255451260504098e+09, + 2.0422274357376619816e+08, + -2.1287548474401797963e+06, + 1.0102532948020907590e+04, + -1.8402381979244993524e+01, +}; +__constant const bessel_y_scalar_type bessel_y0_Q1[] = { + 5.8873865738997033405e+11, + 8.1617187777290363573e+09, + 5.5662956624278251596e+07, + 2.3889393209447253406e+05, + 6.6475986689240190091e+02, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y0_P2[] = { + -2.2213976967566192242e+13, + -5.5107435206722644429e+11, + 4.3600098638603061642e+10, + -6.9590439394619619534e+08, + 4.6905288611678631510e+06, + -1.4566865832663635920e+04, + 1.7427031242901594547e+01, +}; +__constant const bessel_y_scalar_type bessel_y0_Q2[] = { + 4.3386146580707264428e+14, + 5.4266824419412347550e+12, + 3.4015103849971240096e+10, + 1.3960202770986831075e+08, + 4.0669982352539552018e+05, + 8.3030857612070288823e+02, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y0_P3[] = { + -8.0728726905150210443e+15, + 6.7016641869173237784e+14, + -1.2829912364088687306e+11, + -1.9363051266772083678e+11, + 2.1958827170518100757e+09, + -1.0085539923498211426e+07, + 2.1363534169313901632e+04, + -1.7439661319197499338e+01, +}; +__constant const bessel_y_scalar_type bessel_y0_Q3[] = { + 3.4563724628846457519e+17, + 3.9272425569640309819e+15, + 2.2598377924042897629e+13, + 8.6926121104209825246e+10, + 2.4727219475672302327e+08, + 5.3924739209768057030e+05, + 8.7903362168128450017e+02, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y0_PC[] = { + 2.2779090197304684302e+04, + 4.1345386639580765797e+04, + 2.1170523380864944322e+04, + 3.4806486443249270347e+03, + 1.5376201909008354296e+02, + 8.8961548424210455236e-01, +}; +__constant const bessel_y_scalar_type bessel_y0_QC[] = { + 2.2779090197304684318e+04, + 4.1370412495510416640e+04, + 2.1215350561880115730e+04, + 3.5028735138235608207e+03, + 1.5711159858080893649e+02, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y0_PS[] = { + -8.9226600200800094098e+01, + -1.8591953644342993800e+02, + -1.1183429920482737611e+02, + -2.2300261666214198472e+01, + -1.2441026745835638459e+00, + -8.8033303048680751817e-03, +}; +__constant const bessel_y_scalar_type bessel_y0_QS[] = { + 5.7105024128512061905e+03, + 1.1951131543434613647e+04, + 7.2642780169211018836e+03, + 1.4887231232283756582e+03, + 9.0593769594993125859e+01, + 1.0, +}; + +bessel_y_scalar_type bessel_y0(bessel_y_scalar_type x) +{ + const bessel_y_scalar_type + x1 = 8.9357696627916752158e-01, + x2 = 3.9576784193148578684e+00, + x3 = 7.0860510603017726976e+00, + x11 = 2.280e+02, + x12 = 2.9519662791675215849e-03, + x21 = 1.0130e+03, + x22 = 6.4716931485786837568e-04, + x31 = 1.8140e+03, + x32 = 1.1356030177269762362e-04; + + bessel_y_scalar_type value, factor, r, rc, rs; + + if (x < 0) + { + //return policies::raise_domain_error(function, + // "Got x = %1% but x must be non-negative, complex result not supported.", x, pol); + return nan((uint)22); + } + if (x == 0) + { + return -DBL_MAX; + } + if (x <= 3) // x in (0, 3] + { + bessel_y_scalar_type y = x * x; + bessel_y_scalar_type z = 2 * log(x/x1) * bessel_j0(x) / M_PI; + r = boost_evaluate_rational(bessel_y0_P1, bessel_y0_Q1, y); + factor = (x + x1) * ((x - x11/256) - x12); + value = z + factor * r; + } + else if (x <= 5.5f) // x in (3, 5.5] + { + bessel_y_scalar_type y = x * x; + bessel_y_scalar_type z = 2 * log(x/x2) * bessel_j0(x) / M_PI; + r = boost_evaluate_rational(bessel_y0_P2, bessel_y0_Q2, y); + factor = (x + x2) * ((x - x21/256) - x22); + value = z + factor * r; + } + else if (x <= 8) // x in (5.5, 8] + { + bessel_y_scalar_type y = x * x; + bessel_y_scalar_type z = 2 * log(x/x3) * bessel_j0(x) / M_PI; + r = boost_evaluate_rational(bessel_y0_P3, bessel_y0_Q3, y); + factor = (x + x3) * ((x - x31/256) - x32); + value = z + factor * r; + } + else // x in (8, \infty) + { + bessel_y_scalar_type y = 8 / x; + bessel_y_scalar_type y2 = y * y; + bessel_y_scalar_type z = x - 0.25f * M_PI; + rc = boost_evaluate_rational(bessel_y0_PC, bessel_y0_QC, y2); + rs = boost_evaluate_rational(bessel_y0_PS, bessel_y0_QS, y2); + factor = sqrt(2 / (x * M_PI)); + value = factor * (rc * sin(z) + y * rs * cos(z)); + } + + return value; +} + +// }}} + +// {{{ bessel_y1 + +__constant const bessel_y_scalar_type bessel_y1_P1[] = { + 4.0535726612579544093e+13, + 5.4708611716525426053e+12, + -3.7595974497819597599e+11, + 7.2144548214502560419e+09, + -5.9157479997408395984e+07, + 2.2157953222280260820e+05, + -3.1714424660046133456e+02, +}; +__constant const bessel_y_scalar_type bessel_y1_Q1[] = { + 3.0737873921079286084e+14, + 4.1272286200406461981e+12, + 2.7800352738690585613e+10, + 1.2250435122182963220e+08, + 3.8136470753052572164e+05, + 8.2079908168393867438e+02, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y1_P2[] = { + 1.1514276357909013326e+19, + -5.6808094574724204577e+18, + -2.3638408497043134724e+16, + 4.0686275289804744814e+15, + -5.9530713129741981618e+13, + 3.7453673962438488783e+11, + -1.1957961912070617006e+09, + 1.9153806858264202986e+06, + -1.2337180442012953128e+03, +}; +__constant const bessel_y_scalar_type bessel_y1_Q2[] = { + 5.3321844313316185697e+20, + 5.6968198822857178911e+18, + 3.0837179548112881950e+16, + 1.1187010065856971027e+14, + 3.0221766852960403645e+11, + 6.3550318087088919566e+08, + 1.0453748201934079734e+06, + 1.2855164849321609336e+03, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y1_PC[] = { + -4.4357578167941278571e+06, + -9.9422465050776411957e+06, + -6.6033732483649391093e+06, + -1.5235293511811373833e+06, + -1.0982405543459346727e+05, + -1.6116166443246101165e+03, + 0.0, +}; +__constant const bessel_y_scalar_type bessel_y1_QC[] = { + -4.4357578167941278568e+06, + -9.9341243899345856590e+06, + -6.5853394797230870728e+06, + -1.5118095066341608816e+06, + -1.0726385991103820119e+05, + -1.4550094401904961825e+03, + 1.0, +}; +__constant const bessel_y_scalar_type bessel_y1_PS[] = { + 3.3220913409857223519e+04, + 8.5145160675335701966e+04, + 6.6178836581270835179e+04, + 1.8494262873223866797e+04, + 1.7063754290207680021e+03, + 3.5265133846636032186e+01, + 0.0, +}; +__constant const bessel_y_scalar_type bessel_y1_QS[] = { + 7.0871281941028743574e+05, + 1.8194580422439972989e+06, + 1.4194606696037208929e+06, + 4.0029443582266975117e+05, + 3.7890229745772202641e+04, + 8.6383677696049909675e+02, + 1.0, +}; + +bessel_y_scalar_type bessel_y1(bessel_y_scalar_type x) +{ + const bessel_y_scalar_type + x1 = 2.1971413260310170351e+00, + x2 = 5.4296810407941351328e+00, + x11 = 5.620e+02, + x12 = 1.8288260310170351490e-03, + x21 = 1.3900e+03, + x22 = -6.4592058648672279948e-06 + ; + bessel_y_scalar_type value, factor, r, rc, rs; + + if (x <= 0) + { + // domain error + return nan((uint)22); + } + if (x <= 4) // x in (0, 4] + { + bessel_y_scalar_type y = x * x; + bessel_y_scalar_type z = 2 * log(x/x1) * bessel_j1(x) / M_PI; + r = boost_evaluate_rational(bessel_y1_P1, bessel_y1_Q1, y); + factor = (x + x1) * ((x - x11/256) - x12) / x; + value = z + factor * r; + } + else if (x <= 8) // x in (4, 8] + { + bessel_y_scalar_type y = x * x; + bessel_y_scalar_type z = 2 * log(x/x2) * bessel_j1(x) / M_PI; + r = boost_evaluate_rational(bessel_y1_P2, bessel_y1_Q2, y); + factor = (x + x2) * ((x - x21/256) - x22) / x; + value = z + factor * r; + } + else // x in (8, \infty) + { + bessel_y_scalar_type y = 8 / x; + bessel_y_scalar_type y2 = y * y; + bessel_y_scalar_type z = x - 0.75f * M_PI; + rc = boost_evaluate_rational(bessel_y1_PC, bessel_y1_QC, y2); + rs = boost_evaluate_rational(bessel_y1_PS, bessel_y1_QS, y2); + factor = sqrt(2 / (x * M_PI)); + value = factor * (rc * sin(z) + y * rs * cos(z)); + } + + return value; +} + +// }}} + +// {{{ bessel_yn + +bessel_y_scalar_type bessel_yn_small_z(int n, bessel_y_scalar_type z, bessel_y_scalar_type* scale) +{ + // + // See http://functions.wolfram.com/Bessel-TypeFunctions/BesselY/06/01/04/01/02/ + // + // Note that when called we assume that x < epsilon and n is a positive integer. + // + // BOOST_ASSERT(n >= 0); + // BOOST_ASSERT((z < policies::get_epsilon())); + + if(n == 0) + { + return (2 / M_PI) * (log(z / 2) + M_E); + } + else if(n == 1) + { + return (z / M_PI) * log(z / 2) + - 2 / (M_PI * z) + - (z / (2 * M_PI)) * (1 - 2 * M_E); + } + else if(n == 2) + { + return (z * z) / (4 * M_PI) * log(z / 2) + - (4 / (M_PI * z * z)) + - ((z * z) / (8 * M_PI)) * (3./2 - 2 * M_E); + } + else + { + bessel_y_scalar_type p = pow(z / 2, n); + bessel_y_scalar_type result = -((tgamma(n) / M_PI)); + if(p * DBL_MAX < result) + { + bessel_y_scalar_type div = DBL_MAX / 8; + result /= div; + *scale /= div; + if(p * DBL_MAX < result) + { + return -DBL_MAX; + } + } + return result / p; + } +} + + + + +bessel_y_scalar_type bessel_yn(int n, bessel_y_scalar_type x) +{ + //BOOST_MATH_STD_USING + bessel_y_scalar_type value, factor, current, prev; + + //using namespace boost::math::tools; + + if ((x == 0) && (n == 0)) + { + return -DBL_MAX; + } + if (x <= 0) + { + //return policies::raise_domain_error(function, + //"Got x = %1%, but x must be > 0, complex result not supported.", x, pol); + return nan((uint)22); + } + + // + // Reflection comes first: + // + if (n < 0) + { + factor = (n & 0x1) ? -1 : 1; // Y_{-n}(z) = (-1)^n Y_n(z) + n = -n; + } + else + { + factor = 1; + } + + if(x < DBL_EPSILON) + { + bessel_y_scalar_type scale = 1; + value = bessel_yn_small_z(n, x, &scale); + if(DBL_MAX * fabs(scale) < fabs(value)) + return copysign(1, scale) * copysign(1, value) * DBL_MAX; + value /= scale; + } + else if (n == 0) + { + value = bessel_y0(x); + } + else if (n == 1) + { + value = factor * bessel_y1(x); + } + else + { + prev = bessel_y0(x); + current = bessel_y1(x); + int k = 1; + // BOOST_ASSERT(k < n); + do + { + bessel_y_scalar_type fact = 2 * k / x; + if((DBL_MAX - fabs(prev)) / fact < fabs(current)) + { + prev /= current; + factor /= current; + current = 1; + } + value = fact * current - prev; + prev = current; + current = value; + ++k; + } + while(k < n); + if(fabs(DBL_MAX * factor) < fabs(value)) + return sign(value) * sign(value) * DBL_MAX; + value /= factor; + } + return value; +} + +// }}} + +// vim: fdm=marker diff --git a/src/cl/pyopencl-cephes.cl b/src/cl/pyopencl-cephes.cl deleted file mode 100644 index d0ac0605..00000000 --- a/src/cl/pyopencl-cephes.cl +++ /dev/null @@ -1,69 +0,0 @@ -// Ported from Cephes by -// Andreas Kloeckner (C) 2012 -// -// Cephes Math Library Release 2.8: June, 2000 -// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier -// What you see here may be used freely, but it comes with no support or -// guarantee. - -#pragma once - -// {{{ cephes_polevl - -/* - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - */ - -double cephes_polevl(double x, __constant const double *coef, int N) -{ - double ans; - int i; - __constant const double *p; - - p = coef; - ans = *p++; - i = N; - - do - ans = ans * x + *p++; - while( --i ); - - return( ans ); -} - -// }}} - -// {{{ cephes_p1evl - -double cephes_p1evl( double x, __constant const double *coef, int N ) -{ - double ans; - __constant const double *p; - int i; - - p = coef; - ans = x + *p++; - i = N-1; - - do - ans = ans * x + *p++; - while( --i ); - - return( ans ); -} diff --git a/src/cl/pyopencl-eval-tbl.cl b/src/cl/pyopencl-eval-tbl.cl new file mode 100644 index 00000000..c192fdcf --- /dev/null +++ b/src/cl/pyopencl-eval-tbl.cl @@ -0,0 +1,120 @@ +// Pieced together from Boost C++ and Cephes by +// Andreas Kloeckner (C) 2012 +// +// Pieces from: +// +// Copyright (c) 2006 Xiaogang Zhang, John Maddock +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See +// http://www.boost.org/LICENSE_1_0.txt) +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// What you see here may be used freely, but it comes with no support or +// guarantee. + +#pragma once + +typedef double special_func_scalar_type; + +// {{{ cephes_polevl + +/* + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + */ + +special_func_scalar_type cephes_polevl(special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N) +{ + special_func_scalar_type ans; + int i; + __constant const special_func_scalar_type *p; + + p = coef; + ans = *p++; + i = N; + + do + ans = ans * x + *p++; + while( --i ); + + return( ans ); +} + +// }}} + +// {{{ cephes_p1evl + +special_func_scalar_type cephes_p1evl( special_func_scalar_type x, __constant const special_func_scalar_type *coef, int N ) +{ + special_func_scalar_type ans; + __constant const special_func_scalar_type *p; + int i; + + p = coef; + ans = x + *p++; + i = N-1; + + do + ans = ans * x + *p++; + while( --i ); + + return( ans ); +} + +// }}} + +// {{{ boost_evaluate_rational + +special_func_scalar_type boost_evaluate_rational_backend(__constant const special_func_scalar_type* num, __constant const special_func_scalar_type* denom, special_func_scalar_type z, int count) +{ + special_func_scalar_type s1, s2; + if(z <= 1) + { + s1 = num[count-1]; + s2 = denom[count-1]; + for(int i = (int)count - 2; i >= 0; --i) + { + s1 *= z; + s2 *= z; + s1 += num[i]; + s2 += denom[i]; + } + } + else + { + z = 1 / z; + s1 = num[0]; + s2 = denom[0]; + for(unsigned i = 1; i < count; ++i) + { + s1 *= z; + s2 *= z; + s1 += num[i]; + s2 += denom[i]; + } + } + return s1 / s2; +} + +#define boost_evaluate_rational(num, denom, z) \ + boost_evaluate_rational_backend(num, denom, z, sizeof(num)/sizeof(special_func_scalar_type)) + +// }}} + +// vim: fdm=marker diff --git a/test/test_clmath.py b/test/test_clmath.py index ebb0d031..1cea469a 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -180,7 +180,7 @@ def test_frexp(ctx_factory): assert ex_true == exponents[i] @pytools.test.mark_test.opencl -def test_bessel_j(ctx_factory): +def test_bessel(ctx_factory): try: import scipy.special as spec except ImportError: @@ -198,7 +198,7 @@ def test_bessel_j(ctx_factory): nterms = 30 try: - from hellskitchen._native import jfuns2d + from hellskitchen._native import jfuns2d, hank103_vec except ImportError: use_hellskitchen = False else: @@ -209,46 +209,64 @@ def test_bessel_j(ctx_factory): else: a = np.logspace(-5, 5, 10**6) - if use_hellskitchen: - hellskitchen_result = np.empty((len(a), nterms), dtype=np.complex128) - for i, a_i in enumerate(a): - if i % 10000 == 0: - print("%.1f %%" % (100 * i/len(a))) - ier, fjs, _, _ = jfuns2d(nterms, a_i, 1, 0, 10000) - hellskitchen_result[i] = fjs[:nterms] - assert ier == 0 - - a_dev = cl_array.to_device(queue, a) - - for n in range(0, nterms): - cl_bessel = clmath.bessel_jn(n, a_dev).get() - scipy_bessel = spec.jn(n, a) - - error_scipy = np.max(np.abs(cl_bessel-scipy_bessel)) - assert error_scipy < 1e-10, error_scipy - - if use_hellskitchen: - hk_bessel = hellskitchen_result[:, n] - error_hk = np.max(np.abs(cl_bessel-hk_bessel)) - assert error_hk < 1e-10, error_hk - error_hk_scipy = np.max(np.abs(scipy_bessel-hk_bessel)) - print(n, error_scipy, error_hk, error_hk_scipy) + for which_func, cl_func, scipy_func, is_rel in [ + #("j", clmath.bessel_jn, spec.jn, False), + ("y", clmath.bessel_yn, spec.yn, True) + ]: + if is_rel: + def get_err(check, ref): + return np.max(np.abs(check-ref)) / np.max(np.abs(ref)) else: - print(n, error_scipy) + def get_err(check, ref): + return np.max(np.abs(check-ref)) - assert not np.isnan(cl_bessel).any() + if use_hellskitchen: + hellskitchen_result = np.empty((len(a), nterms), dtype=np.complex128) + if which_func == "j": + for i, a_i in enumerate(a): + if i % 10000 == 0: + print("%.1f %%" % (100 * i/len(a))) + ier, fjs, _, _ = jfuns2d(nterms, a_i, 1, 0, 10000) + hellskitchen_result[i] = fjs[:nterms] + assert ier == 0 + elif which_func == "y": + h0, h1 = hank103_vec(a, ifexpon=1) + hellskitchen_result[:, 0] = h0.imag + hellskitchen_result[:, 1] = h1.imag + + a_dev = cl_array.to_device(queue, a) + + for n in range(0, nterms): + cl_bessel = cl_func(n, a_dev).get() + scipy_bessel = scipy_func(n, a) + + error_scipy = get_err(cl_bessel, scipy_bessel) + assert error_scipy < 1e-10, error_scipy + + if use_hellskitchen and ( + which_func == "j" + or + (which_func == "y" and n in [0, 1])): + hk_bessel = hellskitchen_result[:, n] + error_hk = get_err(cl_bessel, hk_bessel) + assert error_hk < 1e-10, error_hk + error_hk_scipy = get_err(scipy_bessel, hk_bessel) + print(n, error_scipy, error_hk, error_hk_scipy) + else: + print(n, error_scipy) - if 0 and n == 15: - import matplotlib.pyplot as pt - #pt.plot(scipy_bessel) - #pt.plot(cl_bessel) + assert not np.isnan(cl_bessel).any() - pt.loglog(a, np.abs(cl_bessel-scipy_bessel), label="vs scipy") - if use_hellskitchen: - pt.loglog(a, np.abs(cl_bessel-hk_bessel), label="vs hellskitchen") - pt.legend() - pt.show() + if 0 and n == 15: + import matplotlib.pyplot as pt + #pt.plot(scipy_bessel) + #pt.plot(cl_bessel) + pt.loglog(a, np.abs(cl_bessel-scipy_bessel), label="vs scipy") + if use_hellskitchen: + pt.loglog(a, np.abs(cl_bessel-hk_bessel), label="vs hellskitchen") + pt.legend() + pt.show() -- GitLab