diff --git a/src/cl/pyopencl-airy.h b/src/cl/pyopencl-airy.h new file mode 100644 index 0000000000000000000000000000000000000000..f4a5fe67528bb10c45431f9dd9f18e8e3bafe336 --- /dev/null +++ b/src/cl/pyopencl-airy.h @@ -0,0 +1,324 @@ +// Ported from Cephes by +// Andreas Kloeckner (C) 20112 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// What you see here may be used freely, but it comes with no support or +// guarantee. + +#pragma once + +#include <pyopencl-cephes.h> + +__constant const double airy_maxairy = 103.892; + +__constant const double airy_sqrt3 = 1.732050807568877293527; +__constant const double airy_sqpii = 5.64189583547756286948E-1; + +__constant const double airy_c1 = 0.35502805388781723926; +__constant const double airy_c2 = 0.258819403792806798405; + +__constant const unsigned short AN[32] = { +0x3fd6,0x2dae,0x2537,0xb658, +0x4028,0x03e3,0x871a,0x9067, +0x4053,0x11e5,0x0de2,0xe1e3, +0x4065,0x02da,0xee40,0x073c, +0x4063,0xf834,0x5ba1,0xfddf, +0x4051,0xa24f,0x4f4c,0xea4f, +0x402c,0x0d8d,0x5c2a,0x0f4d, +0x3ff0,0x0000,0x0000,0x0000, +}; +__constant const unsigned short AD[32] = { +0x3fe2,0x29bc,0x0262,0x4d31, +0x402d,0x8334,0x0533,0x2ca5, +0x4055,0x20e3,0xb04d,0x51a0, +0x4066,0x2a2d,0xc730,0xb7b0, +0x4064,0x8782,0x9a9f,0xfa61, +0x4051,0xde94,0xee91,0xd35f, +0x402c,0x311b,0x950d,0x9d81, +0x3ff0,0x0000,0x0000,0x0000, +}; + +__constant const unsigned short APN[32] = { +0x3fe3,0xa3ea,0x4d4c,0xab3e, +0x402d,0x7dad,0xdc67,0x2bcf, +0x4054,0x83bd,0x0724,0xa9a6, +0x4065,0x65e9,0xba99,0xc9ba, +0x4063,0xea2b,0xcdc2,0x64d7, +0x4051,0x7e95,0x41d4,0x1646, +0x402b,0xe4e8,0x6aa7,0x4099, +0x3ff0,0x0000,0x0000,0x0000, +}; +__constant const unsigned short APD[32] = { +0x3fd5,0x6397,0xd288,0xd5b3, +0x4026,0x5caf,0xedc9,0x327e, +0x4051,0xcb0e,0x1800,0x97e6, +0x4063,0xd8e6,0x1132,0xdbd1, +0x4063,0x269b,0x0dcb,0x3316, +0x4051,0x2b36,0xf9d0,0xf72f, +0x402b,0xb321,0x4e35,0x7982, +0x3ff0,0x0000,0x0000,0x0000, +}; + +__constant const unsigned short BN16[20] = { +0xbfd0,0x3518,0xe211,0x6751, +0x3fe2,0x68bc,0x7072,0x2383, +0xbfd5,0x1d32,0x6785,0xcf29, +0x3fb0,0x7f2a,0xa027,0x78a8, +0xbf6f,0x5604,0x2dba,0xcd1b, +}; +__constant const unsigned short BD16[20] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0xc01c,0xa09d,0x891b,0xab58, +0x4025,0x3539,0xfe0b,0x1101, +0xc014,0xee0b,0xa9a7,0x70e8, +0x3fee,0xa2fc,0xa6da,0x95ff, +0xbfac,0x33d0,0x8f8e,0x86c9, +}; + +__constant const unsigned short BPPN[20] = { +0x3fdd,0xca1d,0x9deb,0x377b, +0xbff1,0x7051,0xc6be,0xe420, +0x3fe4,0x710c,0xf199,0x5ff3, +0xbfc0,0x3c6f,0x8681,0xa8fa, +0x3f7f,0x3b43,0xb8ce,0xb896, +}; +__constant const unsigned short BPPD[20] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0xc021,0x6996,0xb340,0xbc45, +0x402b,0xcc73,0x2ea4,0xbb8b, +0xc01c,0x908c,0xa04a,0xed59, +0x3ff5,0x70fd,0xf9a5,0x70a9, +0xbfb4,0x13d0,0x1b60,0x52e8, +}; + +__constant const unsigned short AFN[36] = { +0xbfc0,0xdb6c,0xd50a,0xe6fb, +0xbfe4,0x0bee,0x9856,0x6852, +0xbfe6,0x2e59,0xc2f7,0x9f7d, +0xbfd1,0xe7ea,0x4bb3,0xf40b, +0xbfa9,0x2f6e,0xf47d,0xbd8a, +0xbf70,0xa401,0xc8d9,0xe090, +0xbf24,0xe06e,0xaf4b,0x009c, +0xbec7,0x4a78,0x1d42,0x366d, +0xbe52,0x041c,0xf68e,0xa2d2, +}; +__constant const unsigned short AFD[36] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0x402a,0xb64b,0x2572,0xedf2, +0x4040,0x575c,0x4478,0x7b1a, +0x403a,0xbc98,0xa3b7,0x3410, +0x4022,0x5fc8,0x2ac9,0x9873, +0x3ff7,0x9acb,0x39de,0x9319, +0x3fbd,0x9dac,0xb404,0x5a2b, +0x3f72,0x08ca,0xe03a,0xf617, +0x3f13,0xc8d7,0xaf76,0xe73b, +0x3e9e,0x52b9,0xb995,0x18a7, +}; + +__constant const unsigned short AGN[44] = { +0x3f94,0x3525,0xddcf,0xbbde, +0x3fd9,0x07d5,0x0064,0x37b7, +0x3ff1,0x0d83,0x3a20,0x34eb, +0x3fee,0x0dac,0xa0ef,0x1acb, +0x3fd6,0x7e69,0xcea8,0xfe1d, +0x3fb0,0x3a41,0x21e9,0x0978, +0x3f77,0xfe99,0xf12f,0x5043, +0x3f32,0x8976,0x600e,0x17a2, +0x3edd,0x4f3d,0x69f8,0x574e, +0x3e75,0xca92,0xbbad,0x11c8, +0x3df7,0x78a4,0x7d97,0xee7a, +}; +__constant const unsigned short AGD[40] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0x4022,0x9e2b,0xf3d5,0x6b40, +0x4033,0xd5d5,0xc0ef,0x18d4, +0x402f,0x211b,0x7ea7,0xdc35, +0x4015,0xe84e,0x2b79,0xdbce, +0x3fee,0x8992,0xc195,0xece3, +0x3fb6,0x221d,0xed64,0xa9ee, +0x3f70,0xe704,0x6be3,0x93bb, +0x3f1a,0x8b61,0xd603,0xa5a0, +0x3eb3,0xa845,0xdb07,0x24e8, +0x3e35,0x1fc7,0x3dd5,0x89d4, +}; + +__constant const unsigned short APFN[36] = { +0x3fc7,0xba0f,0x8e7d,0x5db5, +0x3fec,0x5ff2,0x3d14,0xd07e, +0x3fef,0x98b7,0x11be,0x01af, +0x3fd9,0xadef,0x1397,0x84a1, +0x3fb2,0x2f0d,0xeadc,0x33d1, +0x3f78,0x3115,0xe347,0xa140, +0x3f2e,0x8be8,0x5d03,0x8059, +0x3ed1,0x2495,0x9f80,0x12af, +0x3e5a,0xab6a,0x654d,0x7d86, +}; +__constant const unsigned short APFD[36] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0x402d,0x781b,0x9628,0xcc60, +0x4042,0xc56d,0x2524,0x0e31, +0x403f,0x773d,0x09cc,0xffb8, +0x4025,0xfe6b,0x5163,0x03f7, +0x3ffc,0x9f21,0xc07a,0xc9fd, +0x3fc2,0x2450,0xe40e,0xf796, +0x3f76,0x48f2,0x3a5a,0x351a, +0x3f18,0xa059,0x7cfb,0x63a1, +0x3ea2,0xfdb8,0x5a24,0x1e2e, +}; + +__constant const unsigned short APGN[44] = { +0xbfa2,0x351f,0x5f87,0xaf5b, +0xbfe4,0x64db,0x1ff7,0x5c76, +0xbffb,0x564a,0xc221,0x7e49, +0xbff8,0x0916,0x7f6e,0x0b07, +0xbfe2,0x0910,0xd8b0,0x6edb, +0xbfba,0x234b,0x0d8c,0x9903, +0xbf83,0x6c54,0x7f6c,0x50df, +0xbf3e,0x2afa,0x2424,0x2ad0, +0xbee7,0xf87a,0xbc17,0xf631, +0xbe81,0xe81f,0x501e,0x6c10, +0xbe03,0x5f45,0x5e46,0x870d, +}; +__constant const unsigned short APGD[40] = { +/*0x3ff0,0x0000,0x0000,0x0000,*/ +0x4023,0xb7a2,0x060a,0x9812, +0x4035,0xa3e3,0x4724,0xfc96, +0x4031,0x5025,0xdb2c,0x819a, +0x4018,0xb702,0xd5cd,0x94e2, +0x3ff1,0x6a71,0x4927,0x1eb1, +0x3fb9,0x78de,0x4ad7,0x7bc5, +0x3f73,0x991a,0x4b2b,0xc1d7, +0x3f1e,0xf98f,0x0b16,0xbe1c, +0x3eb7,0x10bf,0xfdde,0x4ef3, +0x3e38,0xe834,0x9dc8,0x647e, +}; + +int airy( double x, double *ai, double *aip, double *bi, double *bip ) +{ + typedef __constant const double *data_t; + double z, zz, t, f, g, uf, ug, k, zeta, theta; + int domflg; + + domflg = 0; + if( x > airy_maxairy ) + { + *ai = 0; + *aip = 0; + *bi = DBL_MAX; + *bip = DBL_MAX; + return(-1); + } + + if( x < -2.09 ) + { + domflg = 15; + t = sqrt(-x); + zeta = -2.0 * x * t / 3.0; + t = sqrt(t); + k = airy_sqpii / t; + z = 1.0/zeta; + zz = z * z; + uf = 1.0 + zz * cephes_polevl( zz, (data_t) AFN, 8 ) / cephes_p1evl( zz, (data_t) AFD, 9 ); + ug = z * cephes_polevl( zz, (data_t) AGN, 10 ) / cephes_p1evl( zz, (data_t) AGD, 10 ); + theta = zeta + 0.25 * M_PI; + f = sin( theta ); + g = cos( theta ); + *ai = k * (f * uf - g * ug); + *bi = k * (g * uf + f * ug); + uf = 1.0 + zz * cephes_polevl( zz, (data_t) APFN, 8 ) / cephes_p1evl( zz, (data_t) APFD, 9 ); + ug = z * cephes_polevl( zz, (data_t) APGN, 10 ) / cephes_p1evl( zz, (data_t) APGD, 10 ); + k = airy_sqpii * t; + *aip = -k * (g * uf + f * ug); + *bip = k * (f * uf - g * ug); + return(0); + } + + if( x >= 2.09 ) /* cbrt(9) */ + { + domflg = 5; + t = sqrt(x); + zeta = 2.0 * x * t / 3.0; + g = exp( zeta ); + t = sqrt(t); + k = 2.0 * t * g; + z = 1.0/zeta; + f = cephes_polevl( z, (data_t) AN, 7 ) / cephes_polevl( z, (data_t) AD, 7 ); + *ai = airy_sqpii * f / k; + k = -0.5 * airy_sqpii * t / g; + f = cephes_polevl( z, (data_t) APN, 7 ) / cephes_polevl( z, (data_t) APD, 7 ); + *aip = f * k; + + if( x > 8.3203353 ) /* zeta > 16 */ + { + f = z * cephes_polevl( z, (data_t) BN16, 4 ) / cephes_p1evl( z, (data_t) BD16, 5 ); + k = airy_sqpii * g; + *bi = k * (1.0 + f) / t; + f = z * cephes_polevl( z, (data_t) BPPN, 4 ) / cephes_p1evl( z, (data_t) BPPD, 5 ); + *bip = k * t * (1.0 + f); + return(0); + } + } + + f = 1.0; + g = x; + t = 1.0; + uf = 1.0; + ug = x; + k = 1.0; + z = x * x * x; + while( t > DBL_EPSILON ) + { + uf *= z; + k += 1.0; + uf /=k; + ug *= z; + k += 1.0; + ug /=k; + uf /=k; + f += uf; + k += 1.0; + ug /=k; + g += ug; + t = fabs(uf/f); + } + uf = airy_c1 * f; + ug = airy_c2 * g; + if( (domflg & 1) == 0 ) + *ai = uf - ug; + if( (domflg & 2) == 0 ) + *bi = airy_sqrt3 * (uf + ug); + + /* the deriviative of ai */ + k = 4.0; + uf = x * x/2.0; + ug = z/3.0; + f = uf; + g = 1.0 + ug; + uf /= 3.0; + t = 1.0; + + while( t > DBL_EPSILON ) + { + uf *= z; + ug /=k; + k += 1.0; + ug *= z; + uf /=k; + f += uf; + k += 1.0; + ug /=k; + uf /=k; + g += ug; + k += 1.0; + t = fabs(ug/g); + } + + uf = airy_c1 * f; + ug = airy_c2 * g; + if( (domflg & 4) == 0 ) + *aip = uf - ug; + if( (domflg & 8) == 0 ) + *bip = airy_sqrt3 * (uf + ug); + return(0); +} diff --git a/src/cl/pyopencl-bessel-j.h b/src/cl/pyopencl-bessel-j.h index 0991c33f5ebb457e2b0386eee1a52e71215b5afe..6a076c494d1bdd64cfbb1619fc95dea58435c116 100644 --- a/src/cl/pyopencl-bessel-j.h +++ b/src/cl/pyopencl-bessel-j.h @@ -1,13 +1,28 @@ +// Pieced together from Boost C++ and Cephes by +// Andreas Kloeckner (C) 20112 +// +// Pieces from: +// // Copyright (c) 2006 Xiaogang Zhang, John Maddock // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See // http://www.boost.org/LICENSE_1_0.txt) +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// What you see here may be used freely, but it comes with no support or +// guarantee. + + + + #pragma once -typedef double T; +#include <pyopencl-cephes.h> +#include <pyopencl-airy.h> -#define MAX_SERIES_ITERATIONS 10000 +typedef double T; // {{{ evaluate_rational @@ -47,7 +62,7 @@ T evaluate_rational_backend(__constant const T* num, __constant const T* denom, #define evaluate_rational(num, denom, z) \ evaluate_rational_backend(num, denom, z, sizeof(num)/sizeof(T)) -// {{{ j0 +// {{{ bessel_j0 __constant const T bessel_j0_P1[] = { -4.1298668500990866786e+11, @@ -169,7 +184,7 @@ T bessel_j0(T x) // }}} -// {{{ j1 +// {{{ bessel_j1 __constant const T bessel_j1_P1[] = { -1.4258509801366645672e+11, @@ -297,278 +312,807 @@ T bessel_j1(T x) // }}} -// {{{ asymptotic_bessel_y_large_x_2 +// {{{ bessel_recur -inline T asymptotic_bessel_j_limit(const T v) -{ - // double case: - T v2 = max((T) 3, v * v); - return v2 * 33 /*73*/; -} +/* Reduce the order by backward recurrence. + * AMS55 #9.1.27 and 9.1.73. + */ -inline T asymptotic_bessel_amplitude(T v, T x) -{ - // Calculate the amplitude of J(v, x) and Y(v, x) for large - // x: see A&S 9.2.28. - T s = 1; - T mu = 4 * v * v; - T txq = 2 * x; - txq *= txq; - - s += (mu - 1) / (2 * txq); - s += 3 * (mu - 1) * (mu - 9) / (txq * txq * 8); - s += 15 * (mu - 1) * (mu - 9) * (mu - 25) / (txq * txq * txq * 8 * 6); - - return sqrt(s * 2 / (M_PI * x)); -} +#define BESSEL_BIG 1.44115188075855872E+17 -T asymptotic_bessel_phase_mx(T v, T x) +double bessel_recur(double *n, double x, double *newn, int cancel ) { - // - // Calculate the phase of J(v, x) and Y(v, x) for large x. - // See A&S 9.2.29. - // Note that the result returned is the phase less x. - // - T mu = 4 * v * v; - T denom = 4 * x; - T denom_mult = denom * denom; - - T s = -M_PI * (v / 2 + 0.25f); - s += (mu - 1) / (2 * denom); - denom *= denom_mult; - s += (mu - 1) * (mu - 25) / (6 * denom); - denom *= denom_mult; - s += (mu - 1) * (mu * mu - 114 * mu + 1073) / (5 * denom); - denom *= denom_mult; - s += (mu - 1) * (5 * mu * mu * mu - 1535 * mu * mu + 54703 * mu - 375733) / (14 * denom); - return s; -} + double pkm2, pkm1, pk, qkm2, qkm1; + /* double pkp1; */ + double k, ans, qk, xk, yk, r, t, kf; + const double big = BESSEL_BIG; + int nflag, ctr; + /* continued fraction for Jn(x)/Jn-1(x) */ + if( *n < 0.0 ) + nflag = 1; + else + nflag = 0; -// }}} +fstart: -// {{{ CF1_jy +#if DEBUG + printf( "recur: n = %.6e, newn = %.6e, cfrac = ", *n, *newn ); +#endif -// Evaluate continued fraction fv = J_(v+1) / J_v, see -// Abramowitz and Stegun, Handbook of Mathematical Functions, 1972, 9.1.73 -int CF1_jy(T v, T x, T* fv, int* sign) -{ - T C, D, f, a, b, delta, tiny, tolerance; - unsigned long k; - int s = 1; - - // |x| <= |v|, CF1_jy converges rapidly - // |x| > |v|, CF1_jy needs O(|x|) iterations to converge - - // modified Lentz's method, see - // Lentz, Applied Optics, vol 15, 668 (1976) - tolerance = 2 * DBL_EPSILON; - tiny = sqrt(DBL_MIN); - C = f = tiny; // b0 = 0, replace with tiny - D = 0; - for (k = 1; k < MAX_SERIES_ITERATIONS * 100; k++) - { - a = -1; - b = 2 * (v + k) / x; - C = b + a / C; - D = b + a * D; - if (C == 0) { C = tiny; } - if (D == 0) { D = tiny; } - D = 1 / D; - delta = C * D; - f *= delta; - if (D < 0) { s = -s; } - if (fabs(delta - 1) < tolerance) - { break; } - } - - // policies::check_series_iterations<T>("boost::math::bessel_jy<%1%>(%1%,%1%) in CF1_jy", k / 100, pol); - *fv = -f; - *sign = s; // sign of denominator - - return 0; + pkm2 = 0.0; + qkm2 = 1.0; + pkm1 = x; + qkm1 = *n + *n; + xk = -x * x; + yk = qkm1; + ans = 1.0; + ctr = 0; + do + { + yk += 2.0; + pk = pkm1 * yk + pkm2 * xk; + qk = qkm1 * yk + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + if( qk != 0 ) + r = pk/qk; + else + r = 0.0; + if( r != 0 ) + { + t = fabs( (ans - r)/r ); + ans = r; + } + else + t = 1.0; + + if( ++ctr > 1000 ) + { + //mtherr( "jv", UNDERFLOW ); + pk = nan(24); + + goto done; + } + if( t < DBL_EPSILON ) + goto done; + + if( fabs(pk) > big ) + { + pkm2 /= big; + pkm1 /= big; + qkm2 /= big; + qkm1 /= big; + } + } + while( t > DBL_EPSILON ); + +done: + +#if DEBUG + printf( "%.6e\n", ans ); +#endif + + /* Change n to n-1 if n < 0 and the continued fraction is small + */ + if( nflag > 0 ) + { + if( fabs(ans) < 0.125 ) + { + nflag = -1; + *n = *n - 1.0; + goto fstart; + } + } + + + kf = *newn; + + /* backward recurrence + * 2k + * J (x) = --- J (x) - J (x) + * k-1 x k k+1 + */ + + pk = 1.0; + pkm1 = 1.0/ans; + k = *n - 1.0; + r = 2 * k; + do + { + pkm2 = (pkm1 * r - pk * x) / x; + /* pkp1 = pk; */ + pk = pkm1; + pkm1 = pkm2; + r -= 2.0; + /* + t = fabs(pkp1) + fabs(pk); + if( (k > (kf + 2.5)) && (fabs(pkm1) < 0.25*t) ) + { + k -= 1.0; + t = x*x; + pkm2 = ( (r*(r+2.0)-t)*pk - r*x*pkp1 )/t; + pkp1 = pk; + pk = pkm1; + pkm1 = pkm2; + r -= 2.0; + } + */ + k -= 1.0; + } + while( k > (kf + 0.5) ); + + /* Take the larger of the last two iterates + * on the theory that it may have less cancellation error. + */ + + if( cancel ) + { + if( (kf >= 0.0) && (fabs(pk) > fabs(pkm1)) ) + { + k += 1.0; + pkm2 = pk; + } + } + *newn = k; +#if DEBUG + printf( "newn %.6e rans %.6e\n", k, pkm2 ); +#endif + return( pkm2 ); } // }}} -// {{{ bessel_j_small_z_series +// {{{ bessel_jvs -typedef struct -{ - unsigned N; - T v; - T mult; - T term; -} bessel_j_small_z_series_term; +#define BESSEL_MAXGAM 171.624376956302725 +#define BESSEL_MAXLOG 7.09782712893383996843E2 + +/* Ascending power series for Jv(x). + * AMS55 #9.1.10. + */ -void bessel_j_small_z_series_term_init(bessel_j_small_z_series_term *self, T v_, T x) +double bessel_jvs(double n, double x) { - self->N = 0; - self->v = v_; - self->mult = x / 2; - self->mult *= -self->mult; - self->term = 1; + double t, u, y, z, k; + int ex; + int sgngam; + + z = -x * x / 4.0; + u = 1.0; + y = u; + k = 1.0; + t = 1.0; + + while( t > DBL_EPSILON ) + { + u *= z / (k * (n+k)); + y += u; + k += 1.0; + if( y != 0 ) + t = fabs( u/y ); + } +#if DEBUG + printf( "power series=%.5e ", y ); +#endif + t = frexp( 0.5*x, &ex ); + ex = ex * n; + if( (ex > -1023) + && (ex < 1023) + && (n > 0.0) + && (n < (BESSEL_MAXGAM-1.0)) ) + { + t = pow( 0.5*x, n ) / tgamma( n + 1.0 ); +#if DEBUG + printf( "pow(.5*x, %.4e)/gamma(n+1)=%.5e\n", n, t ); +#endif + y *= t; + } + else + { +#if DEBUG + z = n * log(0.5*x); + k = lgamma( n+1.0 ); + t = z - k; + printf( "log pow=%.5e, lgam(%.4e)=%.5e\n", z, n+1.0, k ); +#else + t = n * log(0.5*x) - lgamma(n + 1.0); +#endif + if( y < 0 ) + { + sgngam = -sgngam; + y = -y; + } + t += log(y); +#if DEBUG + printf( "log y=%.5e\n", log(y) ); +#endif + if( t < -BESSEL_MAXLOG ) + { + return( 0.0 ); + } + if( t > BESSEL_MAXLOG ) + { + // mtherr( "Jv", OVERFLOW ); + return( DBL_MAX); + } + y = sgngam * exp( t ); + } + return(y); } -T bessel_j_small_z_series_term_do(bessel_j_small_z_series_term *self) +// }}} + +// {{{ bessel_jnt + +__constant const double bessel_jnt_PF2[] = { + -9.0000000000000000000e-2, + 8.5714285714285714286e-2 +}; +__constant const double bessel_jnt_PF3[] = { + 1.3671428571428571429e-1, + -5.4920634920634920635e-2, + -4.4444444444444444444e-3 +}; +__constant const double bessel_jnt_PF4[] = { + 1.3500000000000000000e-3, + -1.6036054421768707483e-1, + 4.2590187590187590188e-2, + 2.7330447330447330447e-3 +}; +__constant const double bessel_jnt_PG1[] = { + -2.4285714285714285714e-1, + 1.4285714285714285714e-2 +}; +__constant const double bessel_jnt_PG2[] = { + -9.0000000000000000000e-3, + 1.9396825396825396825e-1, + -1.1746031746031746032e-2 +}; +__constant const double bessel_jnt_PG3[] = { + 1.9607142857142857143e-2, + -1.5983694083694083694e-1, + 6.3838383838383838384e-3 +}; + +double bessel_jnt(double n, double x) { - T r = self->term; - ++self->N; - self->term *= self->mult / (self->N * (self->N + self->v)); - return r; + double z, zz, z3; + double cbn, n23, cbtwo; + double ai, aip, bi, bip; /* Airy functions */ + double nk, fk, gk, pp, qq; + double F[5], G[4]; + int k; + + cbn = cbrt(n); + z = (x - n)/cbn; + cbtwo = cbrt( 2.0 ); + + /* Airy function */ + zz = -cbtwo * z; + airy( zz, &ai, &aip, &bi, &bip ); + + /* polynomials in expansion */ + zz = z * z; + z3 = zz * z; + F[0] = 1.0; + F[1] = -z/5.0; + F[2] = cephes_polevl( z3, bessel_jnt_PF2, 1 ) * zz; + F[3] = cephes_polevl( z3, bessel_jnt_PF3, 2 ); + F[4] = cephes_polevl( z3, bessel_jnt_PF4, 3 ) * z; + G[0] = 0.3 * zz; + G[1] = cephes_polevl( z3, bessel_jnt_PG1, 1 ); + G[2] = cephes_polevl( z3, bessel_jnt_PG2, 2 ) * z; + G[3] = cephes_polevl( z3, bessel_jnt_PG3, 2 ) * zz; +#if DEBUG + for( k=0; k<=4; k++ ) + printf( "F[%d] = %.5E\n", k, F[k] ); + for( k=0; k<=3; k++ ) + printf( "G[%d] = %.5E\n", k, G[k] ); +#endif + pp = 0.0; + qq = 0.0; + nk = 1.0; + n23 = cbrt( n * n ); + + for( k=0; k<=4; k++ ) + { + fk = F[k]*nk; + pp += fk; + if( k != 4 ) + { + gk = G[k]*nk; + qq += gk; + } +#if DEBUG + printf("fk[%d] %.5E, gk[%d] %.5E\n", k, fk, k, gk ); +#endif + nk /= n23; + } + + fk = cbtwo * ai * pp/cbn + cbrt(4.0) * aip * qq/n; + return(fk); } +// }}} -// -// Series evaluation for BesselJ(v, z) as z -> 0. -// See http://functions.wolfram.com/Bessel-TypeFunctions/BesselJ/06/01/04/01/01/0003/ -// Converges rapidly for all z << v. -// -inline T bessel_j_small_z_series(T v, T x) +// {{{ bessel_jnx + +__constant const double bessel_jnx_lambda[] = { + 1.0, + 1.041666666666666666666667E-1, + 8.355034722222222222222222E-2, + 1.282265745563271604938272E-1, + 2.918490264641404642489712E-1, + 8.816272674437576524187671E-1, + 3.321408281862767544702647E+0, + 1.499576298686255465867237E+1, + 7.892301301158651813848139E+1, + 4.744515388682643231611949E+2, + 3.207490090890661934704328E+3 +}; +__constant const double bessel_jnx_mu[] = { + 1.0, + -1.458333333333333333333333E-1, + -9.874131944444444444444444E-2, + -1.433120539158950617283951E-1, + -3.172272026784135480967078E-1, + -9.424291479571202491373028E-1, + -3.511203040826354261542798E+0, + -1.572726362036804512982712E+1, + -8.228143909718594444224656E+1, + -4.923553705236705240352022E+2, + -3.316218568547972508762102E+3 +}; +__constant const double bessel_jnx_P1[] = { + -2.083333333333333333333333E-1, + 1.250000000000000000000000E-1 +}; +__constant const double bessel_jnx_P2[] = { + 3.342013888888888888888889E-1, + -4.010416666666666666666667E-1, + 7.031250000000000000000000E-2 +}; +__constant const double bessel_jnx_P3[] = { + -1.025812596450617283950617E+0, + 1.846462673611111111111111E+0, + -8.912109375000000000000000E-1, + 7.324218750000000000000000E-2 +}; +__constant const double bessel_jnx_P4[] = { + 4.669584423426247427983539E+0, + -1.120700261622299382716049E+1, + 8.789123535156250000000000E+0, + -2.364086914062500000000000E+0, + 1.121520996093750000000000E-1 +}; +__constant const double bessel_jnx_P5[] = { + -2.8212072558200244877E1, + 8.4636217674600734632E1, + -9.1818241543240017361E1, + 4.2534998745388454861E1, + -7.3687943594796316964E0, + 2.27108001708984375E-1 +}; +__constant const double bessel_jnx_P6[] = { + 2.1257013003921712286E2, + -7.6525246814118164230E2, + 1.0599904525279998779E3, + -6.9957962737613254123E2, + 2.1819051174421159048E2, + -2.6491430486951555525E1, + 5.7250142097473144531E-1 +}; +__constant const double bessel_jnx_P7[] = { + -1.9194576623184069963E3, + 8.0617221817373093845E3, + -1.3586550006434137439E4, + 1.1655393336864533248E4, + -5.3056469786134031084E3, + 1.2009029132163524628E3, + -1.0809091978839465550E2, + 1.7277275025844573975E0 +}; + +double bessel_jnx(double n, double x) { - T prefix; + double zeta, sqz, zz, zp, np; + double cbn, n23, t, z, sz; + double pp, qq, z32i, zzi; + double ak, bk, akl, bkl; + int sign, doa, dob, nflg, k, s, tk, tkp1, m; + double u[8]; + double ai, aip, bi, bip; - int const max_factorial = 170; // long double value from boost + /* Test for x very close to n. + * Use expansion for transition region if so. + */ + cbn = cbrt(n); + z = (x - n)/cbn; + if( fabs(z) <= 0.7 ) + return( bessel_jnt(n,x) ); - if(v < max_factorial) - { - prefix = pow(x / 2, v) / tgamma(v+1); - } - else - { - prefix = v * log(x / 2) - lgamma(v+1); - prefix = exp(prefix); - } - if(0 == prefix) - return prefix; + z = x/n; + zz = 1.0 - z*z; + if( zz == 0.0 ) + return(0.0); - bessel_j_small_z_series_term s; - bessel_j_small_z_series_term_init(&s, v, x); + if( zz > 0.0 ) + { + sz = sqrt( zz ); + t = 1.5 * (log( (1.0+sz)/z ) - sz ); /* zeta ** 3/2 */ + zeta = cbrt( t * t ); + nflg = 1; + } + else + { + sz = sqrt(-zz); + t = 1.5 * (sz - acos(1.0/z)); + zeta = -cbrt( t * t ); + nflg = -1; + } + z32i = fabs(1.0/t); + sqz = cbrt(t); - T factor = DBL_EPSILON; - int counter = MAX_SERIES_ITERATIONS; + /* Airy function */ + n23 = cbrt( n * n ); + t = n23 * zeta; - T result = 0; - T next_term; - do - { - next_term = bessel_j_small_z_series_term_do(&s); - result += next_term; - } - while((fabs(factor * result) < fabs(next_term)) && --counter); +#if DEBUG + printf("zeta %.5E, Airy(%.5E)\n", zeta, t ); +#endif + airy( t, &ai, &aip, &bi, &bip ); - // policies::check_series_iterations<T>("boost::math::bessel_j_small_z_series<%1%>(%1%,%1%)", max_iter, pol); + /* polynomials in expansion */ + u[0] = 1.0; + zzi = 1.0/zz; + u[1] = cephes_polevl( zzi, bessel_jnx_P1, 1 )/sz; + u[2] = cephes_polevl( zzi, bessel_jnx_P2, 2 )/zz; + u[3] = cephes_polevl( zzi, bessel_jnx_P3, 3 )/(sz*zz); + pp = zz*zz; + u[4] = cephes_polevl( zzi, bessel_jnx_P4, 4 )/pp; + u[5] = cephes_polevl( zzi, bessel_jnx_P5, 5 )/(pp*sz); + pp *= zz; + u[6] = cephes_polevl( zzi, bessel_jnx_P6, 6 )/pp; + u[7] = cephes_polevl( zzi, bessel_jnx_P7, 7 )/(pp*sz); + +#if DEBUG + for( k=0; k<=7; k++ ) + printf( "u[%d] = %.5E\n", k, u[k] ); +#endif + + pp = 0.0; + qq = 0.0; + np = 1.0; + /* flags to stop when terms get larger */ + doa = 1; + dob = 1; + akl = DBL_MAX; + bkl = DBL_MAX; + + for( k=0; k<=3; k++ ) + { + tk = 2 * k; + tkp1 = tk + 1; + zp = 1.0; + ak = 0.0; + bk = 0.0; + for( s=0; s<=tk; s++ ) + { + if( doa ) + { + if( (s & 3) > 1 ) + sign = nflg; + else + sign = 1; + ak += sign * bessel_jnx_mu[s] * zp * u[tk-s]; + } + + if( dob ) + { + m = tkp1 - s; + if( ((m+1) & 3) > 1 ) + sign = nflg; + else + sign = 1; + bk += sign * bessel_jnx_lambda[s] * zp * u[m]; + } + zp *= z32i; + } - return prefix * result; + if( doa ) + { + ak *= np; + t = fabs(ak); + if( t < akl ) + { + akl = t; + pp += ak; + } + else + doa = 0; + } + + if( dob ) + { + bk += bessel_jnx_lambda[tkp1] * zp * u[0]; + bk *= -np/sqz; + t = fabs(bk); + if( t < bkl ) + { + bkl = t; + qq += bk; + } + else + dob = 0; + } +#if DEBUG + printf("a[%d] %.5E, b[%d] %.5E\n", k, ak, k, bk ); +#endif + if( np < DBL_EPSILON ) + break; + np /= n*n; + } + + /* normalizing factor ( 4*zeta/(1 - z**2) )**1/4 */ + t = 4.0 * zeta/zz; + t = sqrt( sqrt(t) ); + + t *= ai*pp/cbrt(n) + aip*qq/(n23*n); + return(t); } // }}} -// {{{ jn +// {{{ bessel_hankel + +/* Hankel's asymptotic expansion + * for large x. + * AMS55 #9.2.5. + */ -inline T asymptotic_bessel_j_large_x_2(T v, T x) +double bessel_hankel( double n, double x ) { - // See A&S 9.2.19. - // Get the phase and amplitude: - T ampl = asymptotic_bessel_amplitude(v, x); - T phase = asymptotic_bessel_phase_mx(v, x); - // - // Calculate the sine of the phase, using: - // cos(x+p) = cos(x)cos(p) - sin(x)sin(p) - // - T sin_phase = cos(phase) * cos(x) - sin(phase) * sin(x); - return sin_phase * ampl; + double t, u, z, k, sign, conv; + double p, q, j, m, pp, qq; + int flag; + + m = 4.0*n*n; + j = 1.0; + z = 8.0 * x; + k = 1.0; + p = 1.0; + u = (m - 1.0)/z; + q = u; + sign = 1.0; + conv = 1.0; + flag = 0; + t = 1.0; + pp = 1.0e38; + qq = 1.0e38; + + while( t > DBL_EPSILON ) + { + k += 2.0; + j += 1.0; + sign = -sign; + u *= (m - k * k)/(j * z); + p += sign * u; + k += 2.0; + j += 1.0; + u *= (m - k * k)/(j * z); + q += sign * u; + t = fabs(u/p); + if( t < conv ) + { + conv = t; + qq = q; + pp = p; + flag = 1; + } + /* stop if the terms start getting larger */ + if( (flag != 0) && (t > conv) ) + { +#if DEBUG + printf( "Hankel: convergence to %.4E\n", conv ); +#endif + goto hank1; + } + } + +hank1: + u = x - (0.5*n + 0.25) * M_PI; + t = sqrt( 2.0/(M_PI*x) ) * ( pp * cos(u) - qq * sin(u) ); +#if DEBUG + printf( "hank: %.6e\n", t ); +#endif + return( t ); } +// }}} + +// {{{ bessel_jv + +// SciPy says jn has no advantage over jv, so alias the two. -T bessel_jn(int n, T x) +#define bessel_jn bessel_jv + +double bessel_jv(double n, double x) { - T value = 0, factor, current, prev, next; + double k, q, t, y, an; + int i, sign, nint; - // - // Reflection has to come first: - // - if (n < 0) + nint = 0; /* Flag for integer n */ + sign = 1; /* Flag for sign inversion */ + an = fabs( n ); + y = floor( an ); + if( y == an ) + { + nint = 1; + i = an - 16384.0 * floor( an/16384.0 ); + if( n < 0.0 ) { - factor = (n & 0x1) ? -1 : 1; // J_{-n}(z) = (-1)^n J_n(z) - n = -n; + if( i & 1 ) + sign = -sign; + n = an; } - else + if( x < 0.0 ) { - factor = 1; + if( i & 1 ) + sign = -sign; + x = -x; } - // - // Special cases: - // - if (n == 0) + if( n == 0.0 ) + return( bessel_j0(x) ); + if( n == 1.0 ) + return( sign * bessel_j1(x) ); + } + + if( (x < 0.0) && (y != an) ) + { + // mtherr( "Jv", DOMAIN ); + // y = 0.0; + y = nan(22); + goto done; + } + + y = fabs(x); + + if( y < DBL_EPSILON ) + goto underf; + + k = 3.6 * sqrt(y); + t = 3.6 * sqrt(an); + if( (y < t) && (an > 21.0) ) + return( sign * bessel_jvs(n,x) ); + if( (an < k) && (y > 21.0) ) + return( sign * bessel_hankel(n,x) ); + + if( an < 500.0 ) + { + /* Note: if x is too large, the continued + * fraction will fail; but then the + * Hankel expansion can be used. + */ + if( nint != 0 ) { - return factor * bessel_j0(x); + k = 0.0; + q = bessel_recur( &n, x, &k, 1 ); + if( k == 0.0 ) + { + y = bessel_j0(x)/q; + goto done; + } + if( k == 1.0 ) + { + y = bessel_j1(x)/q; + goto done; + } } - if (n == 1) + + if( an > 2.0 * y ) + goto rlarger; + + if( (n >= 0.0) && (n < 20.0) + && (y > 6.0) && (y < 20.0) ) { - return factor * bessel_j1(x); + /* Recur backwards from a larger value of n + */ +rlarger: + k = n; + + y = y + an + 1.0; + if( y < 30.0 ) + y = 30.0; + y = n + floor(y-n); + q = bessel_recur( &y, x, &k, 0 ); + y = bessel_jvs(y,x) * q; + goto done; } - if (x == 0) // n >= 2 + if( k <= 30.0 ) { - return 0; + k = 2.0; + } + else if( k < 90.0 ) + { + k = (3*k)/4; + } + if( an > (k + 3.0) ) + { + if( n < 0.0 ) + k = -k; + q = n - floor(n); + k = floor(k) + q; + if( n > 0.0 ) + q = bessel_recur( &n, x, &k, 1 ); + else + { + t = k; + k = n; + q = bessel_recur( &t, x, &k, 1 ); + k = t; + } + if( q == 0.0 ) + { +underf: + y = 0.0; + goto done; + } + } + else + { + k = n; + q = 1.0; } - if(fabs(x) > asymptotic_bessel_j_limit(n)) - return factor * asymptotic_bessel_j_large_x_2(n, x); - - // BOOST_ASSERT(n > 1); - T scale = 1; - if (n < fabs(x)) // forward recurrence - { - prev = bessel_j0(x); - current = bessel_j1(x); - for (int k = 1; k < n; k++) - { - T fact = 2 * k / x; - if((DBL_MAX - fabs(prev)) / fabs(fact) < fabs(current)) - { - scale /= current; - prev /= current; - current = 1; - } - value = fact * current - prev; - prev = current; - current = value; - } - } - else if(x < 1) - { - return factor * bessel_j_small_z_series((T) n, x); - } - else // backward recurrence - { - T fn; int s; // fn = J_(n+1) / J_n - // |x| <= n, fast convergence for continued fraction CF1 - CF1_jy((T) n, x, &fn, &s); - prev = fn; - current = 1; - for (int k = n; k > 0; k--) - { - T fact = 2 * k / x; - if((DBL_MAX - fabs(prev)) / fact < fabs(current)) - { - prev /= current; - scale /= current; - current = 1; - } - next = fact * current - prev; - prev = current; - current = next; - } - value = bessel_j0(x) / current; // normalization - scale = 1 / scale; - } - value *= factor; - - if(DBL_MAX * scale < fabs(value)) - return nan(22ul); - - return value / scale; + /* boundary between convergence of + * power series and Hankel expansion + */ + y = fabs(k); + if( y < 26.0 ) + t = (0.0083*y + 0.09)*y + 12.9; + else + t = 0.9 * y; + + if( x > t ) + y = bessel_hankel(k,x); + else + y = bessel_jvs(k,x); +#if DEBUG + printf( "y = %.16e, recur q = %.16e\n", y, q ); +#endif + if( n > 0.0 ) + y /= q; + else + y *= q; + } + + else + { + /* For large n, use the uniform expansion + * or the transitional expansion. + * But if x is of the order of n**2, + * these may blow up, whereas the + * Hankel expansion will then work. + */ + if( n < 0.0 ) + { + //mtherr( "Jv", TLOSS ); + //y = 0.0; + y = nan(23); + goto done; + } + t = x/n; + t /= n; + if( t > 0.3 ) + y = bessel_hankel(n,x); + else + y = bessel_jnx(n,x); + } + +done: return( sign * y); } // }}} diff --git a/src/cl/pyopencl-cephes.h b/src/cl/pyopencl-cephes.h new file mode 100644 index 0000000000000000000000000000000000000000..25101a2848a8c115d1aa6ba94d6332bc4a3fead5 --- /dev/null +++ b/src/cl/pyopencl-cephes.h @@ -0,0 +1,69 @@ +// Ported from Cephes by +// Andreas Kloeckner (C) 20112 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// What you see here may be used freely, but it comes with no support or +// guarantee. + +#pragma once + +// {{{ cephes_polevl + +/* + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + */ + +double cephes_polevl(double x, __constant const double *coef, int N) +{ + double ans; + int i; + __constant const double *p; + + p = coef; + ans = *p++; + i = N; + + do + ans = ans * x + *p++; + while( --i ); + + return( ans ); +} + +// }}} + +// {{{ cephes_p1evl + +double cephes_p1evl( double x, __constant const double *coef, int N ) +{ + double ans; + __constant const double *p; + int i; + + p = coef; + ans = x + *p++; + i = N-1; + + do + ans = ans * x + *p++; + while( --i ); + + return( ans ); +}