diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e387cbc3bcd66ae9bd56b8273890353e49cd6917
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,48 @@
+Python 2.7 AMD CPU:
+  script:
+  - py_version=2.7
+  - cl_dev=amd_pu
+  - EXTRA_INSTALL="numpy mako"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python2.7
+  - amd-cl-cpu
+  except:
+  - tags
+Python 3.4 AMD CPU:
+  script:
+  - py_version=3.4
+  - cl_dev=amd_pu
+  - EXTRA_INSTALL="numpy mako"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python3.4
+  - amd-cl-cpu
+  except:
+  - tags
+Python 2.6 AMD CPU:
+  script:
+  - py_version=2.6
+  - cl_dev=amd_pu
+  - EXTRA_INSTALL="numpy mako"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python2.6
+  - amd-cl-cpu
+  except:
+  - tags
+Python 2.7 K20:
+  script:
+  - py_version=2.7
+  - cl_dev=nvi_k20
+  - EXTRA_INSTALL="numpy mako"
+  - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh
+  - ". ./build-and-test-py-project.sh"
+  tags:
+  - python2.7
+  - cuda
+  except:
+  - tags
diff --git a/contrib/fortran-to-opencl/setup.cfg b/contrib/fortran-to-opencl/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..d3f13a0e64b79c00a957cb1369e335e0b8a00d76
--- /dev/null
+++ b/contrib/fortran-to-opencl/setup.cfg
@@ -0,0 +1,3 @@
+[flake8]
+ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802
+max-line-length=85
diff --git a/contrib/fortran-to-opencl/translate.py b/contrib/fortran-to-opencl/translate.py
index d0355423e10b274ecfc658ed3f1b7c722586bb82..4c3486f4d52c135aacd2a915c90089fd71e8acb2 100644
--- a/contrib/fortran-to-opencl/translate.py
+++ b/contrib/fortran-to-opencl/translate.py
@@ -27,7 +27,7 @@ import numpy as np
 import re
 from pymbolic.parser import Parser as ExpressionParserBase
 from pymbolic.mapper import CombineMapper
-import pymbolic.primitives
+import pymbolic.primitives as p
 from pymbolic.mapper.c_code import CCodeMapper as CCodeMapperBase
 
 from warnings import warn
@@ -43,6 +43,15 @@ class TranslationError(RuntimeError):
     pass
 
 
+def complex_type_name(dtype):
+    if dtype == np.complex64:
+        return "cfloat"
+    if dtype == np.complex128:
+        return "cdouble"
+    else:
+        raise RuntimeError
+
+
 # {{{ AST components
 
 def dtype_to_ctype(dtype):
@@ -99,7 +108,7 @@ _and = intern("and")
 _or = intern("or")
 
 
-class TypedLiteral(pymbolic.primitives.Leaf):
+class TypedLiteral(p.Leaf):
     def __init__(self, value, dtype):
         self.value = value
         self.dtype = np.dtype(dtype)
@@ -110,6 +119,18 @@ class TypedLiteral(pymbolic.primitives.Leaf):
     mapper_method = intern("map_literal")
 
 
+def simplify_typed_literal(expr):
+    if (isinstance(expr, p.Product)
+            and len(expr.children) == 2
+            and isinstance(expr.children[1], TypedLiteral)
+            and p.is_constant(expr.children[0])
+            and expr.children[0] == -1):
+        tl = expr.children[1]
+        return TypedLiteral("-"+tl.value, tl.dtype)
+    else:
+        return expr
+
+
 class FortranExpressionParser(ExpressionParserBase):
     # FIXME double/single prec literals
 
@@ -134,7 +155,6 @@ class FortranExpressionParser(ExpressionParserBase):
     def parse_terminal(self, pstate):
         scope = self.tree_walker.scope_stack[-1]
 
-        from pymbolic.primitives import Subscript, Call, Variable
         from pymbolic.parser import (
             _identifier, _openpar, _closepar, _float)
 
@@ -164,17 +184,17 @@ class FortranExpressionParser(ExpressionParserBase):
                 # not a subscript
                 scope.use_name(name)
 
-                return Variable(name)
+                return p.Variable(name)
 
-            left_exp = Variable(name)
+            left_exp = p.Variable(name)
 
             pstate.advance()
             pstate.expect_not_end()
 
             if scope.is_known(name):
-                cls = Subscript
+                cls = p.Subscript
             else:
-                cls = Call
+                cls = p.Call
 
             if pstate.next_tag is _closepar:
                 pstate.advance()
@@ -219,14 +239,14 @@ class FortranExpressionParser(ExpressionParserBase):
                 _PREC_CALL, _PREC_COMPARISON, _openpar,
                 _PREC_LOGICAL_OR, _PREC_LOGICAL_AND)
         from pymbolic.primitives import (
-                ComparisonOperator, LogicalAnd, LogicalOr)
+                Comparison, LogicalAnd, LogicalOr)
 
         next_tag = pstate.next_tag()
         if next_tag is _openpar and _PREC_CALL > min_precedence:
             raise TranslationError("parenthesis operator only works on names")
         elif next_tag in self.COMP_MAP and _PREC_COMPARISON > min_precedence:
             pstate.advance()
-            left_exp = ComparisonOperator(
+            left_exp = Comparison(
                     left_exp,
                     self.COMP_MAP[next_tag],
                     self.parse_expression(pstate, _PREC_COMPARISON))
@@ -250,7 +270,10 @@ class FortranExpressionParser(ExpressionParserBase):
                 assert len(left_exp) == 2
                 r, i = left_exp
 
-                dtype = (r.dtype.type(0) + i.dtype.type(0))
+                r = simplify_typed_literal(r)
+                i = simplify_typed_literal(i)
+
+                dtype = (r.dtype.type(0) + i.dtype.type(0)).dtype
                 if dtype == np.float32:
                     dtype = np.complex64
                 else:
@@ -295,6 +318,17 @@ class TypeInferenceMapper(CombineMapper):
             else:
                 raise RuntimeError("unexpected complex type")
 
+        elif name in ["imag", "real", "abs", "dble"]:
+            arg, = expr.parameters
+            base_dtype = self.rec(arg)
+
+            if base_dtype == np.complex128:
+                return np.dtype(np.float64)
+            elif base_dtype == np.complex64:
+                return np.dtype(np.float32)
+            else:
+                return base_dtype
+
         else:
             return CombineMapper.map_call(self, expr)
 
@@ -304,14 +338,6 @@ class ComplexCCodeMapper(CCodeMapperBase):
         CCodeMapperBase.__init__(self)
         self.infer_type = infer_type
 
-    def complex_type_name(self, dtype):
-        if dtype == np.complex64:
-            return "cfloat"
-        if dtype == np.complex128:
-            return "cdouble"
-        else:
-            raise RuntimeError
-
     def map_sum(self, expr, enclosing_prec):
         tgt_dtype = self.infer_type(expr)
         is_complex = tgt_dtype.kind == 'c'
@@ -319,19 +345,30 @@ class ComplexCCodeMapper(CCodeMapperBase):
         if not is_complex:
             return CCodeMapperBase.map_sum(self, expr, enclosing_prec)
         else:
-            tgt_name = self.complex_type_name(tgt_dtype)
+            tgt_name = complex_type_name(tgt_dtype)
 
             reals = [child for child in expr.children
                     if 'c' != self.infer_type(child).kind]
             complexes = [child for child in expr.children
                     if 'c' == self.infer_type(child).kind]
 
-            from pymbolic.mapper.stringifier import PREC_SUM
+            from pymbolic.mapper.stringifier import PREC_SUM, PREC_NONE
             real_sum = self.join_rec(" + ", reals, PREC_SUM)
-            complex_sum = self.join_rec(" + ", complexes, PREC_SUM)
+
+            if len(complexes) == 1:
+                myprec = PREC_SUM
+            else:
+                myprec = PREC_NONE
+
+            complex_sum = self.rec(complexes[0], myprec)
+            for child in complexes[1:]:
+                complex_sum = "%s_add(%s, %s)" % (
+                        tgt_name, complex_sum,
+                        self.rec(child, PREC_NONE))
 
             if real_sum:
-                result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum)
+                result = "%s_add(%s_fromreal(%s), %s)" % (
+                        tgt_name, tgt_name, real_sum, complex_sum)
             else:
                 result = complex_sum
 
@@ -344,7 +381,7 @@ class ComplexCCodeMapper(CCodeMapperBase):
         if not is_complex:
             return CCodeMapperBase.map_product(self, expr, enclosing_prec)
         else:
-            tgt_name = self.complex_type_name(tgt_dtype)
+            tgt_name = complex_type_name(tgt_dtype)
 
             reals = [child for child in expr.children
                     if 'c' != self.infer_type(child).kind]
@@ -366,8 +403,7 @@ class ComplexCCodeMapper(CCodeMapperBase):
                         self.rec(child, PREC_NONE))
 
             if real_prd:
-                # elementwise semantics are correct
-                result = "%s * %s" % (real_prd, complex_prd)
+                result = "%s_rmul(%s, %s)" % (tgt_name, real_prd, complex_prd)
             else:
                 result = complex_prd
 
@@ -383,16 +419,18 @@ class ComplexCCodeMapper(CCodeMapperBase):
         if not (n_complex or d_complex):
             return CCodeMapperBase.map_quotient(self, expr, enclosing_prec)
         elif n_complex and not d_complex:
-            # elementwise semnatics are correct
-            return CCodeMapperBase.map_quotient(self, expr, enclosing_prec)
+            return "%s_divider(%s, %s)" % (
+                    complex_type_name(tgt_dtype),
+                    self.rec(expr.numerator, PREC_NONE),
+                    self.rec(expr.denominator, PREC_NONE))
         elif not n_complex and d_complex:
             return "%s_rdivide(%s, %s)" % (
-                    self.complex_type_name(tgt_dtype),
+                    complex_type_name(tgt_dtype),
                     self.rec(expr.numerator, PREC_NONE),
                     self.rec(expr.denominator, PREC_NONE))
         else:
             return "%s_divide(%s, %s)" % (
-                    self.complex_type_name(tgt_dtype),
+                    complex_type_name(tgt_dtype),
                     self.rec(expr.numerator, PREC_NONE),
                     self.rec(expr.denominator, PREC_NONE))
 
@@ -419,12 +457,12 @@ class ComplexCCodeMapper(CCodeMapperBase):
 
                 if b_complex and not e_complex:
                     return "%s_powr(%s, %s)" % (
-                            self.complex_type_name(tgt_dtype),
+                            complex_type_name(tgt_dtype),
                             self.rec(expr.base, PREC_NONE),
                             self.rec(expr.exponent, PREC_NONE))
                 else:
                     return "%s_pow(%s, %s)" % (
-                            self.complex_type_name(tgt_dtype),
+                            complex_type_name(tgt_dtype),
                             self.rec(expr.base, PREC_NONE),
                             self.rec(expr.exponent, PREC_NONE))
 
@@ -464,28 +502,42 @@ class CCodeMapper(ComplexCCodeMapper):
         from pymbolic.mapper.stringifier import PREC_NONE
 
         tgt_dtype = self.infer_type(expr)
+        arg_dtypes = [self.infer_type(par) for par in expr.parameters]
 
         name = expr.function.name
         if 'f' == tgt_dtype.kind and name == "abs":
             name = "fabs"
 
-        if 'c' == tgt_dtype.kind:
+        elif 'c' == tgt_dtype.kind:
             if name in ["conjg", "dconjg"]:
                 name = "conj"
 
             if name[:2] == "cd" and name[2:] in ["log", "exp", "sqrt"]:
                 name = name[2:]
 
-            if name == "aimag":
-                name = "imag"
-
             if name == "dble":
                 name = "real"
 
             name = "%s_%s" % (
-                    self.complex_type_name(tgt_dtype),
+                    complex_type_name(tgt_dtype),
                     name)
 
+        elif name in ["aimag", "real", "imag"] and tgt_dtype.kind == "f":
+            arg_dtype, = arg_dtypes
+
+            if name == "aimag":
+                name = "imag"
+
+            name = "%s_%s" % (
+                    complex_type_name(arg_dtype),
+                    name)
+
+        elif 'c' == tgt_dtype.kind and name == "abs":
+            arg_dtype, = arg_dtypes
+
+            name = "%s_abs" % (
+                    complex_type_name(arg_dtype))
+
         return self.format("%s(%s)",
                 name,
                 self.join_rec(", ", expr.parameters, PREC_NONE))
@@ -512,7 +564,10 @@ class CCodeMapper(ComplexCCodeMapper):
         from pymbolic.mapper.stringifier import PREC_NONE
         if expr.dtype.kind == "c":
             r, i = expr.value
-            return "{ %s, %s }" % (self.rec(r, PREC_NONE), self.rec(i, PREC_NONE))
+            return "%s_new(%s, %s)" % (
+                    complex_type_name(expr.dtype),
+                    self.rec(r, PREC_NONE),
+                    self.rec(i, PREC_NONE))
         else:
             return expr.value
 
@@ -758,10 +813,9 @@ class ArgumentAnalayzer(FTreeWalkerBase):
 
         lhs = self.parse_expr(node.variable)
 
-        from pymbolic.primitives import Subscript, Call
-        if isinstance(lhs, Subscript):
+        if isinstance(lhs, p.Subscript):
             lhs_name = lhs.aggregate.name
-        elif isinstance(lhs, Call):
+        elif isinstance(lhs, p.Call):
             # in absence of dim info, subscripts get parsed as calls
             lhs_name = lhs.function.name
         else:
@@ -797,11 +851,10 @@ class ArgumentAnalayzer(FTreeWalkerBase):
     def map_Call(self, node):
         scope = self.scope_stack[-1]
 
-        from pymbolic.primitives import Subscript, Variable
         for i, arg_str in enumerate(node.items):
             arg = self.parse_expr(arg_str)
-            if isinstance(arg, (Variable, Subscript)):
-                if isinstance(arg, Subscript):
+            if isinstance(arg, (p.Variable, p.Subscript)):
+                if isinstance(arg, p.Subscript):
                     arg_name = arg.aggregate.name
                 else:
                     arg_name = arg.name
@@ -926,9 +979,9 @@ class F2CLTranslator(FTreeWalkerBase):
                     if shape is not None:
                         result.append(cgen.Statement(
                             "%s %s[nitemsof(%s)]"
-                                % (
-                                    dtype_to_ctype(scope.get_type(name)),
-                                    name, name)))
+                            % (
+                                dtype_to_ctype(scope.get_type(name)),
+                                name, name)))
                     else:
                         result.append(self.get_declarator(name))
 
@@ -1358,6 +1411,13 @@ def f2cl_files(source_file, target_file, **kwargs):
 
 
 if __name__ == "__main__":
+    import logging
+    console = logging.StreamHandler()
+    console.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
+    console.setFormatter(formatter)
+    logging.getLogger('fparser').addHandler(console)
+
     from cgen.opencl import CLConstant
 
     if 0:
diff --git a/doc/misc.rst b/doc/misc.rst
index 950e27da8412d84cec520687ae9d65e0aa58d819..31b954e3fe8c637b15f8d8ab6fa5853b40ed583e 100644
--- a/doc/misc.rst
+++ b/doc/misc.rst
@@ -110,13 +110,22 @@ other software to be turned into the corresponding :mod:`pyopencl` objects.
 User-visible Changes
 ====================
 
-Version 2014.1
+Version 2015.2
 --------------
 .. note::
 
     This version is currently under development. You can get snapshots from
     PyOpenCL's `git repository <https://github.com/pyopencl/pyopencl>`_
 
+Version 2015.1
+--------------
+
+* Support for new-style buffer protocol
+* Numerous fixes
+
+Version 2014.1
+--------------
+
 * :ref:`ipython-integration`
 * Bug fixes
 
diff --git a/doc/tools.rst b/doc/tools.rst
index e85b4e9a39ac8dca6be4b8a4a6831ab49bf8852c..9c647b1992c6337cc58d73ec24d13e4ecb6a18af 100644
--- a/doc/tools.rst
+++ b/doc/tools.rst
@@ -26,7 +26,7 @@ the available memory.
 Using :class:`pyopencl.array.Array` instances with a :class:`MemoryPool` is
 not complicated::
 
-    mem_pool = cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue))
+    mem_pool = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue))
     a_dev = cl_array.arange(queue, 2000, dtype=np.float32, allocator=mem_pool)
 
 .. class:: PooledBuffer
diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py
index 387e31a4d0a455368ca45bef287220b5f9ead72e..0fb9c611a93f43610298c66533a80e9572fe44fc 100644
--- a/pyopencl/algorithm.py
+++ b/pyopencl/algorithm.py
@@ -392,6 +392,9 @@ RADIX_SORT_OUTPUT_STMT_TPL = Template(r"""//CL//
 
 
 # {{{ driver
+# import hoisted here to be used as a default argument in the constructor
+from pyopencl.scan import GenericScanKernel
+
 
 class RadixSort(object):
     """Provides a general `radix sort <https://en.wikipedia.org/wiki/Radix_sort>`_
@@ -401,7 +404,7 @@ class RadixSort(object):
     """
     def __init__(self, context, arguments, key_expr, sort_arg_names,
             bits_at_a_time=2, index_dtype=np.int32, key_dtype=np.uint32,
-            options=[]):
+            scan_kernel=GenericScanKernel, options=[]):
         """
         :arg arguments: A string of comma-separated C argument declarations.
             If *arguments* is specified, then *input_expr* must also be
@@ -469,8 +472,7 @@ class RadixSort(object):
         scan_preamble = preamble \
                 + RADIX_SORT_SCAN_PREAMBLE_TPL.render(**codegen_args)
 
-        from pyopencl.scan import GenericScanKernel
-        self.scan_kernel = GenericScanKernel(
+        self.scan_kernel = scan_kernel(
                 context, scan_dtype,
                 arguments=scan_arguments,
                 input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
@@ -821,8 +823,8 @@ class ListOfListsBuilder:
         from pyopencl.tools import VectorArg, OtherArg
         kernel_list_args = [
                 VectorArg(index_dtype, "plb_%s_count" % name)
-                    for name, dtype in self.list_names_and_dtypes
-                    if name not in self.count_sharing]
+                for name, dtype in self.list_names_and_dtypes
+                if name not in self.count_sharing]
 
         user_list_args = []
         for name, dtype in self.list_names_and_dtypes:
diff --git a/pyopencl/array.py b/pyopencl/array.py
index 8d29b6c1d74db82d4230cb3f513a41e820bad7a8..bd2ee902ea179e4ec39db06884ac14ecd3f84d1f 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -60,7 +60,7 @@ except:
 
 # {{{ vector types
 
-class vec:
+class vec:  # noqa
     pass
 
 
@@ -271,7 +271,7 @@ class ArrayHasOffsetError(ValueError):
         ValueError.__init__(self, val)
 
 
-class _copy_queue:
+class _copy_queue:  # noqa
     pass
 
 
diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py
index 0a6aed652b9b3a67613e7be349d422f9fb6faa01..26e5b994bc2a265e1e6805d79b59e688ee7f31a8 100644
--- a/pyopencl/characterize/__init__.py
+++ b/pyopencl/characterize/__init__.py
@@ -313,19 +313,6 @@ def get_simd_group_size(dev, type_size):
 
     if dev.type & cl.device_type.CPU:
         # implicit assumption: Impl. will vectorize
-
-        if type_size == 1:
-            return dev.preferred_vector_width_char
-        elif type_size == 2:
-            return dev.preferred_vector_width_short
-        elif type_size == 4:
-            return dev.preferred_vector_width_float
-        elif type_size == 8:
-            return dev.preferred_vector_width_double
-        else:
-            from warnings import warn
-            warn("unexpected dtype size in get_simd_group on CPU device, "
-                    "guessing group width 1")
-            return 1
+        return 1
 
     return None
diff --git a/pyopencl/cl/pyopencl-bessel-j-complex.cl b/pyopencl/cl/pyopencl-bessel-j-complex.cl
new file mode 100644
index 0000000000000000000000000000000000000000..99592828adf23f5bcf12a6a18b2c2d14941f3c82
--- /dev/null
+++ b/pyopencl/cl/pyopencl-bessel-j-complex.cl
@@ -0,0 +1,240 @@
+/*
+Evaluate Bessel J function J_v(z) and J_{v+1}(z) with v a nonnegative integer
+and z anywhere in the complex plane.
+
+Copyright (C) Vladimir Rokhlin
+Copyright (C) 2010-2012 Leslie Greengard and Zydrunas Gimbutas
+Copyright (C) 2015 Shidong Jiang, Andreas Kloeckner
+
+Manually translated from
+https://github.com/zgimbutas/fmmlib2d/blob/master/src/cdjseval2d.f
+
+Originally licensed under GPL, permission to license under MIT granted via email
+by Vladimir Rokhlin on May 25, 2015 and by Zydrunas Gimbutas on May 17, 2015.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+*/
+
+void bessel_j_complex(int v, cdouble_t z, cdouble_t *j_v, cdouble_t *j_vp1)
+{
+  int n;
+  int nmax = 10000;
+
+  int k;
+  int kmax=8;
+
+  int vscale, vp1scale;
+  double vscaling, vp1scaling;
+
+  const double small = 2e-1;
+  const double median = 1.0e0;
+
+  const double upbound = 1e40;
+  const double upbound_inv = 1e-40;
+
+  double dd;
+  double k_factorial_inv, kv_factorial_inv, kvp1_factorial_inv;
+
+  cdouble_t z_half, mz_half2, mz_half_2k, z_half_v, z_half_vp1;
+
+  cdouble_t ima = cdouble_new(0, 1);
+  cdouble_t neg_ima = cdouble_new(0, -1);
+
+  cdouble_t zinv, ztmp;
+  cdouble_t j_nm1, j_n, j_np1;
+
+  cdouble_t psi, zsn, zmul, zmulinv;
+  cdouble_t unscaled_j_n, unscaled_j_nm1, unscaled_j_np1;
+  cdouble_t unscaled_j_v, unscaled_j_vp1;
+  cdouble_t scaling;
+
+  // assert( v >= 0 );
+
+#if 0
+  if (cdouble_abs(z) < tiny)
+  {
+    if (v == 0)
+    {
+      *j_v = cdouble_new(1, 0);
+      *j_vp1 = cdouble_new(0, 0);
+    } else
+    {
+      *j_v = cdouble_new(0, 0);
+      *j_vp1 = cdouble_new(0, 0);
+    }
+    return;
+  }
+#endif
+
+  // {{{ power series for (small z) or (large v and median z)
+  if ( (cdouble_abs(z) < small) || ( (v>12) && (cdouble_abs(z) < median)))
+  {
+    z_half = cdouble_divider(z,2.0);
+
+    mz_half2 = cdouble_neg(cdouble_mul(z_half, z_half));
+
+    z_half_v = cdouble_powr(z_half, v);
+    z_half_vp1 = cdouble_mul(z_half_v, z_half);
+
+
+    // compute 1/v!
+    kv_factorial_inv = 1.0;
+    for ( k = 1; k <= v; k++)
+    {
+      kv_factorial_inv /= k;
+    }
+
+    kvp1_factorial_inv = kv_factorial_inv / (v+1);
+
+    k_factorial_inv = 1.0;
+
+    // compute the power series of bessel j function
+    mz_half_2k = cdouble_new(1.0, 0);
+
+    *j_v = cdouble_new(0, 0);
+    *j_vp1 = cdouble_new(0, 0);
+
+    for ( k = 0; k < kmax; k++ )
+    {
+      *j_v = cdouble_add(
+          *j_v,
+          cdouble_mulr(mz_half_2k, kv_factorial_inv*k_factorial_inv));
+      *j_vp1 = cdouble_add(*j_vp1,
+          cdouble_mulr(mz_half_2k, kvp1_factorial_inv*k_factorial_inv));
+
+      mz_half_2k = cdouble_mul(mz_half_2k, mz_half2);
+      k_factorial_inv /= (k+1);
+      kv_factorial_inv /= (k+v+1);
+      kvp1_factorial_inv /= (k+v+2);
+    }
+
+    *j_v = cdouble_mul(*j_v, z_half_v );
+    *j_vp1 = cdouble_mul(*j_vp1, z_half_vp1 );
+
+    return;
+  }
+
+  // }}}
+
+  // {{{ use recurrence for large z
+
+  j_nm1 = cdouble_new(0, 0);
+  j_n = cdouble_new(1, 0);
+
+  n = v;
+
+  zinv = cdouble_rdivide(1,z);
+
+  while (true)
+  {
+    j_np1 = cdouble_sub(
+        cdouble_mul(cdouble_rmul(2*n, zinv), j_n),
+        j_nm1);
+
+    n += 1;
+    j_nm1 = j_n;
+    j_n = j_np1;
+
+    if (n > nmax)
+    {
+      *j_v = cdouble_new(nan(0x8e55e1u), 0);
+      *j_vp1 = cdouble_new(nan(0x8e55e1u), 0);
+      return;
+    }
+
+    dd = pow((cdouble_real(j_n)), 2)+pow(cdouble_imag(j_n),2);
+    if (dd > upbound)
+      break;
+  }
+
+  // downward recursion, account for rescalings
+  // Record the number of times of the missed rescalings
+  // for j_v and j_vp1.
+
+  unscaled_j_np1 = cdouble_new(0, 0);
+  unscaled_j_n = cdouble_new(1, 0);
+
+  // Use normalization condition http://dlmf.nist.gov/10.12#E5
+  psi = cdouble_new(0, 0);
+
+  if (cdouble_imag(z) <= 0)
+    zmul = ima;
+  else
+    zmul = neg_ima;
+
+  zsn = cdouble_powr(zmul, n%4);
+
+  zmulinv = cdouble_rdivide(1, zmul);
+
+  vscale = 0;
+  vp1scale = 0;
+
+  while (n > 0)
+  {
+    ztmp = cdouble_sub(
+        cdouble_mul(cdouble_rmul(2*n, zinv), unscaled_j_n),
+        unscaled_j_np1);
+    dd = pow(cdouble_real(ztmp), 2) + pow(cdouble_imag(ztmp), 2);
+
+    unscaled_j_nm1 = ztmp;
+
+
+    psi = cdouble_add(psi, cdouble_mul(unscaled_j_n, zsn));
+    zsn = cdouble_mul(zsn, zmulinv);
+
+    n -= 1;
+    unscaled_j_np1 = unscaled_j_n;
+    unscaled_j_n = unscaled_j_nm1;
+
+    if (dd > upbound)
+    {
+      unscaled_j_np1 = cdouble_rmul(upbound_inv, unscaled_j_np1);
+      unscaled_j_n = cdouble_rmul(upbound_inv, unscaled_j_n);
+      psi = cdouble_rmul(upbound_inv,psi);
+      if (n < v) vscale++;
+      if (n < v+1) vp1scale++;
+    }
+
+    if (n == v)
+      unscaled_j_v = unscaled_j_n;
+    if (n == v+1)
+      unscaled_j_vp1 = unscaled_j_n;
+
+  }
+
+  psi = cdouble_add(cdouble_rmul(2, psi), unscaled_j_n);
+
+  if ( cdouble_imag(z) <= 0 )
+  {
+    scaling = cdouble_divide( cdouble_exp( cdouble_mul(ima,z) ), psi);
+  } else
+  {
+    scaling = cdouble_divide( cdouble_exp( cdouble_mul(neg_ima,z) ), psi);
+  }
+  vscaling = pow(upbound_inv, vscale);
+  vp1scaling = pow(upbound_inv, vp1scale);
+
+  *j_v = cdouble_mul(unscaled_j_v, cdouble_mulr(scaling, vscaling));
+  *j_vp1 = cdouble_mul(unscaled_j_vp1, cdouble_mulr(scaling,vp1scaling));
+
+  // }}}
+}
+
+// vim: fdm=marker
diff --git a/pyopencl/cl/pyopencl-complex.h b/pyopencl/cl/pyopencl-complex.h
index 67ae2cfd0c97275291013d8a2daae5f0689085fd..976bf81a62dcef2511cb20a4d01220542fca23be 100644
--- a/pyopencl/cl/pyopencl-complex.h
+++ b/pyopencl/cl/pyopencl-complex.h
@@ -28,57 +28,82 @@
 // multiplication (float2*float1) is defined for these types,
 // but do not match the rules of complex arithmetic.
 
+#pragma once
+
 #define PYOPENCL_DECLARE_COMPLEX_TYPE_INT(REAL_TP, REAL_3LTR, TPROOT, TP) \
   \
-  REAL_TP TPROOT##_real(TP a) { return a.x; } \
-  REAL_TP TPROOT##_imag(TP a) { return a.y; } \
-  REAL_TP TPROOT##_abs(TP a) { return hypot(a.x, a.y); } \
+  REAL_TP TPROOT##_real(TP a) { return a.real; } \
+  REAL_TP TPROOT##_imag(TP a) { return a.imag; } \
+  REAL_TP TPROOT##_abs(TP a) { return hypot(a.real, a.imag); } \
+  \
+  TP TPROOT##_new(REAL_TP real, REAL_TP imag) \
+  { \
+    TP result; \
+    result.real = real; \
+    result.imag = imag; \
+    return result; \
+  } \
+  \
+  TP TPROOT##_fromreal(REAL_TP real) \
+  { \
+    TP result; \
+    result.real = real; \
+    result.imag = 0; \
+    return result; \
+  } \
+  \
   \
-  TP TPROOT##_fromreal(REAL_TP a) { return (TP)(a, 0); } \
-  TP TPROOT##_new(REAL_TP a, REAL_TP b) { return (TP)(a, b); } \
-  TP TPROOT##_conj(TP a) { return (TP)(a.x, -a.y); } \
+  TP TPROOT##_neg(TP a) { return TPROOT##_new(-a.real, -a.imag); } \
+  TP TPROOT##_conj(TP a) { return TPROOT##_new(a.real, -a.imag); } \
   \
   TP TPROOT##_add(TP a, TP b) \
   { \
-    return a+b; \
+    return TPROOT##_new(a.real + b.real, a.imag + b.imag); \
+    ; \
   } \
   TP TPROOT##_addr(TP a, REAL_TP b) \
   { \
-    return (TP)(b+a.x, a.y); \
+    return TPROOT##_new(b+a.real, a.imag); \
   } \
   TP TPROOT##_radd(REAL_TP a, TP b) \
   { \
-    return (TP)(a+b.x, b.y); \
+    return TPROOT##_new(a+b.real, b.imag); \
+  } \
+  \
+  TP TPROOT##_sub(TP a, TP b) \
+  { \
+    return TPROOT##_new(a.real - b.real, a.imag - b.imag); \
+    ; \
   } \
   \
   TP TPROOT##_mul(TP a, TP b) \
   { \
-    return (TP)( \
-        a.x*b.x - a.y*b.y, \
-        a.x*b.y + a.y*b.x); \
+    return TPROOT##_new( \
+        a.real*b.real - a.imag*b.imag, \
+        a.real*b.imag + a.imag*b.real); \
   } \
   \
   TP TPROOT##_mulr(TP a, REAL_TP b) \
   { \
-    return a*b; \
+    return TPROOT##_new(a.real*b, a.imag*b); \
   } \
   \
   TP TPROOT##_rmul(REAL_TP a, TP b) \
   { \
-    return a*b; \
+    return TPROOT##_new(a*b.real, a*b.imag); \
   } \
   \
   TP TPROOT##_rdivide(REAL_TP z1, TP z2) \
   { \
-    if (fabs(z2.x) <= fabs(z2.y)) { \
-      REAL_TP ratio = z2.x / z2.y; \
-      REAL_TP denom = z2.y * (1 + ratio * ratio); \
-      return (TP)((z1 * ratio) / denom, - z1 / denom); \
+    if (fabs(z2.real) <= fabs(z2.imag)) { \
+      REAL_TP ratio = z2.real / z2.imag; \
+      REAL_TP denom = z2.imag * (1 + ratio * ratio); \
+      return TPROOT##_new((z1 * ratio) / denom, - z1 / denom); \
     } \
     else { \
-      REAL_TP ratio = z2.y / z2.x; \
-      REAL_TP denom = z2.x * (1 + ratio * ratio); \
-      return (TP)(z1 / denom, - (z1 * ratio) / denom); \
+      REAL_TP ratio = z2.imag / z2.real; \
+      REAL_TP denom = z2.real * (1 + ratio * ratio); \
+      return TPROOT##_new(z1 / denom, - (z1 * ratio) / denom); \
     } \
   } \
   \
@@ -86,170 +111,174 @@
   { \
     REAL_TP ratio, denom, a, b, c, d; \
     \
-    if (fabs(z2.x) <= fabs(z2.y)) { \
-      ratio = z2.x / z2.y; \
-      denom = z2.y; \
-      a = z1.y; \
-      b = z1.x; \
-      c = -z1.x; \
-      d = z1.y; \
+    if (fabs(z2.real) <= fabs(z2.imag)) { \
+      ratio = z2.real / z2.imag; \
+      denom = z2.imag; \
+      a = z1.imag; \
+      b = z1.real; \
+      c = -z1.real; \
+      d = z1.imag; \
     } \
     else { \
-      ratio = z2.y / z2.x; \
-      denom = z2.x; \
-      a = z1.x; \
-      b = z1.y; \
-      c = z1.y; \
-      d = -z1.x; \
+      ratio = z2.imag / z2.real; \
+      denom = z2.real; \
+      a = z1.real; \
+      b = z1.imag; \
+      c = z1.imag; \
+      d = -z1.real; \
     } \
     denom *= (1 + ratio * ratio); \
-    return (TP)( \
+    return TPROOT##_new( \
        (a + b * ratio) / denom, \
        (c + d * ratio) / denom); \
   } \
   \
   TP TPROOT##_divider(TP a, REAL_TP b) \
   { \
-    return a/b; \
+    return TPROOT##_new(a.real/b, a.imag/b); \
   } \
   \
   TP TPROOT##_pow(TP a, TP b) \
   { \
-    REAL_TP logr = log(hypot(a.x, a.y)); \
-    REAL_TP logi = atan2(a.y, a.x); \
-    REAL_TP x = exp(logr * b.x - logi * b.y); \
-    REAL_TP y = logr * b.y + logi * b.x; \
+    REAL_TP logr = log(hypot(a.real, a.imag)); \
+    REAL_TP logi = atan2(a.imag, a.real); \
+    REAL_TP x = exp(logr * b.real - logi * b.imag); \
+    REAL_TP y = logr * b.imag + logi * b.real; \
     \
     REAL_TP cosy; \
     REAL_TP siny = sincos(y, &cosy); \
-    return (TP) (x*cosy, x*siny); \
+    return TPROOT##_new(x*cosy, x*siny); \
   } \
   \
   TP TPROOT##_powr(TP a, REAL_TP b) \
   { \
-    REAL_TP logr = log(hypot(a.x, a.y)); \
-    REAL_TP logi = atan2(a.y, a.x); \
+    REAL_TP logr = log(hypot(a.real, a.imag)); \
+    REAL_TP logi = atan2(a.imag, a.real); \
     REAL_TP x = exp(logr * b); \
     REAL_TP y = logi * b; \
     \
     REAL_TP cosy; \
     REAL_TP siny = sincos(y, &cosy); \
     \
-    return (TP)(x * cosy, x*siny); \
+    return TPROOT##_new(x * cosy, x*siny); \
   } \
   \
   TP TPROOT##_rpow(REAL_TP a, TP b) \
   { \
     REAL_TP logr = log(a); \
-    REAL_TP x = exp(logr * b.x); \
-    REAL_TP y = logr * b.y; \
+    REAL_TP x = exp(logr * b.real); \
+    REAL_TP y = logr * b.imag; \
     \
     REAL_TP cosy; \
     REAL_TP siny = sincos(y, &cosy); \
-    return (TP) (x * cosy, x * siny); \
+    return TPROOT##_new(x * cosy, x * siny); \
   } \
   \
   TP TPROOT##_sqrt(TP a) \
   { \
-    REAL_TP re = a.x; \
-    REAL_TP im = a.y; \
+    REAL_TP re = a.real; \
+    REAL_TP im = a.imag; \
     REAL_TP mag = hypot(re, im); \
     TP result; \
     \
     if (mag == 0.f) { \
-      result.x = result.y = 0.f; \
+      result.real = result.imag = 0.f; \
     } else if (re > 0.f) { \
-      result.x = sqrt(0.5f * (mag + re)); \
-      result.y = im/result.x/2.f; \
+      result.real = sqrt(0.5f * (mag + re)); \
+      result.imag = im/result.real/2.f; \
     } else { \
-      result.y = sqrt(0.5f * (mag - re)); \
+      result.imag = sqrt(0.5f * (mag - re)); \
       if (im < 0.f) \
-        result.y = - result.y; \
-      result.x = im/result.y/2.f; \
+        result.imag = - result.imag; \
+      result.real = im/result.imag/2.f; \
     } \
     return result; \
   } \
   \
   TP TPROOT##_exp(TP a) \
   { \
-    REAL_TP expr = exp(a.x); \
+    REAL_TP expr = exp(a.real); \
     REAL_TP cosi; \
-    REAL_TP sini = sincos(a.y, &cosi); \
-    return (TP)(expr * cosi, expr * sini); \
+    REAL_TP sini = sincos(a.imag, &cosi); \
+    return TPROOT##_new(expr * cosi, expr * sini); \
   } \
   \
   TP TPROOT##_log(TP a) \
-  { return (TP)(log(hypot(a.x, a.y)), atan2(a.y, a.x)); } \
+  { return TPROOT##_new(log(hypot(a.real, a.imag)), atan2(a.imag, a.real)); } \
   \
   TP TPROOT##_sin(TP a) \
   { \
     REAL_TP cosr; \
-    REAL_TP sinr = sincos(a.x, &cosr); \
-    return (TP)(sinr*cosh(a.y), cosr*sinh(a.y)); \
+    REAL_TP sinr = sincos(a.real, &cosr); \
+    return TPROOT##_new(sinr*cosh(a.imag), cosr*sinh(a.imag)); \
   } \
   \
   TP TPROOT##_cos(TP a) \
   { \
     REAL_TP cosr; \
-    REAL_TP sinr = sincos(a.x, &cosr); \
-    return (TP)(cosr*cosh(a.y), -sinr*sinh(a.y)); \
+    REAL_TP sinr = sincos(a.real, &cosr); \
+    return TPROOT##_new(cosr*cosh(a.imag), -sinr*sinh(a.imag)); \
   } \
   \
   TP TPROOT##_tan(TP a) \
   { \
-    REAL_TP re2 = 2.f * a.x; \
-    REAL_TP im2 = 2.f * a.y; \
+    REAL_TP re2 = 2.f * a.real; \
+    REAL_TP im2 = 2.f * a.imag; \
     \
     const REAL_TP limit = log(REAL_3LTR##_MAX); \
     \
     if (fabs(im2) > limit) \
-      return (TP)(0.f, (im2 > 0 ? 1.f : -1.f)); \
+      return TPROOT##_new(0.f, (im2 > 0 ? 1.f : -1.f)); \
     else \
     { \
       REAL_TP den = cos(re2) + cosh(im2); \
-      return (TP) (sin(re2) / den, sinh(im2) / den); \
+      return TPROOT##_new(sin(re2) / den, sinh(im2) / den); \
     } \
   } \
   \
   TP TPROOT##_sinh(TP a) \
   { \
     REAL_TP cosi; \
-    REAL_TP sini = sincos(a.y, &cosi); \
-    return (TP)(sinh(a.x)*cosi, cosh(a.x)*sini); \
+    REAL_TP sini = sincos(a.imag, &cosi); \
+    return TPROOT##_new(sinh(a.real)*cosi, cosh(a.real)*sini); \
   } \
   \
   TP TPROOT##_cosh(TP a) \
   { \
     REAL_TP cosi; \
-    REAL_TP sini = sincos(a.y, &cosi); \
-    return (TP)(cosh(a.x)*cosi, sinh(a.x)*sini); \
+    REAL_TP sini = sincos(a.imag, &cosi); \
+    return TPROOT##_new(cosh(a.real)*cosi, sinh(a.real)*sini); \
   } \
   \
   TP TPROOT##_tanh(TP a) \
   { \
-    REAL_TP re2 = 2.f * a.x; \
-    REAL_TP im2 = 2.f * a.y; \
+    REAL_TP re2 = 2.f * a.real; \
+    REAL_TP im2 = 2.f * a.imag; \
     \
     const REAL_TP limit = log(REAL_3LTR##_MAX); \
     \
     if (fabs(re2) > limit) \
-      return (TP)((re2 > 0 ? 1.f : -1.f), 0.f); \
+      return TPROOT##_new((re2 > 0 ? 1.f : -1.f), 0.f); \
     else \
     { \
       REAL_TP den = cosh(re2) + cos(im2); \
-      return (TP) (sinh(re2) / den, sin(im2) / den); \
+      return TPROOT##_new(sinh(re2) / den, sin(im2) / den); \
     } \
   } \
 
 #define PYOPENCL_DECLARE_COMPLEX_TYPE(BASE, BASE_3LTR) \
-  typedef BASE##2 c##BASE##_t; \
+  typedef union \
+  { \
+    struct { BASE x, y; }; \
+    struct { BASE real, imag; }; \
+  } c##BASE##_t; \
   \
   PYOPENCL_DECLARE_COMPLEX_TYPE_INT(BASE, BASE_3LTR, c##BASE, c##BASE##_t)
 
 PYOPENCL_DECLARE_COMPLEX_TYPE(float, FLT);
-#define cfloat_cast(a) ((cfloat_t) ((a).x, (a).y))
+#define cfloat_cast(a) cfloat_new((a).real, (a).imag)
 
 #ifdef PYOPENCL_DEFINE_CDOUBLE
 PYOPENCL_DECLARE_COMPLEX_TYPE(double, DBL);
-#define cdouble_cast(a) ((cdouble_t) ((a).x, (a).y))
+#define cdouble_cast(a) cdouble_new((a).real, (a).imag)
 #endif
diff --git a/pyopencl/cl/pyopencl-hankel-complex.cl b/pyopencl/cl/pyopencl-hankel-complex.cl
new file mode 100644
index 0000000000000000000000000000000000000000..3115b8de42ae1ccaf608d89f4685365657123a98
--- /dev/null
+++ b/pyopencl/cl/pyopencl-hankel-complex.cl
@@ -0,0 +1,444 @@
+/*
+Evaluate Hankel function of first kind of order 0 and 1 for argument z
+anywhere in the complex plane.
+
+Copyright (C) Vladimir Rokhlin
+Copyright (C) 2010-2012 Leslie Greengard and Zydrunas Gimbutas
+Copyright (C) 2015 Andreas Kloeckner
+
+Auto-translated from
+https://github.com/zgimbutas/fmmlib2d/blob/master/src/hank103.f
+using
+https://github.com/pyopencl/pyopencl/tree/master/contrib/fortran-to-opencl
+
+Originally licensed under GPL, permission to license under MIT granted via email
+by Vladimir Rokhlin on May 25, 2015 and by Zydrunas Gimbutas on May 17, 2015.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+void hank103(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon);
+void hank103u(cdouble_t z, int *ier, cdouble_t *h0, cdouble_t *h1, int ifexpon);
+void hank103p(__constant cdouble_t *p, int m, cdouble_t z, cdouble_t *f);
+void hank103a(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon);
+void hank103l(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon);
+void hank103r(cdouble_t z, int *ier, cdouble_t *h0, cdouble_t *h1, int ifexpon);
+
+/*
+ * this subroutine evaluates the hankel functions H_0^1, H_1^1
+ * for an arbitrary user-specified complex number z. The user
+ * also has the option of evaluating the functions h0, h1
+ * scaled by the (complex) coefficient e^{-i \cdot z}. This
+ * subroutine is a modification of the subroutine hank102
+ * (see), different from the latter by having the parameter
+ * ifexpon. Please note that the subroutine hank102 is in
+ * turn a slightly accelerated version of the old hank101
+ * (see). The principal claim to fame of all three is that
+ * they are valid on the whole  complex plane, and are
+ * reasonably accurate (14-digit relative accuracy) and
+ * reasonably fast. Also, please note that all three have not
+ * been carefully tested in the third quadrant (both x and y
+ * negative); some sort of numerical trouble is possible
+ * (though has not been observed) for LARGE z in the third
+ * quadrant.
+ *
+ * ifexpon = 1 will cause the subroutine to evaluate the Hankel functions
+ *     honestly
+ * ifexpon = 0 will cause the subroutine to scale the Hankel functions
+ *     by e^{-i \cdot z}.
+ */
+
+void hankel_01_complex(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon)
+{
+  cdouble_t cclog;
+  cdouble_t cd;
+  cdouble_t fj0;
+  cdouble_t fj1;
+  cdouble_t h0r;
+  cdouble_t h0u;
+  cdouble_t h1r;
+  cdouble_t h1u;
+  double half_;
+  int ier;
+  cdouble_t ima = cdouble_new(0.0e0, 1.0e0);
+  double pi = 0.31415926535897932e+01;
+  cdouble_t ser2;
+  cdouble_t ser3;
+  double subt;
+  cdouble_t y0;
+  cdouble_t y1;
+  cdouble_t z2;
+  cdouble_t zr;
+  cdouble_t zu;
+  if (cdouble_imag(z) < 0)
+    goto label_1400;
+
+  hank103u(z, & ier, & (* h0), & (* h1), ifexpon);
+  return;
+  label_1400:
+  ;
+
+  if (cdouble_real(z) < 0)
+    goto label_2000;
+
+  hank103r(z, & ier, & (* h0), & (* h1), ifexpon);
+  return;
+  label_2000:
+  ;
+
+  zu = cdouble_conj(z);
+  zr = cdouble_rmul(- 1, zu);
+  hank103u(zu, & ier, & h0u, & h1u, ifexpon);
+  hank103r(zr, & ier, & h0r, & h1r, ifexpon);
+  if (ifexpon == 1)
+    goto label_3000;
+
+  subt = fabs(cdouble_imag(zu));
+  cd = cdouble_exp(cdouble_add(cdouble_fromreal((- 1) * subt), cdouble_mul(ima, zu)));
+  h0u = cdouble_mul(h0u, cd);
+  h1u = cdouble_mul(h1u, cd);
+  cd = cdouble_exp(cdouble_add(cdouble_fromreal((- 1) * subt), cdouble_mul(ima, zr)));
+  h0r = cdouble_mul(h0r, cd);
+  h1r = cdouble_mul(h1r, cd);
+  label_3000:
+  ;
+
+  half_ = 1;
+  half_ = half_ / 2;
+  y0 = cdouble_divide(cdouble_rmul(half_, cdouble_add(h0u, h0r)), ima);
+  fj0 = cdouble_rmul((- 1) * half_, cdouble_add(h0u, cdouble_rmul(- 1, h0r)));
+  y1 = cdouble_divide(cdouble_rmul((- 1) * half_, cdouble_add(h1u, cdouble_rmul(- 1, h1r))), ima);
+  fj1 = cdouble_rmul(half_, cdouble_add(h1u, h1r));
+  z2 = cdouble_rmul(- 1, cdouble_conj(z));
+  cclog = cdouble_log(z2);
+  ser2 = cdouble_add(y0, cdouble_rmul(- 1, cdouble_mul(cdouble_divider(cdouble_rmul(2, fj0), pi), cclog)));
+  ser3 = cdouble_add(y1, cdouble_rmul(- 1, cdouble_mul(cdouble_divider(cdouble_rmul(2, fj1), pi), cclog)));
+  fj0 = cdouble_conj(fj0);
+  fj1 = cdouble_rmul(- 1, cdouble_conj(fj1));
+  ser2 = cdouble_conj(ser2);
+  ser3 = cdouble_rmul(- 1, cdouble_conj(ser3));
+  cclog = cdouble_log(z);
+  y0 = cdouble_add(ser2, cdouble_mul(cdouble_divider(cdouble_rmul(2, fj0), pi), cclog));
+  y1 = cdouble_add(ser3, cdouble_mul(cdouble_divider(cdouble_rmul(2, fj1), pi), cclog));
+  * h0 = cdouble_add(fj0, cdouble_mul(ima, y0));
+  * h1 = cdouble_add(fj1, cdouble_mul(ima, y1));
+  if (ifexpon == 1)
+    return;
+
+  cd = cdouble_exp(cdouble_add(cdouble_fromreal(subt), cdouble_rmul(- 1, cdouble_mul(ima, z))));
+  * h0 = cdouble_mul(* h0, cd);
+  * h1 = cdouble_mul(* h1, cd);
+}
+
+__constant double hank103u_c0p1[] = {(- 1) * 0.6619836118357782e-12, (- 1) * 0.6619836118612709e-12, (- 1) * 0.7307514264754200e-21, 0.3928160926261892e-10, 0.5712712520172854e-09, (- 1) * 0.5712712519967086e-09, (- 1) * 0.1083820384008718e-07, (- 1) * 0.1894529309455499e-18, 0.7528123700585197e-07, 0.7528123700841491e-07, 0.1356544045548053e-16, (- 1) * 0.8147940452202855e-06, (- 1) * 0.3568198575016769e-05, 0.3568198574899888e-05, 0.2592083111345422e-04, 0.4209074870019400e-15, (- 1) * 0.7935843289157352e-04, (- 1) * 0.7935843289415642e-04, (- 1) * 0.6848330800445365e-14, 0.4136028298630129e-03, 0.9210433149997867e-03, (- 1) * 0.9210433149680665e-03, (- 1) * 0.3495306809056563e-02, (- 1) * 0.6469844672213905e-13, 0.5573890502766937e-02, 0.5573890503000873e-02, 0.3767341857978150e-12, (- 1) * 0.1439178509436339e-01, (- 1) * 0.1342403524448708e-01, 0.1342403524340215e-01, 0.8733016209933828e-02, 0.1400653553627576e-11, 0.2987361261932706e-01, 0.2987361261607835e-01, (- 1) * 0.3388096836339433e-11, (- 1) * 0.1690673895793793e+00, 0.2838366762606121e+00, (- 1) * 0.2838366762542546e+00, 0.7045107746587499e+00, (- 1) * 0.5363893133864181e-11, (- 1) * 0.7788044738211666e+00, (- 1) * 0.7788044738130360e+00, 0.5524779104964783e-11, 0.1146003459721775e+01, 0.6930697486173089e+00, (- 1) * 0.6930697486240221e+00, (- 1) * 0.7218270272305891e+00, 0.3633022466839301e-11, 0.3280924142354455e+00, 0.3280924142319602e+00, (- 1) * 0.1472323059106612e-11, (- 1) * 0.2608421334424268e+00, (- 1) * 0.9031397649230536e-01, 0.9031397649339185e-01, 0.5401342784296321e-01, (- 1) * 0.3464095071668884e-12, (- 1) * 0.1377057052946721e-01, (- 1) * 0.1377057052927901e-01, 0.4273263742980154e-13, 0.5877224130705015e-02, 0.1022508471962664e-02, (- 1) * 0.1022508471978459e-02, (- 1) * 0.2789107903871137e-03, 0.2283984571396129e-14, 0.2799719727019427e-04, 0.2799719726970900e-04, (- 1) * 0.3371218242141487e-16, (- 1) * 0.3682310515545645e-05, (- 1) * 0.1191412910090512e-06, 0.1191412910113518e-06};
+__constant double hank103u_c0p2[] = {0.5641895835516786e+00, (- 1) * 0.5641895835516010e+00, (- 1) * 0.3902447089770041e-09, (- 1) * 0.3334441074447365e-11, (- 1) * 0.7052368835911731e-01, (- 1) * 0.7052368821797083e-01, 0.1957299315085370e-08, (- 1) * 0.3126801711815631e-06, (- 1) * 0.3967331737107949e-01, 0.3967327747706934e-01, 0.6902866639752817e-04, 0.3178420816292497e-06, 0.4080457166061280e-01, 0.4080045784614144e-01, (- 1) * 0.2218731025620065e-04, 0.6518438331871517e-02, 0.9798339748600499e-01, (- 1) * 0.9778028374972253e-01, (- 1) * 0.3151825524811773e+00, (- 1) * 0.7995603166188139e-03, 0.1111323666639636e+01, 0.1116791178994330e+01, 0.1635711249533488e-01, (- 1) * 0.8527067497983841e+01, (- 1) * 0.2595553689471247e+02, 0.2586942834408207e+02, 0.1345583522428299e+03, 0.2002017907999571e+00, (- 1) * 0.3086364384881525e+03, (- 1) * 0.3094609382885628e+03, (- 1) * 0.1505974589617013e+01, 0.1250150715797207e+04, 0.2205210257679573e+04, (- 1) * 0.2200328091885836e+04, (- 1) * 0.6724941072552172e+04, (- 1) * 0.7018887749450317e+01, 0.8873498980910335e+04, 0.8891369384353965e+04, 0.2008805099643591e+02, (- 1) * 0.2030681426035686e+05, (- 1) * 0.2010017782384992e+05, 0.2006046282661137e+05, 0.3427941581102808e+05, 0.3432892927181724e+02, (- 1) * 0.2511417407338804e+05, (- 1) * 0.2516567363193558e+05, (- 1) * 0.3318253740485142e+02, 0.3143940826027085e+05, 0.1658466564673543e+05, (- 1) * 0.1654843151976437e+05, (- 1) * 0.1446345041326510e+05, (- 1) * 0.1645433213663233e+02, 0.5094709396573681e+04, 0.5106816671258367e+04, 0.3470692471612145e+01, (- 1) * 0.2797902324245621e+04, (- 1) * 0.5615581955514127e+03, 0.5601021281020627e+03, 0.1463856702925587e+03, 0.1990076422327786e+00, (- 1) * 0.9334741618922085e+01, (- 1) * 0.9361368967669095e+01};
+__constant double hank103u_c1p1[] = {0.4428361927253983e-12, (- 1) * 0.4428361927153559e-12, (- 1) * 0.2575693161635231e-10, (- 1) * 0.2878656317479645e-21, 0.3658696304107867e-09, 0.3658696304188925e-09, 0.7463138750413651e-19, (- 1) * 0.6748894854135266e-08, (- 1) * 0.4530098210372099e-07, 0.4530098210271137e-07, 0.4698787882823243e-06, 0.5343848349451927e-17, (- 1) * 0.1948662942158171e-05, (- 1) * 0.1948662942204214e-05, (- 1) * 0.1658085463182409e-15, 0.1316906100496570e-04, 0.3645368564036497e-04, (- 1) * 0.3645368563934748e-04, (- 1) * 0.1633458547818390e-03, (- 1) * 0.2697770638600506e-14, 0.2816784976551660e-03, 0.2816784976676616e-03, 0.2548673351180060e-13, (- 1) * 0.6106478245116582e-03, 0.2054057459296899e-03, (- 1) * 0.2054057460218446e-03, (- 1) * 0.6254962367291260e-02, 0.1484073406594994e-12, 0.1952900562500057e-01, 0.1952900562457318e-01, (- 1) * 0.5517611343746895e-12, (- 1) * 0.8528074392467523e-01, (- 1) * 0.1495138141086974e+00, 0.1495138141099772e+00, 0.4394907314508377e+00, (- 1) * 0.1334677126491326e-11, (- 1) * 0.1113740586940341e+01, (- 1) * 0.1113740586937837e+01, 0.2113005088866033e-11, 0.1170212831401968e+01, 0.1262152242318805e+01, (- 1) * 0.1262152242322008e+01, (- 1) * 0.1557810619605511e+01, 0.2176383208521897e-11, 0.8560741701626648e+00, 0.8560741701600203e+00, (- 1) * 0.1431161194996653e-11, (- 1) * 0.8386735092525187e+00, (- 1) * 0.3651819176599290e+00, 0.3651819176613019e+00, 0.2811692367666517e+00, (- 1) * 0.5799941348040361e-12, (- 1) * 0.9494630182937280e-01, (- 1) * 0.9494630182894480e-01, 0.1364615527772751e-12, 0.5564896498129176e-01, 0.1395239688792536e-01, (- 1) * 0.1395239688799950e-01, (- 1) * 0.5871314703753967e-02, 0.1683372473682212e-13, 0.1009157100083457e-02, 0.1009157100077235e-02, (- 1) * 0.8997331160162008e-15, (- 1) * 0.2723724213360371e-03, (- 1) * 0.2708696587599713e-04, 0.2708696587618830e-04, 0.3533092798326666e-05, (- 1) * 0.1328028586935163e-16, (- 1) * 0.1134616446885126e-06, (- 1) * 0.1134616446876064e-06};
+__constant double hank103u_c1p2[] = {(- 1) * 0.5641895835446003e+00, (- 1) * 0.5641895835437973e+00, 0.3473016376419171e-10, (- 1) * 0.3710264617214559e-09, 0.2115710836381847e+00, (- 1) * 0.2115710851180242e+00, 0.3132928887334847e-06, 0.2064187785625558e-07, (- 1) * 0.6611954881267806e-01, (- 1) * 0.6611997176900310e-01, (- 1) * 0.3386004893181560e-05, 0.7146557892862998e-04, (- 1) * 0.5728505088320786e-01, 0.5732906930408979e-01, (- 1) * 0.6884187195973806e-02, (- 1) * 0.2383737409286457e-03, 0.1170452203794729e+00, 0.1192356405185651e+00, 0.8652871239920498e-02, (- 1) * 0.3366165876561572e+00, (- 1) * 0.1203989383538728e+01, 0.1144625888281483e+01, 0.9153684260534125e+01, 0.1781426600949249e+00, (- 1) * 0.2740411284066946e+02, (- 1) * 0.2834461441294877e+02, (- 1) * 0.2192611071606340e+01, 0.1445470231392735e+03, 0.3361116314072906e+03, (- 1) * 0.3270584743216529e+03, (- 1) * 0.1339254798224146e+04, (- 1) * 0.1657618537130453e+02, 0.2327097844591252e+04, 0.2380960024514808e+04, 0.7760611776965994e+02, (- 1) * 0.7162513471480693e+04, (- 1) * 0.9520608696419367e+04, 0.9322604506839242e+04, 0.2144033447577134e+05, 0.2230232555182369e+03, (- 1) * 0.2087584364240919e+05, (- 1) * 0.2131762020653283e+05, (- 1) * 0.3825699231499171e+03, 0.3582976792594737e+05, 0.2642632405857713e+05, (- 1) * 0.2585137938787267e+05, (- 1) * 0.3251446505037506e+05, (- 1) * 0.3710875194432116e+03, 0.1683805377643986e+05, 0.1724393921722052e+05, 0.1846128226280221e+03, (- 1) * 0.1479735877145448e+05, (- 1) * 0.5258288893282565e+04, 0.5122237462705988e+04, 0.2831540486197358e+04, 0.3905972651440027e+02, (- 1) * 0.5562781548969544e+03, (- 1) * 0.5726891190727206e+03, (- 1) * 0.2246192560136119e+01, 0.1465347141877978e+03, 0.9456733342595993e+01, (- 1) * 0.9155767836700837e+01};
+void hank103u(cdouble_t z, int *ier, cdouble_t *h0, cdouble_t *h1, int ifexpon)
+{
+
+
+
+
+  cdouble_t ccex;
+  cdouble_t cd;
+  double com;
+  double d;
+  double done;
+  cdouble_t ima = cdouble_new(0.0e0, 1.0e0);
+  int m;
+  double thresh1;
+  double thresh2;
+  double thresh3;
+  cdouble_t zzz9;
+  * ier = 0;
+  com = cdouble_real(z);
+  if (cdouble_imag(z) >= 0)
+    goto label_1200;
+
+  * ier = 4;
+  return;
+  label_1200:
+  ;
+
+  done = 1;
+  thresh1 = 1;
+  thresh2 = 3.7 * 3.7;
+  thresh3 = 400;
+  d = cdouble_real(cdouble_mul(z, cdouble_conj(z)));
+  if ((d < thresh1) || (d > thresh3))
+    goto label_3000;
+
+  if (d > thresh2)
+    goto label_2000;
+
+  cd = cdouble_rdivide(done, cdouble_sqrt(z));
+  ccex = cd;
+  if (ifexpon == 1)
+    ccex = cdouble_mul(ccex, cdouble_exp(cdouble_mul(ima, z)));
+
+  zzz9 = cdouble_powr(z, 9);
+  m = 35;
+  hank103p((__constant cdouble_t *) (& (* hank103u_c0p1)), m, cd, & (* h0));
+  * h0 = cdouble_mul(cdouble_mul(* h0, ccex), zzz9);
+  hank103p((__constant cdouble_t *) (& (* hank103u_c1p1)), m, cd, & (* h1));
+  * h1 = cdouble_mul(cdouble_mul(* h1, ccex), zzz9);
+  return;
+  label_2000:
+  ;
+
+  cd = cdouble_rdivide(done, cdouble_sqrt(z));
+  ccex = cd;
+  if (ifexpon == 1)
+    ccex = cdouble_mul(ccex, cdouble_exp(cdouble_mul(ima, z)));
+
+  m = 31;
+  hank103p((__constant cdouble_t *) (& (* hank103u_c0p2)), m, cd, & (* h0));
+  * h0 = cdouble_mul(* h0, ccex);
+  m = 31;
+  hank103p((__constant cdouble_t *) (& (* hank103u_c1p2)), m, cd, & (* h1));
+  * h1 = cdouble_mul(* h1, ccex);
+  return;
+  label_3000:
+  ;
+
+  if (d > 50.e0)
+    goto label_4000;
+
+  hank103l(z, & (* h0), & (* h1), ifexpon);
+  return;
+  label_4000:
+  ;
+
+  hank103a(z, & (* h0), & (* h1), ifexpon);
+}
+
+void hank103p(__constant cdouble_t *p, int m, cdouble_t z, cdouble_t *f)
+{
+  int i;
+
+  * f = p[m - 1];
+  for (i = m + (- 1); i >= 1; i += - 1)
+  {
+    * f = cdouble_add(cdouble_mul(* f, z), p[i - 1]);
+    label_1200:
+    ;
+
+  }
+
+}
+
+__constant double hank103a_p[] = {0.1000000000000000e+01, (- 1) * 0.7031250000000000e-01, 0.1121520996093750e+00, (- 1) * 0.5725014209747314e+00, 0.6074042001273483e+01, (- 1) * 0.1100171402692467e+03, 0.3038090510922384e+04, (- 1) * 0.1188384262567833e+06, 0.6252951493434797e+07, (- 1) * 0.4259392165047669e+09, 0.3646840080706556e+11, (- 1) * 0.3833534661393944e+13, 0.4854014686852901e+15, (- 1) * 0.7286857349377657e+17, 0.1279721941975975e+20, (- 1) * 0.2599382102726235e+22, 0.6046711487532401e+24, (- 1) * 0.1597065525294211e+27};
+__constant double hank103a_p1[] = {0.1000000000000000e+01, 0.1171875000000000e+00, (- 1) * 0.1441955566406250e+00, 0.6765925884246826e+00, (- 1) * 0.6883914268109947e+01, 0.1215978918765359e+03, (- 1) * 0.3302272294480852e+04, 0.1276412726461746e+06, (- 1) * 0.6656367718817687e+07, 0.4502786003050393e+09, (- 1) * 0.3833857520742789e+11, 0.4011838599133198e+13, (- 1) * 0.5060568503314726e+15, 0.7572616461117957e+17, (- 1) * 0.1326257285320556e+20, 0.2687496750276277e+22, (- 1) * 0.6238670582374700e+24, 0.1644739123064188e+27};
+__constant double hank103a_q[] = {(- 1) * 0.1250000000000000e+00, 0.7324218750000000e-01, (- 1) * 0.2271080017089844e+00, 0.1727727502584457e+01, (- 1) * 0.2438052969955606e+02, 0.5513358961220206e+03, (- 1) * 0.1825775547429317e+05, 0.8328593040162893e+06, (- 1) * 0.5006958953198893e+08, 0.3836255180230434e+10, (- 1) * 0.3649010818849834e+12, 0.4218971570284096e+14, (- 1) * 0.5827244631566907e+16, 0.9476288099260110e+18, (- 1) * 0.1792162323051699e+21, 0.3900121292034000e+23, (- 1) * 0.9677028801069847e+25, 0.2715581773544907e+28};
+__constant double hank103a_q1[] = {0.3750000000000000e+00, (- 1) * 0.1025390625000000e+00, 0.2775764465332031e+00, (- 1) * 0.1993531733751297e+01, 0.2724882731126854e+02, (- 1) * 0.6038440767050702e+03, 0.1971837591223663e+05, (- 1) * 0.8902978767070679e+06, 0.5310411010968522e+08, (- 1) * 0.4043620325107754e+10, 0.3827011346598606e+12, (- 1) * 0.4406481417852279e+14, 0.6065091351222699e+16, (- 1) * 0.9833883876590680e+18, 0.1855045211579829e+21, (- 1) * 0.4027994121281017e+23, 0.9974783533410457e+25, (- 1) * 0.2794294288720121e+28};
+void hank103a(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon)
+{
+  cdouble_t cccexp;
+  cdouble_t cdd;
+  cdouble_t cdumb = cdouble_new(0.70710678118654757e+00, (- 1) * 0.70710678118654746e+00);
+  double done = 1.0e0;
+  int i;
+  cdouble_t ima = cdouble_new(0.0e0, 1.0e0);
+  int m;
+
+
+  double pi = 0.31415926535897932e+01;
+  cdouble_t pp;
+  cdouble_t pp1;
+
+
+  cdouble_t qq;
+  cdouble_t qq1;
+  cdouble_t zinv;
+  cdouble_t zinv22;
+  m = 10;
+  zinv = cdouble_rdivide(done, z);
+  pp = cdouble_fromreal(hank103a_p[m - 1]);
+  pp1 = cdouble_fromreal(hank103a_p1[m - 1]);
+  zinv22 = cdouble_mul(zinv, zinv);
+  qq = cdouble_fromreal(hank103a_q[m - 1]);
+  qq1 = cdouble_fromreal(hank103a_q1[m - 1]);
+  for (i = m + (- 1); i >= 1; i += - 1)
+  {
+    pp = cdouble_add(cdouble_fromreal(hank103a_p[i - 1]), cdouble_mul(pp, zinv22));
+    pp1 = cdouble_add(cdouble_fromreal(hank103a_p1[i - 1]), cdouble_mul(pp1, zinv22));
+    qq = cdouble_add(cdouble_fromreal(hank103a_q[i - 1]), cdouble_mul(qq, zinv22));
+    qq1 = cdouble_add(cdouble_fromreal(hank103a_q1[i - 1]), cdouble_mul(qq1, zinv22));
+    label_1600:
+    ;
+
+  }
+
+  qq = cdouble_mul(qq, zinv);
+  qq1 = cdouble_mul(qq1, zinv);
+  cccexp = cdouble_fromreal(1);
+  if (ifexpon == 1)
+    cccexp = cdouble_exp(cdouble_mul(ima, z));
+
+  cdd = cdouble_sqrt(cdouble_rmul(2 / pi, zinv));
+  * h0 = cdouble_add(pp, cdouble_mul(ima, qq));
+  * h0 = cdouble_mul(cdouble_mul(cdouble_mul(cdd, cdumb), cccexp), * h0);
+  * h1 = cdouble_add(pp1, cdouble_mul(ima, qq1));
+  * h1 = cdouble_rmul(- 1, cdouble_mul(cdouble_mul(cdouble_mul(cdouble_mul(cdd, cccexp), cdumb), * h1), ima));
+}
+
+__constant double hank103l_cj0[] = {0.1000000000000000e+01, (- 1) * 0.2500000000000000e+00, 0.1562500000000000e-01, (- 1) * 0.4340277777777778e-03, 0.6781684027777778e-05, (- 1) * 0.6781684027777778e-07, 0.4709502797067901e-09, (- 1) * 0.2402807549524439e-11, 0.9385966990329841e-14, (- 1) * 0.2896903392077112e-16, 0.7242258480192779e-19, (- 1) * 0.1496334396734045e-21, 0.2597802772107717e-24, (- 1) * 0.3842903509035085e-27, 0.4901662639075363e-30, (- 1) * 0.5446291821194848e-33};
+__constant double hank103l_cj1[] = {(- 1) * 0.5000000000000000e+00, 0.6250000000000000e-01, (- 1) * 0.2604166666666667e-02, 0.5425347222222222e-04, (- 1) * 0.6781684027777778e-06, 0.5651403356481481e-08, (- 1) * 0.3363930569334215e-10, 0.1501754718452775e-12, (- 1) * 0.5214426105738801e-15, 0.1448451696038556e-17, (- 1) * 0.3291935672814899e-20, 0.6234726653058522e-23, (- 1) * 0.9991549123491221e-26, 0.1372465538941102e-28, (- 1) * 0.1633887546358454e-31, 0.1701966194123390e-34};
+__constant double hank103l_ser2[] = {0.2500000000000000e+00, (- 1) * 0.2343750000000000e-01, 0.7957175925925926e-03, (- 1) * 0.1412850839120370e-04, 0.1548484519675926e-06, (- 1) * 0.1153828185281636e-08, 0.6230136717695511e-11, (- 1) * 0.2550971742728932e-13, 0.8195247730999099e-16, (- 1) * 0.2121234517551702e-18, 0.4518746345057852e-21, (- 1) * 0.8061529302289970e-24, 0.1222094716680443e-26, (- 1) * 0.1593806157473552e-29, 0.1807204342667468e-32, (- 1) * 0.1798089518115172e-35};
+__constant double hank103l_ser2der[] = {0.5000000000000000e+00, (- 1) * 0.9375000000000000e-01, 0.4774305555555556e-02, (- 1) * 0.1130280671296296e-03, 0.1548484519675926e-05, (- 1) * 0.1384593822337963e-07, 0.8722191404773715e-10, (- 1) * 0.4081554788366291e-12, 0.1475144591579838e-14, (- 1) * 0.4242469035103405e-17, 0.9941241959127275e-20, (- 1) * 0.1934767032549593e-22, 0.3177446263369152e-25, (- 1) * 0.4462657240925946e-28, 0.5421613028002404e-31, (- 1) * 0.5753886457968550e-34};
+void hank103l(cdouble_t z, cdouble_t *h0, cdouble_t *h1, int ifexpon)
+{
+  cdouble_t cd;
+  cdouble_t cdddlog;
+
+
+  cdouble_t fj0;
+  cdouble_t fj1;
+  double gamma = 0.5772156649015328606e+00;
+  int i;
+  cdouble_t ima = cdouble_new(0.0e0, 1.0e0);
+  int m;
+  double pi = 0.31415926535897932e+01;
+
+
+  double two = 2.0e0;
+  cdouble_t y0;
+  cdouble_t y1;
+  cdouble_t z2;
+  m = 16;
+  fj0 = cdouble_fromreal(0);
+  fj1 = cdouble_fromreal(0);
+  y0 = cdouble_fromreal(0);
+  y1 = cdouble_fromreal(0);
+  z2 = cdouble_mul(z, z);
+  cd = cdouble_fromreal(1);
+  for (i = 1; i <= m; i += 1)
+  {
+    fj0 = cdouble_add(fj0, cdouble_rmul(hank103l_cj0[i - 1], cd));
+    fj1 = cdouble_add(fj1, cdouble_rmul(hank103l_cj1[i - 1], cd));
+    y1 = cdouble_add(y1, cdouble_rmul(hank103l_ser2der[i - 1], cd));
+    cd = cdouble_mul(cd, z2);
+    y0 = cdouble_add(y0, cdouble_rmul(hank103l_ser2[i - 1], cd));
+    label_1800:
+    ;
+
+  }
+
+  fj1 = cdouble_rmul(- 1, cdouble_mul(fj1, z));
+  cdddlog = cdouble_add(cdouble_fromreal(gamma), cdouble_log(cdouble_divider(z, two)));
+  y0 = cdouble_add(cdouble_mul(cdddlog, fj0), y0);
+  y0 = cdouble_rmul(two / pi, y0);
+  y1 = cdouble_mul(y1, z);
+  y1 = cdouble_add(cdouble_add(cdouble_rmul(- 1, cdouble_mul(cdddlog, fj1)), cdouble_divide(fj0, z)), y1);
+  y1 = cdouble_divider(cdouble_rmul((- 1) * two, y1), pi);
+  * h0 = cdouble_add(fj0, cdouble_mul(ima, y0));
+  * h1 = cdouble_add(fj1, cdouble_mul(ima, y1));
+  if (ifexpon == 1)
+    return;
+
+  cd = cdouble_exp(cdouble_rmul(- 1, cdouble_mul(ima, z)));
+  * h0 = cdouble_mul(* h0, cd);
+  * h1 = cdouble_mul(* h1, cd);
+}
+
+__constant double hank103r_c0p1[] = {(- 1) * 0.4268441995428495e-23, 0.4374027848105921e-23, 0.9876152216238049e-23, (- 1) * 0.1065264808278614e-20, 0.6240598085551175e-19, 0.6658529985490110e-19, (- 1) * 0.5107210870050163e-17, (- 1) * 0.2931746613593983e-18, 0.1611018217758854e-15, (- 1) * 0.1359809022054077e-15, (- 1) * 0.7718746693707326e-15, 0.6759496139812828e-14, (- 1) * 0.1067620915195442e-12, (- 1) * 0.1434699000145826e-12, 0.3868453040754264e-11, 0.7061853392585180e-12, (- 1) * 0.6220133527871203e-10, 0.3957226744337817e-10, 0.3080863675628417e-09, (- 1) * 0.1154618431281900e-08, 0.7793319486868695e-08, 0.1502570745460228e-07, (- 1) * 0.1978090852638430e-06, (- 1) * 0.7396691873499030e-07, 0.2175857247417038e-05, (- 1) * 0.8473534855334919e-06, (- 1) * 0.1053381327609720e-04, 0.2042555121261223e-04, (- 1) * 0.4812568848956982e-04, (- 1) * 0.1961519090873697e-03, 0.1291714391689374e-02, 0.9234422384950050e-03, (- 1) * 0.1113890671502769e-01, 0.9053687375483149e-03, 0.5030666896877862e-01, (- 1) * 0.4923119348218356e-01, 0.5202355973926321e+00, (- 1) * 0.1705244841954454e+00, (- 1) * 0.1134990486611273e+01, (- 1) * 0.1747542851820576e+01, 0.8308174484970718e+01, 0.2952358687641577e+01, (- 1) * 0.3286074510100263e+02, 0.1126542966971545e+02, 0.6576015458463394e+02, (- 1) * 0.1006116996293757e+03, 0.3216834899377392e+02, 0.3614005342307463e+03, (- 1) * 0.6653878500833375e+03, (- 1) * 0.6883582242804924e+03, 0.2193362007156572e+04, 0.2423724600546293e+03, (- 1) * 0.3665925878308203e+04, 0.2474933189642588e+04, 0.1987663383445796e+04, (- 1) * 0.7382586600895061e+04, 0.4991253411017503e+04, 0.1008505017740918e+05, (- 1) * 0.1285284928905621e+05, (- 1) * 0.5153674821668470e+04, 0.1301656757246985e+05, (- 1) * 0.4821250366504323e+04, (- 1) * 0.4982112643422311e+04, 0.9694070195648748e+04, (- 1) * 0.1685723189234701e+04, (- 1) * 0.6065143678129265e+04, 0.2029510635584355e+04, 0.1244402339119502e+04, (- 1) * 0.4336682903961364e+03, 0.8923209875101459e+02};
+__constant double hank103r_c0p2[] = {0.5641895835569398e+00, (- 1) * 0.5641895835321127e+00, (- 1) * 0.7052370223565544e-01, (- 1) * 0.7052369923405479e-01, (- 1) * 0.3966909368581382e-01, 0.3966934297088857e-01, 0.4130698137268744e-01, 0.4136196771522681e-01, 0.6240742346896508e-01, (- 1) * 0.6553556513852438e-01, (- 1) * 0.3258849904760676e-01, (- 1) * 0.7998036854222177e-01, (- 1) * 0.3988006311955270e+01, 0.1327373751674479e+01, 0.6121789346915312e+02, (- 1) * 0.9251865216627577e+02, 0.4247064992018806e+03, 0.2692553333489150e+04, (- 1) * 0.4374691601489926e+05, (- 1) * 0.3625248208112831e+05, 0.1010975818048476e+07, (- 1) * 0.2859360062580096e+05, (- 1) * 0.1138970241206912e+08, 0.1051097979526042e+08, 0.2284038899211195e+08, (- 1) * 0.2038012515235694e+09, 0.1325194353842857e+10, 0.1937443530361381e+10, (- 1) * 0.2245999018652171e+11, (- 1) * 0.5998903865344352e+10, 0.1793237054876609e+12, (- 1) * 0.8625159882306147e+11, (- 1) * 0.5887763042735203e+12, 0.1345331284205280e+13, (- 1) * 0.2743432269370813e+13, (- 1) * 0.8894942160272255e+13, 0.4276463113794564e+14, 0.2665019886647781e+14, (- 1) * 0.2280727423955498e+15, 0.3686908790553973e+14, 0.5639846318168615e+15, (- 1) * 0.6841529051615703e+15, 0.9901426799966038e+14, 0.2798406605978152e+16, (- 1) * 0.4910062244008171e+16, (- 1) * 0.5126937967581805e+16, 0.1387292951936756e+17, 0.1043295727224325e+16, (- 1) * 0.1565204120687265e+17, 0.1215262806973577e+17, 0.3133802397107054e+16, (- 1) * 0.1801394550807078e+17, 0.4427598668012807e+16, 0.6923499968336864e+16};
+__constant double hank103r_c1p1[] = {(- 1) * 0.4019450270734195e-23, (- 1) * 0.4819240943285824e-23, 0.1087220822839791e-20, 0.1219058342725899e-21, (- 1) * 0.7458149572694168e-19, 0.5677825613414602e-19, 0.8351815799518541e-18, (- 1) * 0.5188585543982425e-17, 0.1221075065755962e-15, 0.1789261470637227e-15, (- 1) * 0.6829972121890858e-14, (- 1) * 0.1497462301804588e-14, 0.1579028042950957e-12, (- 1) * 0.9414960303758800e-13, (- 1) * 0.1127570848999746e-11, 0.3883137940932639e-11, (- 1) * 0.3397569083776586e-10, (- 1) * 0.6779059427459179e-10, 0.1149529442506273e-08, 0.4363087909873751e-09, (- 1) * 0.1620182360840298e-07, 0.6404695607668289e-08, 0.9651461037419628e-07, (- 1) * 0.1948572160668177e-06, 0.6397881896749446e-06, 0.2318661930507743e-05, (- 1) * 0.1983192412396578e-04, (- 1) * 0.1294811208715315e-04, 0.2062663873080766e-03, (- 1) * 0.2867633324735777e-04, (- 1) * 0.1084309075952914e-02, 0.1227880935969686e-02, 0.2538406015667726e-03, (- 1) * 0.1153316815955356e-01, 0.4520140008266983e-01, 0.5693944718258218e-01, (- 1) * 0.9640790976658534e+00, (- 1) * 0.6517135574036008e+00, 0.2051491829570049e+01, (- 1) * 0.1124151010077572e+01, (- 1) * 0.3977380460328048e+01, 0.8200665483661009e+01, (- 1) * 0.7950131652215817e+01, (- 1) * 0.3503037697046647e+02, 0.9607320812492044e+02, 0.7894079689858070e+02, (- 1) * 0.3749002890488298e+03, (- 1) * 0.8153831134140778e+01, 0.7824282518763973e+03, (- 1) * 0.6035276543352174e+03, (- 1) * 0.5004685759675768e+03, 0.2219009060854551e+04, (- 1) * 0.2111301101664672e+04, (- 1) * 0.4035632271617418e+04, 0.7319737262526823e+04, 0.2878734389521922e+04, (- 1) * 0.1087404934318719e+05, 0.3945740567322783e+04, 0.6727823761148537e+04, (- 1) * 0.1253555346597302e+05, 0.3440468371829973e+04, 0.1383240926370073e+05, (- 1) * 0.9324927373036743e+04, (- 1) * 0.6181580304530313e+04, 0.6376198146666679e+04, (- 1) * 0.1033615527971958e+04, (- 1) * 0.1497604891055181e+04, 0.1929025541588262e+04, (- 1) * 0.4219760183545219e+02, (- 1) * 0.4521162915353207e+03};
+__constant double hank103r_c1p2[] = {(- 1) * 0.5641895835431980e+00, (- 1) * 0.5641895835508094e+00, 0.2115710934750869e+00, (- 1) * 0.2115710923186134e+00, (- 1) * 0.6611607335011594e-01, (- 1) * 0.6611615414079688e-01, (- 1) * 0.5783289433408652e-01, 0.5785737744023628e-01, 0.8018419623822896e-01, 0.8189816020440689e-01, 0.1821045296781145e+00, (- 1) * 0.2179738973008740e+00, 0.5544705668143094e+00, 0.2224466316444440e+01, (- 1) * 0.8563271248520645e+02, (- 1) * 0.4394325758429441e+02, 0.2720627547071340e+04, (- 1) * 0.6705390850875292e+03, (- 1) * 0.3936221960600770e+05, 0.5791730432605451e+05, (- 1) * 0.1976787738827811e+06, (- 1) * 0.1502498631245144e+07, 0.2155317823990686e+08, 0.1870953796705298e+08, (- 1) * 0.4703995711098311e+09, 0.3716595906453190e+07, 0.5080557859012385e+10, (- 1) * 0.4534199223888966e+10, (- 1) * 0.1064438211647413e+11, 0.8612243893745942e+11, (- 1) * 0.5466017687785078e+12, (- 1) * 0.8070950386640701e+12, 0.9337074941225827e+13, 0.2458379240643264e+13, (- 1) * 0.7548692171244579e+14, 0.3751093169954336e+14, 0.2460677431350039e+15, (- 1) * 0.5991919372881911e+15, 0.1425679408434606e+16, 0.4132221939781502e+16, (- 1) * 0.2247506469468969e+17, (- 1) * 0.1269771078165026e+17, 0.1297336292749026e+18, (- 1) * 0.2802626909791308e+17, (- 1) * 0.3467137222813017e+18, 0.4773955215582192e+18, (- 1) * 0.2347165776580206e+18, (- 1) * 0.2233638097535785e+19, 0.5382350866778548e+19, 0.4820328886922998e+19, (- 1) * 0.1928978948099345e+20, 0.1575498747750907e+18, 0.3049162180215152e+20, (- 1) * 0.2837046201123502e+20, (- 1) * 0.5429391644354291e+19, 0.6974653380104308e+20, (- 1) * 0.5322120857794536e+20, (- 1) * 0.6739879079691706e+20, 0.6780343087166473e+20, 0.1053455984204666e+20, (- 1) * 0.2218784058435737e+20, 0.1505391868530062e+20};
+void hank103r(cdouble_t z, int *ier, cdouble_t *h0, cdouble_t *h1, int ifexpon)
+{
+
+
+
+
+  cdouble_t cccexp;
+  cdouble_t cd;
+  cdouble_t cdd;
+  double d;
+  double done;
+  cdouble_t ima = cdouble_new(0.0e0, 1.0e0);
+  int m;
+  double thresh1;
+  double thresh2;
+  double thresh3;
+  cdouble_t zz18;
+  * ier = 0;
+  if ((cdouble_real(z) >= 0) && (cdouble_imag(z) <= 0))
+    goto label_1400;
+
+  * ier = 4;
+  return;
+  label_1400:
+  ;
+
+  done = 1;
+  thresh1 = 16;
+  thresh2 = 64;
+  thresh3 = 400;
+  d = cdouble_real(cdouble_mul(z, cdouble_conj(z)));
+  if ((d < thresh1) || (d > thresh3))
+    goto label_3000;
+
+  if (d > thresh2)
+    goto label_2000;
+
+  cccexp = cdouble_fromreal(1);
+  if (ifexpon == 1)
+    cccexp = cdouble_exp(cdouble_mul(ima, z));
+
+  cdd = cdouble_rdivide(done, cdouble_sqrt(z));
+  cd = cdouble_rdivide(done, z);
+  zz18 = cdouble_powr(z, 18);
+  m = 35;
+  hank103p((__constant cdouble_t *) (& (* hank103r_c0p1)), m, cd, & (* h0));
+  * h0 = cdouble_mul(cdouble_mul(cdouble_mul(* h0, cdd), cccexp), zz18);
+  hank103p((__constant cdouble_t *) (& (* hank103r_c1p1)), m, cd, & (* h1));
+  * h1 = cdouble_mul(cdouble_mul(cdouble_mul(* h1, cdd), cccexp), zz18);
+  return;
+  label_2000:
+  ;
+
+  cd = cdouble_rdivide(done, z);
+  cdd = cdouble_sqrt(cd);
+  cccexp = cdouble_fromreal(1);
+  if (ifexpon == 1)
+    cccexp = cdouble_exp(cdouble_mul(ima, z));
+
+  m = 27;
+  hank103p((__constant cdouble_t *) (& (* hank103r_c0p2)), m, cd, & (* h0));
+  * h0 = cdouble_mul(cdouble_mul(* h0, cccexp), cdd);
+  m = 31;
+  hank103p((__constant cdouble_t *) (& (* hank103r_c1p2)), m, cd, & (* h1));
+  * h1 = cdouble_mul(cdouble_mul(* h1, cccexp), cdd);
+  return;
+  label_3000:
+  ;
+
+  if (d > 50.e0)
+    goto label_4000;
+
+  hank103l(z, & (* h0), & (* h1), ifexpon);
+  return;
+  label_4000:
+  ;
+
+  hank103a(z, & (* h0), & (* h1), ifexpon);
+}
+
diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py
index 1b41ce67d0ce9b7543c6f76944edf29c1a082ff0..d745741d2091264058d7dc1c29275db110e948e6 100644
--- a/pyopencl/clmath.py
+++ b/pyopencl/clmath.py
@@ -240,13 +240,28 @@ def _bessel_yn(result, n, x):
                                          np.dtype(type(n)), x.dtype)
 
 
+@cl_array.elwise_kernel_runner
+def _hankel_01(h0, h1, x):
+    if h0.dtype != h1.dtype:
+        raise TypeError("types of h0 and h1 must match")
+    return elementwise.get_hankel_01_kernel(
+            h0.context, h0.dtype, x.dtype)
+
+
 def bessel_jn(n, x, queue=None):
     result = x._new_like_me(queue=queue)
-    _bessel_jn(result, n, x)
+    _bessel_jn(result, n, x, queue=queue)
     return result
 
 
 def bessel_yn(n, x, queue=None):
     result = x._new_like_me(queue=queue)
-    _bessel_yn(result, n, x)
+    _bessel_yn(result, n, x, queue=queue)
     return result
+
+
+def hankel_01(x, queue=None):
+    h0 = x._new_like_me(queue=queue)
+    h1 = x._new_like_me(queue=queue)
+    _hankel_01(h0, h1, x, queue=queue)
+    return h0, h1
diff --git a/pyopencl/compyte b/pyopencl/compyte
index 5d54e1b2b7f28d3e779029ac0b4aa5f957829f23..fb6ba114d9d906403d47b0aaf69e2fe4cef382f2 160000
--- a/pyopencl/compyte
+++ b/pyopencl/compyte
@@ -1 +1 @@
-Subproject commit 5d54e1b2b7f28d3e779029ac0b4aa5f957829f23
+Subproject commit fb6ba114d9d906403d47b0aaf69e2fe4cef382f2
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 398936ff52587b91a48d0febec06a7e172d80019..47fe3d9585b1aebf08cde69624066e5088a081ae 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -383,7 +383,7 @@ def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
 
     args = [
             VectorArg(dtype, "dest%d" % i)
-                for i in range(vec_count)
+            for i in range(vec_count)
             ] + [
                 VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
                 VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
@@ -392,7 +392,7 @@ def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
                 for i in range(vec_count)
             ] + [
                 ScalarArg(idx_dtype, "offset%d" % i)
-                    for i in range(vec_count) if with_offsets
+                for i in range(vec_count) if with_offsets
             ]
 
     if with_offsets:
@@ -421,7 +421,7 @@ def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
 
     args = [
             VectorArg(dtype, "dest%d" % i, with_offset=True)
-                for i in range(vec_count)
+            for i in range(vec_count)
             ] + [
                 VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
             ] + [
@@ -525,7 +525,6 @@ def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z):
 
     x_is_complex = dtype_x.kind == "c"
     y_is_complex = dtype_y.kind == "c"
-    z_is_complex = dtype_z.kind == "c"
 
     if x_is_complex:
         ax = "%s_mul(a, x[i])" % complex_dtype_to_name(dtype_x)
@@ -539,9 +538,15 @@ def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z):
     if not x_is_complex and y_is_complex:
         ax = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_y), ax)
 
-    result = "%s + %s" % (ax, by)
-    if z_is_complex:
-        result = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), result)
+    if x_is_complex or y_is_complex:
+        result = (
+                "{root}_add({root}_cast({ax}), {root}_cast({by}))"
+                .format(
+                    ax=ax,
+                    by=by,
+                    root=complex_dtype_to_name(dtype_z)))
+    else:
+        result = "%s + %s" % (ax, by)
 
     return get_elwise_kernel(context,
             "%(tp_z)s *z, %(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y" % {
@@ -562,24 +567,27 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
     z_is_complex = dtype_z.kind == "c"
 
     ax = "a*x[i]"
-    if a_is_complex and x_is_complex:
+    if x_is_complex:
         a = "a"
         x = "x[i]"
 
-        if dtype_a != dtype_z:
-            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a)
         if dtype_x != dtype_z:
             x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x)
 
-        ax = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+        if a_is_complex:
+            if dtype_a != dtype_z:
+                a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a)
 
-    # The following two are workarounds for Apple on OS X 10.8.
-    # They're not really necessary.
+            ax = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+        else:
+            ax = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
+    elif a_is_complex:
+        a = "a"
+        x = "x[i]"
 
-    elif a_is_complex and not x_is_complex:
-        ax = "a*((%s) x[i])" % dtype_to_ctype(real_dtype(dtype_a))
-    elif not a_is_complex and x_is_complex:
-        ax = "((%s) a)*x[i]" % dtype_to_ctype(real_dtype(dtype_x))
+        if dtype_a != dtype_z:
+            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a)
+        ax = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x)
 
     b = "b"
     if z_is_complex and not b_is_complex:
@@ -592,6 +600,14 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
         ax = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), ax)
         b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), b)
 
+    if a_is_complex or x_is_complex or b_is_complex:
+        expr = "{root}_add({ax}, {b})".format(
+                ax=ax,
+                b=b,
+                root=complex_dtype_to_name(dtype_z))
+    else:
+        expr = "%s + %s" % (ax, b)
+
     return get_elwise_kernel(context,
             "%(tp_z)s *z, %(tp_a)s a, %(tp_x)s *x,%(tp_b)s b" % {
                 "tp_a": dtype_to_ctype(dtype_a),
@@ -599,7 +615,7 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
                 "tp_b": dtype_to_ctype(dtype_b),
                 "tp_z": dtype_to_ctype(dtype_z),
                 },
-            "z[i] = %s + %s" % (ax, b),
+            "z[i] = " + expr,
             name="axpb")
 
 
@@ -607,7 +623,6 @@ def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
 def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z):
     x_is_complex = dtype_x.kind == "c"
     y_is_complex = dtype_y.kind == "c"
-    z_is_complex = dtype_z.kind == "c"
 
     x = "x[i]"
     y = "y[i]"
@@ -619,13 +634,13 @@ def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z):
 
     if x_is_complex and y_is_complex:
         xy = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
-
+    elif x_is_complex and not y_is_complex:
+        xy = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+    elif not x_is_complex and y_is_complex:
+        xy = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
     else:
         xy = "%s * %s" % (x, y)
 
-    if z_is_complex:
-        xy = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), xy)
-
     return get_elwise_kernel(context,
             "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % {
                 "tp_x": dtype_to_ctype(dtype_x),
@@ -654,8 +669,9 @@ def get_divide_kernel(context, dtype_x, dtype_y, dtype_z):
     if x_is_complex and y_is_complex:
         xoy = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
     elif not x_is_complex and y_is_complex:
-
         xoy = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
+    elif x_is_complex and not y_is_complex:
+        xoy = "%s_divider(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y)
     else:
         xoy = "%s / %s" % (x, y)
 
@@ -691,7 +707,9 @@ def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
     if x_is_complex and y_is_complex:
         yox = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
     elif not y_is_complex and x_is_complex:
-        yox = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_x), y, x)
+        yox = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
+    elif y_is_complex and not x_is_complex:
+        yox = "%s_divider(%s, %s)" % (complex_dtype_to_name(dtype_z), y, x)
     else:
         yox = "%s / %s" % (y, x)
 
@@ -728,16 +746,18 @@ def get_reverse_kernel(context, dtype):
 @context_dependent_memoize
 def get_arange_kernel(context, dtype):
     if dtype.kind == "c":
-        i = "%s_fromreal(i)" % complex_dtype_to_name(dtype)
+        expr = (
+                "{root}_add(start, {root}_rmul(i, step))"
+                .format(root=complex_dtype_to_name(dtype)))
     else:
-        i = "(%s) i" % dtype_to_ctype(dtype)
+        expr = "start + ((%s) i)*step" % dtype_to_ctype(dtype)
 
     return get_elwise_kernel(context, [
         VectorArg(dtype, "z", with_offset=True),
         ScalarArg(dtype, "start"),
         ScalarArg(dtype, "step"),
         ],
-        "z[i] = start + %s*step" % i,
+        "z[i] = " + expr,
         name="arange")
 
 
@@ -912,20 +932,80 @@ def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
 @context_dependent_memoize
 def get_bessel_kernel(context, which_func, out_dtype=np.float64,
                       order_dtype=np.int32, x_dtype=np.float64):
+    if x_dtype.kind != "c":
+        return get_elwise_kernel(context, [
+            VectorArg(out_dtype, "z", with_offset=True),
+            ScalarArg(order_dtype, "ord_n"),
+            VectorArg(x_dtype, "x", with_offset=True),
+            ],
+            "z[i] = bessel_%sn(ord_n, x[i])" % which_func,
+            name="bessel_%sn_kernel" % which_func,
+            preamble="""
+            #if __OPENCL_C_VERSION__ < 120
+            #pragma OPENCL EXTENSION cl_khr_fp64: enable
+            #endif
+            #define PYOPENCL_DEFINE_CDOUBLE
+            #include <pyopencl-bessel-%s.cl>
+            """ % which_func)
+    else:
+        if which_func != "j":
+            raise NotImplementedError("complex arguments for Bessel Y")
+
+        if x_dtype != np.complex128:
+            raise NotImplementedError("non-complex double dtype")
+        if x_dtype != out_dtype:
+            raise NotImplementedError("different input/output types")
+
+        return get_elwise_kernel(context, [
+            VectorArg(out_dtype, "z", with_offset=True),
+            ScalarArg(order_dtype, "ord_n"),
+            VectorArg(x_dtype, "x", with_offset=True),
+            ],
+            """
+            cdouble_t jv_loc;
+            cdouble_t jvp1_loc;
+            bessel_j_complex(ord_n, x[i], &jv_loc, &jvp1_loc);
+            z[i] = jv_loc;
+            """,
+            name="bessel_j_complex_kernel",
+            preamble="""
+            #if __OPENCL_C_VERSION__ < 120
+            #pragma OPENCL EXTENSION cl_khr_fp64: enable
+            #endif
+            #define PYOPENCL_DEFINE_CDOUBLE
+            #include <pyopencl-complex.h>
+            #include <pyopencl-bessel-j-complex.cl>
+            """)
+
+
+@context_dependent_memoize
+def get_hankel_01_kernel(context, out_dtype, x_dtype):
+    if x_dtype != np.complex128:
+        raise NotImplementedError("non-complex double dtype")
+    if x_dtype != out_dtype:
+        raise NotImplementedError("different input/output types")
+
     return get_elwise_kernel(context, [
-        VectorArg(out_dtype, "z", with_offset=True),
-        ScalarArg(order_dtype, "ord_n"),
+        VectorArg(out_dtype, "h0", with_offset=True),
+        VectorArg(out_dtype, "h1", with_offset=True),
         VectorArg(x_dtype, "x", with_offset=True),
         ],
-        "z[i] = bessel_%sn(ord_n, x[i])" % which_func,
-        name="bessel_%sn_kernel" % which_func,
+        """
+        cdouble_t h0_loc;
+        cdouble_t h1_loc;
+        hankel_01_complex(x[i], &h0_loc, &h1_loc, 1);
+        h0[i] = h0_loc;
+        h1[i] = h1_loc;
+        """,
+        name="hankel_complex_kernel",
         preamble="""
         #if __OPENCL_C_VERSION__ < 120
         #pragma OPENCL EXTENSION cl_khr_fp64: enable
         #endif
         #define PYOPENCL_DEFINE_CDOUBLE
-        #include <pyopencl-bessel-%s.cl>
-        """ % which_func)
+        #include <pyopencl-complex.h>
+        #include <pyopencl-hankel-complex.cl>
+        """)
 
 
 @context_dependent_memoize
diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index ba7c6dc77f69337c8e535284a782e31eb9b613a4..2e432fc5572f9cc37001011d3dbbbef2bf3acdcd 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -41,7 +41,7 @@ import numpy as np
 
 # {{{ kernel source
 
-KERNEL = """//CL//
+KERNEL = r"""//CL//
     #define GROUP_SIZE ${group_size}
     #define READ_AND_MAP(i) (${map_expr})
     #define REDUCE(a, b) (${reduce_expr})
@@ -63,7 +63,8 @@ KERNEL = """//CL//
       __global out_type *out__base, long out__offset, ${arguments},
       unsigned int seq_count, unsigned int n)
     {
-        __global out_type *out = (__global out_type *) ((__global char *) out__base + out__offset);
+        __global out_type *out = (__global out_type *) (
+            (__global char *) out__base + out__offset);
         ${arg_prep}
 
         __local out_type ldata[GROUP_SIZE];
@@ -88,7 +89,7 @@ KERNEL = """//CL//
           cur_size = group_size
         %>
 
-        % while cur_size > no_sync_size:
+        % while cur_size > 1:
             barrier(CLK_LOCAL_MEM_FENCE);
 
             <%
@@ -107,40 +108,6 @@ KERNEL = """//CL//
 
         % endwhile
 
-        % if cur_size > 1:
-            ## we need to synchronize one last time for entry into the
-            ## no-sync region.
-
-            barrier(CLK_LOCAL_MEM_FENCE);
-
-            <%
-            # NB: There's an exact duplicate of this calculation in the
-            # %while loop below.
-
-            new_size = cur_size // 2
-            assert new_size * 2 == cur_size
-            %>
-
-            if (lid < ${new_size})
-            {
-                __local volatile out_type *lvdata = ldata;
-                % while cur_size > 1:
-                    <%
-                    new_size = cur_size // 2
-                    assert new_size * 2 == cur_size
-                    %>
-
-                    lvdata[lid] = REDUCE(
-                      lvdata[lid],
-                      lvdata[lid + ${new_size}]);
-
-                    <% cur_size = new_size %>
-
-                % endwhile
-
-            }
-        % endif
-
         if (lid == 0) out[get_group_id(0)] = ldata[0];
     }
     """
@@ -185,24 +152,6 @@ def _get_reduction_source(
 
     # }}}
 
-    # {{{ compute synchronization-less group size
-
-    def get_dev_no_sync_size(device):
-        from pyopencl.characterize import get_simd_group_size
-        result = get_simd_group_size(device, out_type_size)
-
-        if result is None:
-            from warnings import warn
-            warn("Reduction might be unnecessarily slow: "
-                    "can't query SIMD group size")
-            return 1
-
-        return result
-
-    no_sync_size = min(get_dev_no_sync_size(dev) for dev in devices)
-
-    # }}}
-
     from mako.template import Template
     from pytools import all
     from pyopencl.characterize import has_double_support
@@ -210,7 +159,6 @@ def _get_reduction_source(
         out_type=out_type,
         arguments=", ".join(arg.declarator() for arg in parsed_args),
         group_size=group_size,
-        no_sync_size=no_sync_size,
         neutral=neutral,
         reduce_expr=_process_code_for_macro(reduce_expr),
         map_expr=_process_code_for_macro(map_expr),
@@ -325,8 +273,8 @@ class ReductionKernel:
                 "that have at least one vector argument"
 
     def __call__(self, *args, **kwargs):
-        MAX_GROUP_COUNT = 1024
-        SMALL_SEQ_COUNT = 4
+        MAX_GROUP_COUNT = 1024  # noqa
+        SMALL_SEQ_COUNT = 4  # noqa
 
         from pyopencl.array import empty
 
@@ -392,7 +340,8 @@ class ReductionKernel:
                     use_queue,
                     (group_count*stage_inf.group_size,),
                     (stage_inf.group_size,),
-                    *([result.base_data, result.offset] + invocation_args + [seq_count, sz]),
+                    *([result.base_data, result.offset]
+                        + invocation_args + [seq_count, sz]),
                     **dict(wait_for=wait_for))
             wait_for = [last_evt]
 
@@ -473,7 +422,15 @@ def get_sum_kernel(ctx, dtype_out, dtype_in):
     if dtype_out is None:
         dtype_out = dtype_in
 
-    return ReductionKernel(ctx, dtype_out, "0", "a+b",
+    reduce_expr = "a+b"
+    neutral_expr = "0"
+    if dtype_out.kind == "c":
+        from pyopencl.elementwise import complex_dtype_to_name
+        dtname = complex_dtype_to_name(dtype_out)
+        reduce_expr = "%s_add(a, b)" % dtname
+        neutral_expr = "%s_new(0, 0)" % dtname
+
+    return ReductionKernel(ctx, dtype_out, neutral_expr, reduce_expr,
             arguments="const %(tp)s *in"
             % {"tp": dtype_to_ctype(dtype_in)})
 
@@ -492,49 +449,30 @@ def _get_dot_expr(dtype_out, dtype_a, dtype_b, conjugate_first,
                 dtype_a.type(0), dtype_b.type(0),
                 has_double_support)
 
-    a_real_dtype = dtype_a.type(0).real.dtype
-    b_real_dtype = dtype_b.type(0).real.dtype
-    out_real_dtype = dtype_out.type(0).real.dtype
-
     a_is_complex = dtype_a.kind == "c"
     b_is_complex = dtype_b.kind == "c"
-    out_is_complex = dtype_out.kind == "c"
 
     from pyopencl.elementwise import complex_dtype_to_name
 
-    if a_is_complex and b_is_complex:
-        a = "a[%s]" % index_expr
-        b = "b[%s]" % index_expr
-        if dtype_a != dtype_out:
-            a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
-        if dtype_b != dtype_out:
-            b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)
+    a = "a[%s]" % index_expr
+    b = "b[%s]" % index_expr
 
-        if conjugate_first and a_is_complex:
-            a = "%s_conj(%s)" % (
-                    complex_dtype_to_name(dtype_out), a)
+    if a_is_complex and (dtype_a != dtype_out):
+        a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
+    if b_is_complex and (dtype_b != dtype_out):
+        b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)
 
-        map_expr = "%s_mul(%s, %s)" % (
-                complex_dtype_to_name(dtype_out), a, b)
-    else:
-        a = "a[%s]" % index_expr
-        b = "b[%s]" % index_expr
-
-        if out_is_complex:
-            if a_is_complex and dtype_a != dtype_out:
-                a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), a)
-            if b_is_complex and dtype_b != dtype_out:
-                b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_out), b)
-
-            if not a_is_complex and a_real_dtype != out_real_dtype:
-                a = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), a)
-            if not b_is_complex and b_real_dtype != out_real_dtype:
-                b = "(%s) (%s)" % (dtype_to_ctype(out_real_dtype), b)
-
-        if conjugate_first and a_is_complex:
-            a = "%s_conj(%s)" % (
-                    complex_dtype_to_name(dtype_out), a)
+    if a_is_complex and conjugate_first and a_is_complex:
+        a = "%s_conj(%s)" % (
+                complex_dtype_to_name(dtype_out), a)
 
+    if a_is_complex and not b_is_complex:
+        map_expr = "%s_mulr(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+    elif not a_is_complex and b_is_complex:
+        map_expr = "%s_rmul(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+    elif a_is_complex and b_is_complex:
+        map_expr = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_out), a, b)
+    else:
         map_expr = "%s*%s" % (a, b)
 
     return map_expr, dtype_out, dtype_b
@@ -548,14 +486,22 @@ def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None,
             dtype_out, dtype_a, dtype_b, conjugate_first,
             has_double_support=has_double_support(ctx.devices[0]))
 
-    return ReductionKernel(ctx, dtype_out, neutral="0",
-            reduce_expr="a+b", map_expr=map_expr,
-            arguments=
-            "const %(tp_a)s *a, "
-            "const %(tp_b)s *b" % {
-                "tp_a": dtype_to_ctype(dtype_a),
-                "tp_b": dtype_to_ctype(dtype_b),
-                })
+    reduce_expr = "a+b"
+    neutral_expr = "0"
+    if dtype_out.kind == "c":
+        from pyopencl.elementwise import complex_dtype_to_name
+        dtname = complex_dtype_to_name(dtype_out)
+        reduce_expr = "%s_add(a, b)" % dtname
+        neutral_expr = "%s_new(0, 0)" % dtname
+
+    return ReductionKernel(ctx, dtype_out, neutral=neutral_expr,
+            reduce_expr=reduce_expr, map_expr=map_expr,
+            arguments=(
+                "const %(tp_a)s *a, "
+                "const %(tp_b)s *b" % {
+                    "tp_a": dtype_to_ctype(dtype_a),
+                    "tp_b": dtype_to_ctype(dtype_b),
+                    }))
 
 
 @context_dependent_memoize
@@ -570,14 +516,14 @@ def get_subset_dot_kernel(ctx, dtype_out, dtype_subset, dtype_a=None, dtype_b=No
     # important: lookup_tbl must be first--it controls the length
     return ReductionKernel(ctx, dtype_out, neutral="0",
             reduce_expr="a+b", map_expr=map_expr,
-            arguments=
-            "const %(tp_lut)s *lookup_tbl, "
-            "const %(tp_a)s *a, "
-            "const %(tp_b)s *b" % {
-            "tp_lut": dtype_to_ctype(dtype_subset),
-            "tp_a": dtype_to_ctype(dtype_a),
-            "tp_b": dtype_to_ctype(dtype_b),
-            })
+            arguments=(
+                "const %(tp_lut)s *lookup_tbl, "
+                "const %(tp_a)s *a, "
+                "const %(tp_b)s *b" % {
+                    "tp_lut": dtype_to_ctype(dtype_subset),
+                    "tp_a": dtype_to_ctype(dtype_a),
+                    "tp_b": dtype_to_ctype(dtype_b),
+                    }))
 
 
 def get_minmax_neutral(what, dtype):
@@ -628,12 +574,13 @@ def get_subset_minmax_kernel(ctx, what, dtype, dtype_subset):
             neutral=get_minmax_neutral(what, dtype),
             reduce_expr="%(reduce_expr)s" % {"reduce_expr": reduce_expr},
             map_expr="in[lookup_tbl[i]]",
-            arguments=
-            "const %(tp_lut)s *lookup_tbl, "
-            "const %(tp)s *in" % {
-            "tp": dtype_to_ctype(dtype),
-            "tp_lut": dtype_to_ctype(dtype_subset),
-            }, preamble="#define MY_INFINITY (1./0)")
+            arguments=(
+                "const %(tp_lut)s *lookup_tbl, "
+                "const %(tp)s *in" % {
+                    "tp": dtype_to_ctype(dtype),
+                    "tp_lut": dtype_to_ctype(dtype_subset),
+                    }),
+            preamble="#define MY_INFINITY (1./0)")
 
 # }}}
 
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index 43e0d18b1c6e142036e7c30b095d93618b69c689..10a64ee57e8d844723a584ce1795da530902c073 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -1419,7 +1419,7 @@ void ${name_prefix}_debug_scan(
     const index_type N)
 {
     scan_type item = ${neutral};
-    scan_type prev_item;
+    scan_type last_item;
 
     for (index_type i = 0; i < N; ++i)
     {
@@ -1427,7 +1427,7 @@ void ${name_prefix}_debug_scan(
             ${arg_ctypes[arg_name]} ${name};
             %if ife_offset < 0:
                 if (i+${ife_offset} >= 0)
-                    ${name} = ${arg_name}[i+offset];
+                    ${name} = ${arg_name}[i+${ife_offset}];
             %else:
                 ${name} = ${arg_name}[i];
             %endif
@@ -1435,12 +1435,12 @@ void ${name_prefix}_debug_scan(
 
         scan_type my_val = INPUT_EXPR(i);
 
-        prev_item = item;
+        last_item = item;
         %if is_segmented:
             bool is_seg_start = IS_SEG_START(i, my_val);
         %endif
 
-        item = SCAN_EXPR(prev_item, my_val,
+        item = SCAN_EXPR(last_item, my_val,
             %if is_segmented:
                 is_seg_start
             %else:
diff --git a/pyopencl/version.py b/pyopencl/version.py
index 2d917d29fc041929fe4c94362fd212f13cafc685..a0b428c42f2ebf49667fa8e7a9a48f643c23b915 100644
--- a/pyopencl/version.py
+++ b/pyopencl/version.py
@@ -1,3 +1,3 @@
-VERSION = (2014, 1)
+VERSION = (2015, 1)
 VERSION_STATUS = ""
 VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS
diff --git a/test/test_clmath.py b/test/test_clmath.py
index 586e9e075275aadb11c3067b3127f8c955afb625..53f017e49bfda6b5330b001ef7f9558a3be7cb53 100644
--- a/test/test_clmath.py
+++ b/test/test_clmath.py
@@ -1,4 +1,4 @@
-from __future__ import division
+from __future__ import division, print_function
 
 __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
 
@@ -21,9 +21,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
+
 import math
 import numpy as np
 
+import pytest
+
 
 def have_cl():
     try:
@@ -323,6 +326,127 @@ def test_bessel(ctx_factory):
                 pt.show()
 
 
+@pytest.mark.parametrize("ref_src", ["pyfmmlib", "scipy"])
+def test_complex_bessel(ctx_factory, ref_src):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    if not has_double_support(ctx.devices[0]):
+        from pytest import skip
+        skip("no double precision support--cannot test complex bessel function")
+
+    v = 40
+    n = 10**5
+
+    np.random.seed(13)
+    z = (
+        np.logspace(-5, 2, n)
+        * np.exp(1j * 2 * np.pi * np.random.rand(n)))
+
+    if ref_src == "pyfmmlib":
+        pyfmmlib = pytest.importorskip("pyfmmlib")
+
+        jv_ref = np.zeros(len(z), 'complex')
+
+        vin = v+1
+
+        for i in range(len(z)):
+            ier, fjs, _, _ = pyfmmlib.jfuns2d(vin, z[i], 1, 0, 10000)
+            assert ier == 0
+            jv_ref[i] = fjs[v]
+
+    elif ref_src == "scipy":
+        spec = pytest.importorskip("scipy.special")
+        jv_ref = spec.jv(v, z)
+
+    else:
+        raise ValueError("ref_src")
+
+    z_dev = cl_array.to_device(queue, z)
+
+    jv_dev = clmath.bessel_jn(v, z_dev)
+
+    abs_err_jv = np.abs(jv_dev.get() - jv_ref)
+    abs_jv_ref = np.abs(jv_ref)
+    rel_err_jv = abs_err_jv/abs_jv_ref
+
+    # use absolute error instead if the function value itself is too small
+    tiny = 1e-300
+    ind = abs_jv_ref < tiny
+    rel_err_jv[ind] = abs_err_jv[ind]
+
+    # if the reference value is inf or nan, set the error to zero
+    ind1 = np.isinf(abs_jv_ref)
+    ind2 = np.isnan(abs_jv_ref)
+
+    rel_err_jv[ind1] = 0
+    rel_err_jv[ind2] = 0
+
+    if 0:
+        print(abs(z))
+        print(np.abs(jv_ref))
+        print(np.abs(jv_dev.get()))
+        print(rel_err_jv)
+
+    max_err = np.max(rel_err_jv)
+    assert max_err <= 2e-13, max_err
+
+    print("Jv", np.max(rel_err_jv))
+
+    if 0:
+        import matplotlib.pyplot as pt
+        pt.loglog(np.abs(z), rel_err_jv)
+        pt.show()
+
+
+@pytest.mark.parametrize("ref_src", ["pyfmmlib", "scipy"])
+def test_hankel_01_complex(ctx_factory, ref_src):
+    ctx = ctx_factory()
+    queue = cl.CommandQueue(ctx)
+
+    n = 10**6
+    np.random.seed(11)
+    z = (
+        np.logspace(-5, 2, n)
+        * np.exp(1j * 2 * np.pi * np.random.rand(n)))
+
+    def get_err(check, ref):
+        return np.max(np.abs(check-ref)) / np.max(np.abs(ref))
+
+    if ref_src == "pyfmmlib":
+        pyfmmlib = pytest.importorskip("pyfmmlib")
+        h0_ref, h1_ref = pyfmmlib.hank103_vec(z, ifexpon=1)
+    elif ref_src == "scipy":
+        spec = pytest.importorskip("scipy.special")
+        h0_ref = spec.hankel1(0, z)
+        h1_ref = spec.hankel1(1, z)
+
+    else:
+        raise ValueError("ref_src")
+
+    z_dev = cl_array.to_device(queue, z)
+
+    h0_dev, h1_dev = clmath.hankel_01(z_dev)
+
+    rel_err_h0 = np.abs(h0_dev.get() - h0_ref)/np.abs(h0_ref)
+    rel_err_h1 = np.abs(h1_dev.get() - h1_ref)/np.abs(h1_ref)
+
+    max_rel_err_h0 = np.max(rel_err_h0)
+    max_rel_err_h1 = np.max(rel_err_h1)
+
+    print("H0", max_rel_err_h0)
+    print("H1", max_rel_err_h1)
+
+    assert max_rel_err_h0 < 4e-13
+    assert max_rel_err_h1 < 2e-13
+
+    if 0:
+        import matplotlib.pyplot as pt
+        pt.loglog(np.abs(z), rel_err_h0)
+        pt.loglog(np.abs(z), rel_err_h1)
+        pt.show()
+
+
 if __name__ == "__main__":
     import sys
     if len(sys.argv) > 1:
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index 971ca27d6f959b9adb91843acc5038e35d305505..870e3c5d946f321cbe93730369f17b26da5d44c4 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -62,7 +62,7 @@ def test_get_info(ctx_factory):
             (cl.Program, cl.program_info.KERNEL_NAMES),
             (cl.Program, cl.program_info.NUM_KERNELS),
         ])
-    CRASH_QUIRKS = [
+    CRASH_QUIRKS = [  # noqa
             (("NVIDIA Corporation", "NVIDIA CUDA",
                 "OpenCL 1.0 CUDA 3.0.1"),
                 [
@@ -92,7 +92,7 @@ def test_get_info(ctx_factory):
                     (cl.Program, cl.program_info.SOURCE),
                     ]),
             ]
-    QUIRKS = []
+    QUIRKS = []  # noqa
 
     plat_quirk_key = (
             platform.vendor,
@@ -684,7 +684,7 @@ def test_platform_get_devices(platform):
                         getattr(cl.device_type, 'CUSTOM', None)):
             continue
         for dev in devs:
-            assert dev.type == dev_type
+            assert dev.type & dev_type == dev_type
 
 
 def test_user_event(ctx_factory):