diff --git a/pyopencl/bitonic_sort.py b/pyopencl/bitonic_sort.py
index d4beaba2c7aee15c4cf82ba5fc2f07daed8bcebe..151174e52dcf6f74a937ff1f95691d992b40cf2e 100644
--- a/pyopencl/bitonic_sort.py
+++ b/pyopencl/bitonic_sort.py
@@ -121,8 +121,10 @@ class BitonicSort(object):
             if aux:
                 last_evt = knl(
                         queue, (nt,), wg, arr.data, idx.data,
-                        cl.LocalMemory(wg[0]*arr.dtype.itemsize),
-                        cl.LocalMemory(wg[0]*idx.dtype.itemsize),
+                        cl.LocalMemory(
+                            _tmpl.LOCAL_MEM_FACTOR*wg[0]*arr.dtype.itemsize),
+                        cl.LocalMemory(
+                            _tmpl.LOCAL_MEM_FACTOR*wg[0]*idx.dtype.itemsize),
                         wait_for=[last_evt])
             for knl, nt, wg, _ in run_queue[1:]:
                 last_evt = knl(
@@ -133,7 +135,8 @@ class BitonicSort(object):
             if aux:
                 last_evt = knl(
                         queue, (nt,), wg, arr.data,
-                        cl.LocalMemory(wg[0]*4*arr.dtype.itemsize),
+                        cl.LocalMemory(
+                            _tmpl.LOCAL_MEM_FACTOR*wg[0]*4*arr.dtype.itemsize),
                         wait_for=[last_evt])
             for knl, nt, wg, _ in run_queue[1:]:
                 last_evt = knl(queue, (nt,), wg, arr.data, wait_for=[last_evt])
@@ -184,9 +187,9 @@ class BitonicSort(object):
 
         available_lmem = dev.local_mem_size
         while True:
-            lmem_size = wg*key_dtype.itemsize
+            lmem_size = _tmpl.LOCAL_MEM_FACTOR*wg*key_dtype.itemsize
             if argsort:
-                lmem_size += wg*idx_dtype.itemsize
+                lmem_size += _tmpl.LOCAL_MEM_FACTOR*wg*idx_dtype.itemsize
 
             if lmem_size + 512 > available_lmem:
                 wg //= 2
diff --git a/pyopencl/bitonic_sort_templates.py b/pyopencl/bitonic_sort_templates.py
index 9b4f14e82a5943698cdbf5ff75e1b6522c09a8ed..4aafa439530e01d149b34a3af28d5cde36b0a48a 100644
--- a/pyopencl/bitonic_sort_templates.py
+++ b/pyopencl/bitonic_sort_templates.py
@@ -31,10 +31,17 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
+LOCAL_MEM_FACTOR = 1
+
 
 # {{{ defines
 
 defines = """//CL//
+
+% if dtype == "double":
+    #pragma OPENCL EXTENSION cl_khr_fp64: enable
+% endif
+
 typedef ${dtype} data_t;
 typedef ${idxtype} idx_t;
 typedef ${idxtype}2 idx_t2;
@@ -318,6 +325,8 @@ __kernel void run(__global data_t * data\\
 
 # {{{ C4
 
+# NOTE: If you re-enable the C4 kernel below, set LOCAL_MEM_FACTOR to 4.
+
 ParallelBitonic_C4 = """//CL//
 //ParallelBitonic_C4
 __kernel void run\\
diff --git a/test/test_algorithm.py b/test/test_algorithm.py
index c9811cd8105d5816525323af5ce1768b5803288f..8f44cc594bab73126af98849811b35d99eebd059 100644
--- a/test/test_algorithm.py
+++ b/test/test_algorithm.py
@@ -846,13 +846,17 @@ def test_key_value_sorter(ctx_factory):
 @pytest.mark.parametrize("dtype", [
     np.int32,
     np.float32,
-    # np.float64
+    np.float64
     ])
 @pytest.mark.bitonic
 def test_bitonic_sort(ctx_factory, size, dtype):
     ctx = cl.create_some_context()
     queue = cl.CommandQueue(ctx)
 
+    if (ctx.devices[0].platform.name == "Portable Computing Language"
+            and dtype == np.float64):
+        pytest.xfail("Double precision bitonic sort doesn't work on POCL")
+
     import pyopencl.clrandom as clrandom
     from pyopencl.bitonic_sort import BitonicSort
 
@@ -871,13 +875,17 @@ def test_bitonic_sort(ctx_factory, size, dtype):
 @pytest.mark.parametrize("dtype", [
     np.int32,
     np.float32,
-    # np.float64
+    np.float64
     ])
 @pytest.mark.bitonic
 def test_bitonic_argsort(ctx_factory, size, dtype):
     ctx = cl.create_some_context()
     queue = cl.CommandQueue(ctx)
 
+    if (ctx.devices[0].platform.name == "Portable Computing Language"
+            and dtype == np.float64):
+        pytest.xfail("Double precision bitonic sort doesn't work on POCL")
+
     import pyopencl.clrandom as clrandom
     from pyopencl.bitonic_sort import BitonicSort