From 09f678b9b48beede068e96742549ce74c6a9599c Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 6 May 2013 22:39:39 -0400
Subject: [PATCH] Add cl.array.cumsum().

---
 pyopencl/array.py | 26 ++++++++++++++++++++++++++
 pyopencl/scan.py  | 23 ++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/pyopencl/array.py b/pyopencl/array.py
index 62e153c3..9466b41c 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1400,6 +1400,7 @@ def minimum(a, b, out=None, queue=None):
 # }}}
 
 # {{{ reductions
+_builtin_sum = sum  # NOTE(review): presumably kept because this module rebinds sum; confirm
 _builtin_min = min
 _builtin_max = max
 
@@ -1460,4 +1461,29 @@ subset_max.__doc__ = """.. versionadded:: 2011.1"""
 
 # }}}
 
+# {{{ scans
+
+def cumsum(a, output_dtype=None, queue=None, wait_for=None, return_event=False):
+    # undocumented for now
+    """Return the cumulative (prefix) sum of *a*, computed with *output_dtype*.
+
+    .. versionadded:: 2013.1
+    """
+
+    if output_dtype is None:
+        output_dtype = a.dtype  # default: use the input array's own dtype
+
+    result = a._new_like_me(output_dtype)  # output array, derived from *a*
+
+    from pyopencl.scan import get_cumsum_kernel  # imported locally, at call time
+    krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
+    evt = krnl(a, result, queue=queue, wait_for=wait_for)  # passed through as given
+
+    if return_event:
+        return evt, result  # tuple: (value returned by the kernel call, output)
+    else:
+        return result
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/pyopencl/scan.py b/pyopencl/scan.py
index a2125f4b..8d99e802 100644
--- a/pyopencl/scan.py
+++ b/pyopencl/scan.py
@@ -35,7 +35,9 @@ import pyopencl as cl
 import pyopencl.array
 from pyopencl.tools import (dtype_to_ctype, bitlog2,
         KernelTemplateBase, _process_code_for_macro,
-        get_arg_list_scalar_arg_dtypes)
+        get_arg_list_scalar_arg_dtypes,
+        context_dependent_memoize,
+        )
 import pyopencl._mymako as mako
 from pyopencl._cluda import CLUDA_PREAMBLE
 
@@ -1570,4 +1572,23 @@ class ScanTemplate(KernelTemplateBase):
 
 # }}}
 
+# {{{ 'canned' scan kernels
+
+@context_dependent_memoize
+def get_cumsum_kernel(context, input_dtype, output_dtype):  # used by array.cumsum()
+    from pyopencl.tools import VectorArg
+    return GenericScanKernel(
+        context, output_dtype,
+        arguments=[
+            VectorArg(input_dtype, "input"),  # read through input_expr below
+            VectorArg(output_dtype, "output"),  # written by output_statement below
+            ],
+        input_expr="input[i]",
+        scan_expr="a+b", neutral="0",  # the scan combines items by addition
+        output_statement="""
+            output[i] = item;
+            """)
+
+# }}}
+
 # vim: filetype=pyopencl:fdm=marker
-- 
GitLab