From 39d4fadbc4527972c2ac268a72cacfa143eff8f1 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 15 Jan 2021 17:57:55 -0600
Subject: [PATCH] Avoid a few calls to len() in enqueue_nd_range_kernel

---
 src/wrap_cl.hpp | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp
index de35513c..6679ed3f 100644
--- a/src/wrap_cl.hpp
+++ b/src/wrap_cl.hpp
@@ -389,14 +389,15 @@
     \
     if (py_wait_for.ptr() != Py_None) \
     { \
-      event_wait_list.resize(len(py_wait_for)); \
       for (py::handle evt: py_wait_for) \
-        event_wait_list[num_events_in_wait_list++] = \
-          evt.cast<const event &>().data(); \
+      { \
+        event_wait_list.push_back(evt.cast<const event &>().data()); \
+        ++num_events_in_wait_list; \
+      } \
     }
 
 #define PYOPENCL_WAITLIST_ARGS \
-    num_events_in_wait_list, event_wait_list.empty( ) ? nullptr : &event_wait_list.front()
+    num_events_in_wait_list, (num_events_in_wait_list == 0) ? nullptr : &event_wait_list.front()
 
 #define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \
     try \
@@ -4657,36 +4658,35 @@ namespace pyopencl
   {
     PYOPENCL_PARSE_WAIT_FOR;
 
-    cl_uint work_dim = len(py_global_work_size);
-
     std::array<size_t, MAX_WS_DIM_COUNT> global_work_size;
-    unsigned gws_index = 0;
-    COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_size, gws_index);
+    unsigned gws_size = 0;
+    COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_size, gws_size);
+    cl_uint work_dim = gws_size;
 
     std::array<size_t, MAX_WS_DIM_COUNT> local_work_size;
-    unsigned lws_index = 0;
+    unsigned lws_size = 0;
     size_t *local_work_size_ptr = nullptr;
 
     if (py_local_work_size.ptr() != Py_None)
     {
+      COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, local_work_size, lws_size);
+
       if (g_times_l)
-        work_dim = std::max(work_dim, unsigned(len(py_local_work_size)));
+        work_dim = std::max(work_dim, lws_size);
       else
-        if (work_dim != unsigned(len(py_local_work_size)))
+        if (work_dim != lws_size)
           throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
               "global/local work sizes have differing dimensions");
 
-      COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, local_work_size, lws_index);
-
-      while (lws_index < work_dim)
-        local_work_size[lws_index++] = 1;
-      while (gws_index < work_dim)
-        global_work_size[gws_index++] = 1;
+      while (lws_size < work_dim)
+        local_work_size[lws_size++] = 1;
+      while (gws_size < work_dim)
+        global_work_size[gws_size++] = 1;
 
       local_work_size_ptr = &local_work_size.front();
     }
 
-    if (g_times_l && lws_index)
+    if (g_times_l && lws_size)
     {
       for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
         global_work_size[work_axis] *= local_work_size[work_axis];
@@ -4694,15 +4694,15 @@ namespace pyopencl
 
     size_t *global_work_offset_ptr = nullptr;
     std::array<size_t, MAX_WS_DIM_COUNT> global_work_offset;
-    unsigned gwo_index = 0;
     if (py_global_work_offset.ptr() != Py_None)
     {
-      if (work_dim != unsigned(len(py_global_work_offset)))
+      unsigned gwo_size = 0;
+      COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_offset, gwo_size);
+
+      if (work_dim != gwo_size)
         throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
             "global work size and offset have differing dimensions");
 
-      COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_offset, gwo_index);
-
       if (g_times_l && local_work_size_ptr)
       {
         for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
-- 
GitLab