From e076f854c9f7232044c2346dd32a04bdf6d7bacd Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Thu, 30 May 2013 17:34:24 -0400
Subject: [PATCH] PEP8-ify loopy.compiled.

---
 loopy/compiled.py | 139 +++++++++++++++++++++++++---------------------
 1 file changed, 76 insertions(+), 63 deletions(-)

diff --git a/loopy/compiled.py b/loopy/compiled.py
index c69eea427..8e5f82185 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -23,8 +23,6 @@ THE SOFTWARE.
 """
 
 
-
-
 import pyopencl as cl
 import pyopencl.array as cl_array
 
@@ -35,8 +33,6 @@ from pytools import Record, memoize_method
 AUTO_TEST_SKIP_RUN = False
 
 
-
-
 # {{{ argument checking
 
 def _arg_matches_spec(arg, val, other_args):
@@ -69,6 +65,7 @@ def _arg_matches_spec(arg, val, other_args):
 
 # }}}
 
+
 # {{{ compiled kernel object
 
 def _get_kernel_from_iterable(iterable):
@@ -89,16 +86,18 @@ def _get_kernel_from_iterable(iterable):
 
     return result
 
+
 class _KernelInfo(Record):
     pass
 
+
 class CompiledKernel:
     def __init__(self, context, kernel, options=[], codegen_kwargs={}):
         """
         :arg kernel: may be a loopy.LoopKernel, a generator returning kernels
-          (a warning will be issued if more than one is returned). If the kernel
-          has not yet been loop-scheduled, that is done, too, with no specific
-          arguments.
+          (a warning will be issued if more than one is returned). If the
+          kernel has not yet been loop-scheduled, that is done, too, with no
+          specific arguments.
         """
 
         import loopy as lp
@@ -125,9 +124,7 @@ class CompiledKernel:
         kernel = self.kernel
 
         import loopy as lp
-        from loopy.kernel.tools import (
-                add_argument_dtypes,
-                get_arguments_with_incomplete_dtype)
+        from loopy.kernel.tools import add_argument_dtypes
 
         if arg_to_dtype_set:
             kernel = add_argument_dtypes(kernel, dict(arg_to_dtype_set))
@@ -146,7 +143,9 @@ class CompiledKernel:
                     if arg_to_has_offset[arg.name]:
                         offset_arg_name = vng(arg.name+"_offset")
                         new_args.append(arg.copy(offset=offset_arg_name))
-                        new_args.append(lp.ValueArg(offset_arg_name, kernel.index_dtype))
+                        new_args.append(
+                                lp.ValueArg(
+                                    offset_arg_name, kernel.index_dtype))
                     else:
                         new_args.append(arg.copy(offset=0))
                 else:
@@ -163,21 +162,25 @@ class CompiledKernel:
 
         gsize_expr, lsize_expr = kernel.get_grid_sizes_as_exprs()
 
-        if not gsize_expr: gsize_expr = (1,)
-        if not lsize_expr: lsize_expr = (1,)
+        if not gsize_expr:
+            gsize_expr = (1,)
+        if not lsize_expr:
+            lsize_expr = (1,)
 
         # }}}
 
         from pymbolic import compile
         return _KernelInfo(
                 kernel=kernel,
-                global_size_func = compile(gsize_expr, kernel.scalar_loop_args),
+                global_size_func=compile(gsize_expr, kernel.scalar_loop_args),
                 local_size_func=compile(lsize_expr, kernel.scalar_loop_args),
                 )
 
     @memoize_method
-    def get_cl_kernel(self, arg_to_dtype_set, arg_to_has_offset_set, code_op=False):
-        kernel_info = self.get_kernel_info(arg_to_dtype_set, arg_to_has_offset_set)
+    def get_cl_kernel(self,
+            arg_to_dtype_set, arg_to_has_offset_set, code_op=False):
+        kernel_info = self.get_kernel_info(
+                arg_to_dtype_set, arg_to_has_offset_set)
         kernel = kernel_info.kernel
 
         from loopy.codegen import generate_code
@@ -199,13 +202,13 @@ class CompiledKernel:
         except KeyboardInterrupt:
             raise
         except:
-            print "[Loopy] ----------------------------------------------------"
+            print "[Loopy] "+70*"-"
             print "[Loopy] build failed, here's the source code:"
-            print "[Loopy] ----------------------------------------------------"
+            print "[Loopy] "+70*"-"
             print code
-            print "[Loopy] ----------------------------------------------------"
+            print "[Loopy] "+70*"-"
             print "[Loopy] end source code"
-            print "[Loopy] ----------------------------------------------------"
+            print "[Loopy] "+70*"-"
             raise
 
         from loopy.kernel.data import ValueArg
@@ -235,7 +238,8 @@ class CompiledKernel:
         return generate_code(kernel_info.kernel, **self.codegen_kwargs)
 
     def get_highlighted_code(self, arg_to_dtype=None, arg_to_has_offset=None):
-        return get_highlighted_code(self.get_code(arg_to_dtype, arg_to_has_offset))
+        return get_highlighted_code(
+                self.get_code(arg_to_dtype, arg_to_has_offset))
 
     @property
     def code(self):
@@ -248,8 +252,8 @@ class CompiledKernel:
     # }}}
 
     def __call__(self, queue, **kwargs):
-        """If all array arguments are :mod:`numpy` arrays, defaults to returning
-        numpy arrays as well.
+        """If all array arguments are :mod:`numpy` arrays, defaults to
+        returning numpy arrays as well.
 
         If you want offset arguments (see
         :attr:`loopy.kernel.data.GlobalArg.offset`) to be set automatically, it
@@ -307,6 +311,7 @@ class CompiledKernel:
         args = []
         outputs = []
         encountered_numpy = False
+        encountered_cl = False
 
         kwargs_copy = kwargs.copy()
 
@@ -323,7 +328,7 @@ class CompiledKernel:
                     # be set automatically, it must occur *after* the
                     # corresponding array argument.
 
-                    ofs, remdr =  divmod(val.offset, val.dtype.itemsize)
+                    ofs, remdr = divmod(val.offset, val.dtype.itemsize)
                     assert remdr == 0
                     kwargs_copy.setdefault(arg.offset, ofs)
                     del ofs
@@ -340,12 +345,15 @@ class CompiledKernel:
                         warn("argument '%s' was passed as a numpy array, "
                                 "performing implicit transfer" % arg.name,
                                 stacklevel=2)
+                else:
+                    encountered_cl = True
 
                 # }}}
 
             if val is None:
                 if not is_written:
-                    raise TypeError("must supply input argument '%s'" % arg.name)
+                    raise TypeError(
+                            "must supply input argument '%s'" % arg.name)
 
                 if isinstance(arg, lp.ImageArg):
                     raise RuntimeError("write-mode image '%s' must "
@@ -362,12 +370,14 @@ class CompiledKernel:
                         + arg.dtype.itemsize)
 
                 if allocator is None:
-                    storage = cl.Buffer(queue.context, cl.mem_flags.READ_WRITE, alloc_size)
+                    storage = cl.Buffer(
+                            queue.context, cl.mem_flags.READ_WRITE, alloc_size)
                 else:
                     storage = allocator(alloc_size)
 
                 val = cl_array.Array(queue, shape, arg.dtype,
-                        strides=numpy_strides, data=storage, allocator=allocator)
+                        strides=numpy_strides, data=storage,
+                        allocator=allocator)
             else:
                 assert _arg_matches_spec(arg, val, kwargs)
 
@@ -391,7 +401,7 @@ class CompiledKernel:
                     *args,
                     g_times_l=True, wait_for=wait_for)
 
-        if out_host is None and encountered_numpy:
+        if out_host is None and (encountered_numpy and not encountered_cl):
             out_host = True
         if out_host:
             outputs = [o.get(queue=queue) for o in outputs]
@@ -401,8 +411,6 @@ class CompiledKernel:
 # }}}
 
 
-
-
 def get_highlighted_code(text):
     try:
         from pygments import highlight
@@ -415,8 +423,6 @@ def get_highlighted_code(text):
         return highlight(text, CLexer(), TerminalFormatter())
 
 
-
-
 # {{{ automatic testing
 
 def fill_rand(ary):
@@ -430,12 +436,10 @@ def fill_rand(ary):
         fill_rand(ary, luxury=0)
 
 
-
-
-
 class TestArgInfo(Record):
     pass
 
+
 def make_ref_args(kernel, queue, parameters, fill_value):
     import loopy as lp
     from loopy.kernel.data import ValueArg, GlobalArg, ImageArg
@@ -472,7 +476,8 @@ def make_ref_args(kernel, queue, parameters, fill_value):
             is_image = isinstance(arg, ImageArg)
 
             if is_image:
-                storage_array = ary = cl_array.empty(queue, shape, arg.dtype, order="C")
+                storage_array = ary = cl_array.empty(
+                        queue, shape, arg.dtype, order="C")
                 numpy_strides = None
                 alloc_size = None
                 strides = None
@@ -489,7 +494,8 @@ def make_ref_args(kernel, queue, parameters, fill_value):
                 dtype = arg.dtype
                 if dtype is None:
                     raise RuntimeError("dtype for argument '%s' is not yet "
-                            "known. Perhaps you want to use loopy.add_argument_dtypes "
+                            "known. Perhaps you want to use "
+                            "loopy.add_argument_dtypes "
                             "or loopy.infer_argument_dtypes?"
                             % arg.name)
 
@@ -515,7 +521,8 @@ def make_ref_args(kernel, queue, parameters, fill_value):
                 fill_rand(storage_array)
                 if isinstance(arg, ImageArg):
                     # must be contiguous
-                    ref_args[arg.name] = cl.image_from_array(queue.context, ary.get())
+                    ref_args[arg.name] = cl.image_from_array(
+                            queue.context, ary.get())
                 else:
                     ref_args[arg.name] = ary
 
@@ -535,8 +542,6 @@ def make_ref_args(kernel, queue, parameters, fill_value):
     return ref_args, arg_descriptors
 
 
-
-
 def make_args(queue, kernel, arg_descriptors, parameters,
         fill_value):
     import loopy as lp
@@ -610,12 +615,16 @@ def make_args(queue, kernel, arg_descriptors, parameters,
                 # create host array with test shape (but not strides)
                 host_contig_array = np.empty(shape, dtype=arg.dtype)
 
-                common_len = min(len(host_ref_flat_array), len(host_contig_array.ravel()))
-                host_contig_array.ravel()[:common_len] = host_ref_flat_array[:common_len]
+                common_len = min(
+                        len(host_ref_flat_array),
+                        len(host_contig_array.ravel()))
+                host_contig_array.ravel()[:common_len] = \
+                        host_ref_flat_array[:common_len]
 
                 # create host array with test shape and storage layout
                 host_storage_array = np.empty(alloc_size, arg.dtype)
-                host_array = as_strided(host_storage_array, shape, numpy_strides)
+                host_array = as_strided(
+                        host_storage_array, shape, numpy_strides)
                 host_array[:] = host_contig_array
 
                 host_contig_array = arg_desc.ref_storage_array.get()
@@ -637,12 +646,16 @@ def make_args(queue, kernel, arg_descriptors, parameters,
     return args
 
 
-
-
 def _default_check_result(result, ref_result):
     if not np.allclose(ref_result, result, rtol=1e-3, atol=1e-3):
-        l2_err = np.sum(np.abs(ref_result-result)**2)/np.sum(np.abs(ref_result)**2)
-        linf_err = np.max(np.abs(ref_result-result))/np.max(np.abs(ref_result-result))
+        l2_err = (
+                np.sum(np.abs(ref_result-result)**2)
+                /
+                np.sum(np.abs(ref_result)**2))
+        linf_err = (
+                np.max(np.abs(ref_result-result))
+                /
+                np.max(np.abs(ref_result-result)))
         return (False,
                 "results do not match(rel) l_2 err: %g, l_inf err: %g"
                 % (l2_err, linf_err))
@@ -650,8 +663,6 @@ def _default_check_result(result, ref_result):
         return True, None
 
 
-
-
 def _enumerate_cl_devices_for_ref_test():
     noncpu_devs = []
     cpu_devs = []
@@ -676,9 +687,9 @@ def _enumerate_cl_devices_for_ref_test():
         raise RuntimeError("no CL device found for test")
 
     if not cpu_devs:
-        warn("No CPU device found for reference test. The reference computation "
-                "will either fail because of a timeout or take a *very* long "
-                "time.")
+        warn("No CPU device found for reference test. The reference "
+                "computation will either fail because of a timeout "
+                "or take a *very* long time.")
 
     for dev in cpu_devs:
         yield dev
@@ -687,9 +698,8 @@ def _enumerate_cl_devices_for_ref_test():
         yield dev
 
 
-
-
-def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], parameters={},
+def auto_test_vs_ref(
+        ref_knl, ctx, kernel_gen, op_count=[], op_label=[], parameters={},
         print_ref_code=False, print_code=True, warmup_rounds=2,
         code_op=None, dump_binary=False, codegen_kwargs={},
         options=[],
@@ -699,8 +709,8 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
     `kernel_gen`.
 
     :arg check_result: a callable with :class:`numpy.ndarray` arguments
-        *(result, reference_result)* returning a a tuple (class:`bool`, message)
-        indicating correctness/acceptability of the result
+        *(result, reference_result)* returning a tuple (:class:`bool`,
+        message) indicating correctness/acceptability of the result
     """
 
     if isinstance(op_count, (int, float)):
@@ -786,7 +796,6 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
             print get_highlighted_code(ref_compiled.code)
             print 75*"-"
 
-
         ref_queue.finish()
         ref_start = time()
 
@@ -807,7 +816,8 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
         break
 
     if not found_ref_device:
-        raise RuntimeError("could not find a suitable device for the reference computation.\n"
+        raise RuntimeError("could not find a suitable device for the "
+                "reference computation.\n"
                 "These errors were encountered:\n"+"\n".join(ref_errors))
 
     # }}}
@@ -896,11 +906,13 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
             evt_start.wait()
             evt_end.wait()
 
-            elapsed = (1e-9*events[-1].profile.END-1e-9*events[0].profile.SUBMIT) \
+            elapsed = (1e-9*events[-1].profile.END
+                    - 1e-9*events[0].profile.SUBMIT) \
                     / timing_rounds
             try:
                 elapsed_evt_2 = "%g" % \
-                        ((1e-9*evt_end.profile.START-1e-9*evt_start.profile.START) \
+                        ((1e-9*evt_end.profile.START
+                            - 1e-9*evt_start.profile.START)
                         / timing_rounds)
             except cl.RuntimeError:
                 elapsed_evt_2 = "<unavailable>"
@@ -916,8 +928,9 @@ def auto_test_vs_ref(ref_knl, ctx, kernel_gen, op_count=[], op_label=[], paramet
         for cnt, lbl in zip(op_count, op_label):
             rates += " %g %s/s" % (cnt/elapsed_wall, lbl)
 
-        print "elapsed: %g s event, %s s marker-event %g s wall (%d rounds)%s" % (
-                elapsed, elapsed_evt_2, elapsed_wall, timing_rounds, rates)
+        print("elapsed: %g s event, %s s marker-event %g s wall "
+                "(%d rounds)%s" % (
+                elapsed, elapsed_evt_2, elapsed_wall, timing_rounds, rates))
 
         if do_check:
             ref_rates = ""
-- 
GitLab