From 44bdb6f6c4459cff56ead446d7124e30d2a44269 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sun, 19 May 2013 12:32:24 -0500
Subject: [PATCH] Unify "auto"/"infer" symbols into loopy.auto.

---
 loopy/__init__.py           |  5 ++--
 loopy/codegen/expression.py |  4 +--
 loopy/cse.py                |  4 +--
 loopy/kernel/creation.py    | 30 ++++++++++---------
 loopy/kernel/data.py        | 57 ++++++++++++++++++++++++-------------
 loopy/preprocess.py         |  4 +--
 test/test_loopy.py          | 20 ++++++-------
 7 files changed, 72 insertions(+), 52 deletions(-)

diff --git a/loopy/__init__.py b/loopy/__init__.py
index 668e4ddaa..186b269e2 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -50,7 +50,6 @@ class LoopyAdvisory(UserWarning):
 # {{{ imported user interface
 
 from loopy.kernel.data import (
-        auto_shape, auto_strides,
         ValueArg, ScalarArg, GlobalArg, ArrayArg, ConstantArg, ImageArg,
 
         default_function_mangler, single_arg_function_mangler, opencl_function_mangler,
@@ -76,7 +75,7 @@ from loopy.compiled import CompiledKernel, auto_test_vs_ref
 from loopy.check import check_kernels
 
 __all__ = [
-        "auto_shape", "auto_strides",
+        "auto",
         "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg",
         "LoopKernel",
         "Instruction",
@@ -101,7 +100,7 @@ __all__ = [
         "split_arg_axis", "find_padding_multiple", "add_padding"
         ]
 
-class infer_type:
+class auto:  # sentinel compared via "is" to request inferred dtype/shape/strides
     pass
 
 # }}}
diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py
index f8b3d6775..4700e1df7 100644
--- a/loopy/codegen/expression.py
+++ b/loopy/codegen/expression.py
@@ -167,8 +167,8 @@ class TypeInferenceMapper(CombineMapper):
             # name is not a temporary variable, ok
             pass
         else:
-            from loopy import infer_type
-            if tv.dtype is infer_type:
+            import loopy as lp
+            if tv.dtype is lp.auto:
                 raise DependencyTypeInferenceFailure("attempted type inference on "
                         "variable requiring type inference")
 
diff --git a/loopy/cse.py b/loopy/cse.py
index 7ed6e797a..17f25660d 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -862,9 +862,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None,
 
     # {{{ set up temp variable
 
-    from loopy import infer_type
+    import loopy as lp
     if dtype is None:
-        dtype = infer_type
+        dtype = lp.auto
     else:
         dtype = np.dtype(dtype)
 
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index a4fa3d87b..392089938 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -252,8 +252,8 @@ def parse_insn(insn):
             if groups["temp_var_type"]:
                 temp_var_type = np.dtype(groups["temp_var_type"])
             else:
-                from loopy import infer_type
-                temp_var_type = infer_type
+                import loopy as lp
+                temp_var_type = lp.auto
         else:
             temp_var_type = None
 
@@ -471,6 +471,7 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, s
             return single_valued(irf.index_ranks)
 
     from loopy.kernel.data import ValueArg, GlobalArg
+    import loopy as lp
     for arg_name in sorted(new_arg_names):
         if arg_name in all_params:
             kernel_args.append(ValueArg(arg_name))
@@ -480,7 +481,7 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, s
             # It's not a temp var, and thereby not a domain parameter--the only
             # other writable type of variable is an argument.
 
-            kernel_args.append(GlobalArg(arg_name, shape="auto"))
+            kernel_args.append(GlobalArg(arg_name, shape=lp.auto))
             continue
 
         irank = find_index_rank(arg_name)
@@ -488,7 +489,7 @@ def guess_kernel_args_if_requested(domains, instructions, temporary_variables, s
             # read-only, no indices
             kernel_args.append(ValueArg(arg_name))
         else:
-            kernel_args.append(GlobalArg(arg_name, shape="auto"))
+            kernel_args.append(GlobalArg(arg_name, shape=lp.auto))
 
     return kernel_args
 
@@ -623,8 +624,8 @@ def expand_cses(knl):
         new_var_name = var_name_gen(base_name)
 
         if dtype is None:
-            from loopy import infer_type
-            dtype = infer_type
+            import loopy as lp
+            dtype = lp.auto
         else:
             dtype=np.dtype(dtype)
 
@@ -754,7 +755,8 @@ def apply_default_order_to_args(kernel, default_order):
 # {{{ duplicate arguments and expand defines in shapes
 
 def dup_args_and_expand_defines_in_shapes(kernel, defines):
-    from loopy.kernel.data import ShapedArg, auto_shape, auto_strides
+    import loopy as lp
+    from loopy.kernel.data import ShapedArg
     from loopy.kernel.creation import expand_defines_in_expr
 
     processed_args = []
@@ -765,9 +767,9 @@ def dup_args_and_expand_defines_in_shapes(kernel, defines):
 
             new_arg = arg.copy(name=arg_name)
             if isinstance(arg, ShapedArg):
-                if arg.shape is not None and arg.shape is not auto_shape:
+                if arg.shape is not None and arg.shape is not lp.auto:
                     new_arg = new_arg.copy(shape=expand_defines_in_expr(arg.shape, defines))
-                if arg.strides is not None and arg.strides is not auto_strides:
+                if arg.strides is not None and arg.strides is not lp.auto:
                     new_arg = new_arg.copy(strides=expand_defines_in_expr(arg.strides, defines))
 
             processed_args.append(new_arg)
@@ -781,7 +783,8 @@ def dup_args_and_expand_defines_in_shapes(kernel, defines):
 def guess_arg_shape_if_requested(kernel, default_order):
     new_args = []
 
-    from loopy.kernel.data import ShapedArg, auto_shape, auto_strides
+    import loopy as lp
+    from loopy.kernel.data import ShapedArg
     from loopy.symbolic import SubstitutionRuleExpander
 
     submap = SubstitutionRuleExpander(kernel.substitutions,
@@ -789,7 +792,7 @@ def guess_arg_shape_if_requested(kernel, default_order):
 
     for arg in kernel.args:
         if isinstance(arg, ShapedArg) and (
-                arg.shape is auto_shape or arg.strides is auto_strides):
+                arg.shape is lp.auto or arg.strides is lp.auto):
             armap = AccessRangeMapper(arg.name)
 
             for insn in kernel.instructions:
@@ -797,7 +800,6 @@ def guess_arg_shape_if_requested(kernel, default_order):
                 armap(submap(insn.assignee, insn.id), domain)
                 armap(submap(insn.expression, insn.id), domain)
 
-
             if armap.access_range is None:
                 # no subscripts found, let's call it a scalar
                 shape = ()
@@ -811,9 +813,9 @@ def guess_arg_shape_if_requested(kernel, default_order):
                             constants_only=False))
                         for i in xrange(armap.access_range.dim(dim_type.set)))
 
-            if arg.shape is auto_shape:
+            if arg.shape is lp.auto:
                 arg = arg.copy(shape=shape)
-            if arg.strides is auto_strides:
+            if arg.strides is lp.auto:
                 from loopy.kernel.data import make_strides
                 arg = arg.copy(strides=make_strides(shape, default_order))
 
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index e4cadc98d..d2ea3fd1d 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -136,12 +136,6 @@ def parse_tag(tag):
 
 # {{{ arguments
 
-class auto_shape:
-    pass
-
-class auto_strides:
-    pass
-
 def make_strides(shape, order):
     from pyopencl.compyte.array import (
             f_contiguous_strides,
@@ -160,10 +154,29 @@ class ShapedArg(Record):
         """
         All of the following are optional. Specify either strides or shape.
 
-        :arg shape:
-        :arg strides: like numpy strides, but in multiples of
-            data type size
-        :arg order:
+        :arg name: May contain multiple names separated by
+            commas, in which case multiple arguments,
+            each with identical properties are created
+            for each name.
+        :arg shape: like :attr:`numpy.ndarray.shape`.
+            Also allowed to be :class:`loopy.auto`, in
+            which case shape is determined by finding the
+            access footprint.
+
+            This is also allowed to be an expression involving
+            kernel parameters, or a (potentially comma-separated)
+            string that can be parsed to such an expression.
+        :arg strides:  like :attr:`numpy.ndarray.strides`,
+            but in multiples of data type size.
+            Also allowed to be :class:`loopy.auto`, in which
+            case strides are determined from shape and
+            *default_order* of :func:`loopy.make_kernel`.
+
+            This is also allowed to be an expression involving
+            kernel parameters, or a (potentially comma-separated)
+            string that can be parsed to such an expression.
+        :arg order: "C" or "F" for C (row major) or Fortran
+            (column major) order.
         :arg offset: Offset from the beginning of the vector from which
             the strides are counted.
         """
@@ -178,19 +191,23 @@ class ShapedArg(Record):
                 return x
 
         def process_tuple(x):
+            if x == "auto":  # deprecated string spelling of the sentinel; warn only
+                from warnings import warn
+                warn("use of 'auto' as a shape or stride won't work "
+                        "any more--use loopy.auto instead",
+                        stacklevel=3)
             x = parse_if_necessary(x)
+            if x is lp.auto:  # lp.auto is a class used as a sentinel: test identity
+                return x
             if not isinstance(x, tuple):
+                assert x is not lp.auto  # already returned above if it were
                 x = (x,)
 
             return tuple(parse_if_necessary(xi) for xi in x)
 
-        if strides == "auto":
-            strides = auto_strides
-        if shape == "auto":
-            shape = auto_shape
-
-        strides_known = strides is not None and strides is not auto_strides
-        shape_known = shape is not None and shape is not auto_shape
+        import loopy as lp
+        strides_known = strides is not None and strides is not lp.auto
+        shape_known = shape is not None and shape is not lp.auto
 
         if strides_known:
             strides = process_tuple(strides)
@@ -222,16 +239,18 @@ class ShapedArg(Record):
         return len(self.strides)
 
     def __str__(self):
+        import loopy as lp
+
         if self.shape is None:
             shape = "unknown"
-        elif self.shape is auto_shape:
+        elif self.shape is lp.auto:
             shape = "auto"
         else:
             shape = ",".join(str(i) for i in self.shape)
 
         if self.strides is None:
             strides = "unknown"
-        elif self.strides is auto_strides:
+        elif self.strides is lp.auto:
             strides = "auto"
         else:
             strides = ",".join(str(i) for i in self.strides)
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 855108311..bb168b347 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -75,9 +75,9 @@ def infer_types_of_temporaries(kernel):
     # queue contains temporary variables
     queue = []
 
-    from loopy import infer_type
+    import loopy as lp
     for tv in kernel.temporary_variables.itervalues():
-        if tv.dtype is infer_type:
+        if tv.dtype is lp.auto:
             queue.append(tv)
 
     # }}}
diff --git a/test/test_loopy.py b/test/test_loopy.py
index d925f8575..1071521bc 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -696,10 +696,10 @@ def test_dependent_loop_bounds(ctx_factory):
                 "a_sum[i] = sum(jj, a_values[[a_rowstarts[i]+jj]])",
                 ],
             [
-                lp.GlobalArg("a_rowstarts", np.int32, shape="auto"),
-                lp.GlobalArg("a_indices", np.int32, shape="auto"),
+                lp.GlobalArg("a_rowstarts", np.int32, shape=lp.auto),
+                lp.GlobalArg("a_indices", np.int32, shape=lp.auto),
                 lp.GlobalArg("a_values", dtype),
-                lp.GlobalArg("a_sum", dtype, shape="auto"),
+                lp.GlobalArg("a_sum", dtype, shape=lp.auto),
                 lp.ValueArg("n", np.int32),
                 ],
             assumptions="n>=1 and row_len>=1")
@@ -727,10 +727,10 @@ def test_dependent_loop_bounds_2(ctx_factory):
                 "ax[i] = sum(jj, a_values[[row_start+jj]])",
                 ],
             [
-                lp.GlobalArg("a_rowstarts", np.int32, shape="auto"),
-                lp.GlobalArg("a_indices", np.int32, shape="auto"),
+                lp.GlobalArg("a_rowstarts", np.int32, shape=lp.auto),
+                lp.GlobalArg("a_indices", np.int32, shape=lp.auto),
                 lp.GlobalArg("a_values", dtype),
-                lp.GlobalArg("ax", dtype, shape="auto"),
+                lp.GlobalArg("ax", dtype, shape=lp.auto),
                 lp.ValueArg("n", np.int32),
                 ],
             assumptions="n>=1 and row_len>=1")
@@ -765,7 +765,7 @@ def test_dependent_loop_bounds_3(ctx_factory):
                 "a[i,jj] = 1",
                 ],
             [
-                lp.GlobalArg("a_row_lengths", np.int32, shape="auto"),
+                lp.GlobalArg("a_row_lengths", np.int32, shape=lp.auto),
                 lp.GlobalArg("a", dtype, shape=("n,n"), order="C"),
                 lp.ValueArg("n", np.int32),
                 ])
@@ -1095,9 +1095,9 @@ def test_arg_shape_guessing(ctx_factory):
                 c[i+j, j] = b[j,i]
                 """,
             [
-                lp.GlobalArg("a", shape=lp.auto_shape),
-                lp.GlobalArg("b", shape=lp.auto_shape),
-                lp.GlobalArg("c", shape=lp.auto_shape),
+                lp.GlobalArg("a", shape=lp.auto),
+                lp.GlobalArg("b", shape=lp.auto),
+                lp.GlobalArg("c", shape=lp.auto),
                 lp.ValueArg("n"),
                 ],
             assumptions="n>=1")
-- 
GitLab