From ee8783cbc0b1b962d32b8a387b274ea6cbad615b Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 9 Nov 2016 16:37:03 -0600
Subject: [PATCH 1/2] Centralize everything to do with type inference in a
 single file

---
 loopy/__init__.py                    |   8 +-
 loopy/auto_test.py                   |   4 +-
 loopy/codegen/__init__.py            |   2 +-
 loopy/execution.py                   |   2 +-
 loopy/expression.py                  | 266 +--------------
 loopy/kernel/tools.py                |   4 +-
 loopy/preprocess.py                  | 191 +----------
 loopy/target/c/codegen/expression.py |   3 +-
 loopy/target/python.py               |   2 +-
 loopy/type_inference.py              | 486 +++++++++++++++++++++++++++
 10 files changed, 505 insertions(+), 463 deletions(-)
 create mode 100644 loopy/type_inference.py

diff --git a/loopy/__init__.py b/loopy/__init__.py
index 21a41b11c..73a02479d 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -109,8 +109,8 @@ from loopy.transform.parameter import assume, fix_parameters
 
 # }}}
 
-from loopy.preprocess import (preprocess_kernel, realize_reduction,
-        infer_unknown_types)
+from loopy.type_inference import infer_unknown_types
+from loopy.preprocess import preprocess_kernel, realize_reduction
 from loopy.schedule import generate_loop_schedules, get_one_scheduled_kernel
 from loopy.statistics import (get_op_poly, sum_ops_to_dtypes,
         get_gmem_access_poly,
@@ -213,7 +213,9 @@ __all__ = [
         "add_dtypes",
         "add_and_infer_dtypes",
 
-        "preprocess_kernel", "realize_reduction", "infer_unknown_types",
+        "infer_unknown_types",
+
+        "preprocess_kernel", "realize_reduction",
         "generate_loop_schedules", "get_one_scheduled_kernel",
         "GeneratedProgram", "CodeGenerationResult",
         "PreambleInfo",
diff --git a/loopy/auto_test.py b/loopy/auto_test.py
index cfdc96f8a..6a4d55975 100644
--- a/loopy/auto_test.py
+++ b/loopy/auto_test.py
@@ -422,7 +422,7 @@ def auto_test_vs_ref(
 
     # {{{ compile and run reference code
 
-    from loopy.preprocess import infer_unknown_types
+    from loopy.type_inference import infer_unknown_types
     ref_knl = infer_unknown_types(ref_knl, expect_completion=True)
 
     found_ref_device = False
@@ -530,7 +530,7 @@ def auto_test_vs_ref(
 
     test_kernel_count = 0
 
-    from loopy.preprocess import infer_unknown_types
+    from loopy.type_inference import infer_unknown_types
     for i, kernel in enumerate(test_kernels):
         test_kernel_count += 1
         if test_kernel_count > max_test_kernel_count:
diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index 79d824a44..ffd291d53 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -409,7 +409,7 @@ def generate_code_v2(kernel):
 
     # }}}
 
-    from loopy.preprocess import infer_unknown_types
+    from loopy.type_inference import infer_unknown_types
     kernel = infer_unknown_types(kernel, expect_completion=True)
 
     from loopy.check import pre_codegen_checks
diff --git a/loopy/execution.py b/loopy/execution.py
index 802684247..abb71325e 100644
--- a/loopy/execution.py
+++ b/loopy/execution.py
@@ -160,7 +160,7 @@ class KernelExecutorBase(object):
 
             kernel = add_dtypes(kernel, var_to_dtype)
 
-            from loopy.preprocess import infer_unknown_types
+            from loopy.type_inference import infer_unknown_types
             kernel = infer_unknown_types(kernel, expect_completion=True)
 
         if kernel.schedule is None:
diff --git a/loopy/expression.py b/loopy/expression.py
index 991f4a93e..3269bc09f 100644
--- a/loopy/expression.py
+++ b/loopy/expression.py
@@ -25,14 +25,10 @@ THE SOFTWARE.
 
 import numpy as np
 
-from pymbolic.mapper import CombineMapper, RecursiveMapper
+from pymbolic.mapper import RecursiveMapper
 
-from loopy.tools import is_integer
-from loopy.types import NumpyType
 from loopy.codegen import Unvectorizable
-from loopy.diagnostic import (
-        LoopyError,
-        TypeInferenceFailure, DependencyTypeInferenceFailure)
+from loopy.diagnostic import LoopyError
 
 
 # type_context may be:
@@ -57,264 +53,6 @@ def dtype_to_type_context(target, dtype):
     return None
 
 
-# {{{ type inference
-
-class TypeInferenceMapper(CombineMapper):
-    def __init__(self, kernel, new_assignments=None):
-        """
-        :arg new_assignments: mapping from names to either
-            :class:`loopy.kernel.data.TemporaryVariable`
-            or
-            :class:`loopy.kernel.data.KernelArgument`
-            instances
-        """
-        self.kernel = kernel
-        if new_assignments is None:
-            new_assignments = {}
-        self.new_assignments = new_assignments
-
-    # /!\ Introduce caches with care--numpy.float32(x) and numpy.float64(x)
-    # are Python-equal (for many common constants such as integers).
-
-    def with_assignments(self, names_to_vars):
-        new_ass = self.new_assignments.copy()
-        new_ass.update(names_to_vars)
-        return type(self)(self.kernel, new_ass)
-
-    @staticmethod
-    def combine(dtypes):
-        # dtypes may just be a generator expr
-        dtypes = list(dtypes)
-
-        from loopy.types import LoopyType, NumpyType
-        assert all(isinstance(dtype, LoopyType) for dtype in dtypes)
-
-        if not all(isinstance(dtype, NumpyType) for dtype in dtypes):
-            from pytools import is_single_valued, single_valued
-            if not is_single_valued(dtypes):
-                raise TypeInferenceFailure(
-                        "Nothing known about operations between '%s'"
-                        % ", ".join(str(dt) for dt in dtypes))
-
-            return single_valued(dtypes)
-
-        dtypes = [dtype.dtype for dtype in dtypes]
-
-        result = dtypes.pop()
-        while dtypes:
-            other = dtypes.pop()
-
-            if result.fields is None and other.fields is None:
-                if (result, other) in [
-                        (np.int32, np.float32), (np.float32, np.int32)]:
-                    # numpy makes this a double. I disagree.
-                    result = np.dtype(np.float32)
-                else:
-                    result = (
-                            np.empty(0, dtype=result)
-                            + np.empty(0, dtype=other)
-                            ).dtype
-
-            elif result.fields is None and other.fields is not None:
-                # assume the non-native type takes over
-                # (This is used for vector types.)
-                result = other
-            elif result.fields is not None and other.fields is None:
-                # assume the non-native type takes over
-                # (This is used for vector types.)
-                pass
-            else:
-                if result is not other:
-                    raise TypeInferenceFailure(
-                            "nothing known about result of operation on "
-                            "'%s' and '%s'" % (result, other))
-
-        return NumpyType(result)
-
-    def map_sum(self, expr):
-        dtypes = []
-        small_integer_dtypes = []
-        for child in expr.children:
-            dtype = self.rec(child)
-            if is_integer(child) and abs(child) < 1024:
-                small_integer_dtypes.append(dtype)
-            else:
-                dtypes.append(dtype)
-
-        from pytools import all
-        if all(dtype.is_integral() for dtype in dtypes):
-            dtypes.extend(small_integer_dtypes)
-
-        return self.combine(dtypes)
-
-    map_product = map_sum
-
-    def map_quotient(self, expr):
-        n_dtype = self.rec(expr.numerator)
-        d_dtype = self.rec(expr.denominator)
-
-        if n_dtype.is_integral() and d_dtype.is_integral():
-            # both integers
-            return NumpyType(np.dtype(np.float64))
-
-        else:
-            return self.combine([n_dtype, d_dtype])
-
-    def map_constant(self, expr):
-        if is_integer(expr):
-            for tp in [np.int32, np.int64]:
-                iinfo = np.iinfo(tp)
-                if iinfo.min <= expr <= iinfo.max:
-                    return NumpyType(np.dtype(tp))
-
-            else:
-                raise TypeInferenceFailure("integer constant '%s' too large" % expr)
-
-        dt = np.asarray(expr).dtype
-        if hasattr(expr, "dtype"):
-            return NumpyType(expr.dtype)
-        elif isinstance(expr, np.number):
-            # Numpy types are sized
-            return NumpyType(np.dtype(type(expr)))
-        elif dt.kind == "f":
-            # deduce the smaller type by default
-            return NumpyType(np.dtype(np.float32))
-        elif dt.kind == "c":
-            if np.complex64(expr) == np.complex128(expr):
-                # (COMPLEX_GUESS_LOGIC)
-                # No precision is lost by 'guessing' single precision, use that.
-                # This at least covers simple cases like '1j'.
-                return NumpyType(np.dtype(np.complex64))
-
-            # Codegen for complex types depends on exactly correct types.
-            # Refuse temptation to guess.
-            raise TypeInferenceFailure("Complex constant '%s' needs to "
-                    "be sized for type inference " % expr)
-        else:
-            raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr)
-
-    def map_subscript(self, expr):
-        return self.rec(expr.aggregate)
-
-    def map_linear_subscript(self, expr):
-        return self.rec(expr.aggregate)
-
-    def map_call(self, expr, multiple_types_ok=False):
-        from pymbolic.primitives import Variable
-
-        identifier = expr.function
-        if isinstance(identifier, Variable):
-            identifier = identifier.name
-
-        if identifier in ["indexof", "indexof_vec"]:
-            return self.kernel.index_dtype
-
-        arg_dtypes = tuple(self.rec(par) for par in expr.parameters)
-
-        mangle_result = self.kernel.mangle_function(identifier, arg_dtypes)
-        if multiple_types_ok:
-            if mangle_result is not None:
-                return mangle_result.result_dtypes
-        else:
-            if mangle_result is not None:
-                if len(mangle_result.result_dtypes) != 1 and not multiple_types_ok:
-                    raise LoopyError("functions with more or fewer than one "
-                            "return value may only be used in direct assignments")
-
-                return mangle_result.result_dtypes[0]
-
-        raise RuntimeError("unable to resolve "
-                "function '%s' with %d given arguments"
-                % (identifier, len(arg_dtypes)))
-
-    def map_variable(self, expr):
-        if expr.name in self.kernel.all_inames():
-            return self.kernel.index_dtype
-
-        result = self.kernel.mangle_symbol(
-                self.kernel.target.get_device_ast_builder(),
-                expr.name)
-
-        if result is not None:
-            result_dtype, _ = result
-            return result_dtype
-
-        obj = self.new_assignments.get(expr.name)
-
-        if obj is None:
-            obj = self.kernel.arg_dict.get(expr.name)
-
-        if obj is None:
-            obj = self.kernel.temporary_variables.get(expr.name)
-
-        if obj is None:
-            raise TypeInferenceFailure("name not known in type inference: %s"
-                    % expr.name)
-
-        from loopy.kernel.data import TemporaryVariable, KernelArgument
-        import loopy as lp
-        if isinstance(obj, TemporaryVariable):
-            result = obj.dtype
-            if result is lp.auto:
-                raise DependencyTypeInferenceFailure(
-                        "temporary variable '%s'" % expr.name,
-                        expr.name)
-            else:
-                return result
-
-        elif isinstance(obj, KernelArgument):
-            result = obj.dtype
-            if result is None:
-                raise DependencyTypeInferenceFailure(
-                        "argument '%s'" % expr.name,
-                        expr.name)
-            else:
-                return result
-
-        else:
-            raise RuntimeError("unexpected type inference "
-                    "object type for '%s'" % expr.name)
-
-    map_tagged_variable = map_variable
-
-    def map_lookup(self, expr):
-        agg_result = self.rec(expr.aggregate)
-        field = agg_result.numpy_dtype.fields[expr.name]
-        dtype = field[0]
-        return NumpyType(dtype)
-
-    def map_comparison(self, expr):
-        # "bool" is unusable because OpenCL's bool has indeterminate memory
-        # format.
-        return NumpyType(np.dtype(np.int32))
-
-    map_logical_not = map_comparison
-    map_logical_and = map_comparison
-    map_logical_or = map_comparison
-
-    def map_group_hw_index(self, expr, *args):
-        return self.kernel.index_dtype
-
-    def map_local_hw_index(self, expr, *args):
-        return self.kernel.index_dtype
-
-    def map_reduction(self, expr, multiple_types_ok=False):
-        result = expr.operation.result_dtypes(
-                self.kernel, self.rec(expr.expr), expr.inames)
-
-        if multiple_types_ok:
-            return result
-
-        else:
-            if len(result) != 1 and not multiple_types_ok:
-                raise LoopyError("reductions with more or fewer than one "
-                        "return value may only be used in direct assignments")
-
-            return result[0]
-
-# }}}
-
-
 # {{{ vetorizability checker
 
 class VectorizabilityChecker(RecursiveMapper):
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 7e9bd549f..ad48491fa 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -116,14 +116,14 @@ def add_and_infer_dtypes(knl, dtype_dict):
 
     knl = add_dtypes(knl, processed_dtype_dict)
 
-    from loopy.preprocess import infer_unknown_types
+    from loopy.type_inference import infer_unknown_types
     return infer_unknown_types(knl, expect_completion=True)
 
 
 def _add_and_infer_dtypes_overdetermined(knl, dtype_dict):
     knl = _add_dtypes_overdetermined(knl, dtype_dict)
 
-    from loopy.preprocess import infer_unknown_types
+    from loopy.type_inference import infer_unknown_types
     return infer_unknown_types(knl, expect_completion=True)
 
 # }}}
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index c0f42e55a..027e98ae5 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -35,6 +35,8 @@ from pytools.persistent_dict import PersistentDict
 from loopy.tools import LoopyKeyBuilder
 from loopy.version import DATA_MODEL_VERSION
 from loopy.kernel.data import make_assignment
+# for the benefit of loopy.statistics, for now
+from loopy.type_inference import infer_unknown_types
 
 import logging
 logger = logging.getLogger(__name__)
@@ -109,193 +111,6 @@ def check_reduction_iname_uniqueness(kernel):
 # }}}
 
 
-# {{{ infer types
-
-def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander):
-    if var_name in kernel.all_params():
-        return kernel.index_dtype, []
-
-    def debug(s):
-        logger.debug("%s: %s" % (kernel.name, s))
-
-    dtypes = []
-
-    import loopy as lp
-
-    symbols_with_unavailable_types = []
-
-    from loopy.diagnostic import DependencyTypeInferenceFailure
-    for writer_insn_id in kernel.writer_map().get(var_name, []):
-        writer_insn = kernel.id_to_insn[writer_insn_id]
-        if not isinstance(writer_insn, lp.MultiAssignmentBase):
-            continue
-
-        expr = subst_expander(writer_insn.expression)
-
-        try:
-            debug("             via expr %s" % expr)
-            if isinstance(writer_insn, lp.Assignment):
-                result = type_inf_mapper(expr)
-            elif isinstance(writer_insn, lp.CallInstruction):
-                result_dtypes = type_inf_mapper(expr, multiple_types_ok=True)
-
-                result = None
-                for assignee, comp_dtype in zip(
-                        writer_insn.assignee_var_names(), result_dtypes):
-                    if assignee == var_name:
-                        result = comp_dtype
-                        break
-
-                assert result is not None
-
-            debug("             result: %s" % result)
-
-            dtypes.append(result)
-
-        except DependencyTypeInferenceFailure as e:
-            debug("             failed: %s" % e)
-            symbols_with_unavailable_types.append(e.symbol)
-
-    if not dtypes:
-        return None, symbols_with_unavailable_types
-
-    result = type_inf_mapper.combine(dtypes)
-
-    return result, []
-
-
-class _DictUnionView:
-    def __init__(self, children):
-        self.children = children
-
-    def get(self, key):
-        try:
-            return self[key]
-        except KeyError:
-            return None
-
-    def __getitem__(self, key):
-        for ch in self.children:
-            try:
-                return ch[key]
-            except KeyError:
-                pass
-
-        raise KeyError(key)
-
-
-def infer_unknown_types(kernel, expect_completion=False):
-    """Infer types on temporaries and arguments."""
-
-    logger.debug("%s: infer types" % kernel.name)
-
-    def debug(s):
-        logger.debug("%s: %s" % (kernel.name, s))
-
-    unexpanded_kernel = kernel
-    if kernel.substitutions:
-        from loopy.transform.subst import expand_subst
-        kernel = expand_subst(kernel)
-
-    new_temp_vars = kernel.temporary_variables.copy()
-    new_arg_dict = kernel.arg_dict.copy()
-
-    # {{{ fill queue
-
-    # queue contains temporary variables
-    queue = []
-
-    import loopy as lp
-    for tv in six.itervalues(kernel.temporary_variables):
-        if tv.dtype is lp.auto:
-            queue.append(tv)
-
-    for arg in kernel.args:
-        if arg.dtype is None:
-            queue.append(arg)
-
-    # }}}
-
-    from loopy.expression import TypeInferenceMapper
-    type_inf_mapper = TypeInferenceMapper(kernel,
-            _DictUnionView([
-                new_temp_vars,
-                new_arg_dict
-                ]))
-
-    from loopy.symbolic import SubstitutionRuleExpander
-    subst_expander = SubstitutionRuleExpander(kernel.substitutions)
-
-    # {{{ work on type inference queue
-
-    from loopy.kernel.data import TemporaryVariable, KernelArgument
-
-    failed_names = set()
-    while queue:
-        item = queue.pop(0)
-
-        debug("inferring type for %s %s" % (type(item).__name__, item.name))
-
-        result, symbols_with_unavailable_types = \
-                _infer_var_type(kernel, item.name, type_inf_mapper, subst_expander)
-
-        failed = result is None
-        if not failed:
-            debug("     success: %s" % result)
-            if isinstance(item, TemporaryVariable):
-                new_temp_vars[item.name] = item.copy(dtype=result)
-            elif isinstance(item, KernelArgument):
-                new_arg_dict[item.name] = item.copy(dtype=result)
-            else:
-                raise LoopyError("unexpected item type in type inference")
-        else:
-            debug("     failure")
-
-        if failed:
-            if item.name in failed_names:
-                # this item has failed before, give up.
-                advice = ""
-                if symbols_with_unavailable_types:
-                    advice += (
-                            " (need type of '%s'--check for missing arguments)"
-                            % ", ".join(symbols_with_unavailable_types))
-
-                if expect_completion:
-                    raise LoopyError(
-                            "could not determine type of '%s'%s"
-                            % (item.name, advice))
-
-                else:
-                    # We're done here.
-                    break
-
-            # remember that this item failed
-            failed_names.add(item.name)
-
-            queue_names = set(qi.name for qi in queue)
-
-            if queue_names == failed_names:
-                # We did what we could...
-                print(queue_names, failed_names, item.name)
-                assert not expect_completion
-                break
-
-            # can't infer type yet, put back into queue
-            queue.append(item)
-        else:
-            # we've made progress, reset failure markers
-            failed_names = set()
-
-    # }}}
-
-    return unexpanded_kernel.copy(
-            temporary_variables=new_temp_vars,
-            args=[new_arg_dict[arg.name] for arg in kernel.args],
-            )
-
-# }}}
-
-
 # {{{ decide temporary scope
 
 def _get_compute_inames_tagged(kernel, insn, tag_base):
@@ -462,7 +277,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True):
     var_name_gen = kernel.get_var_name_generator()
     new_temporary_variables = kernel.temporary_variables.copy()
 
-    from loopy.expression import TypeInferenceMapper
+    from loopy.type_inference import TypeInferenceMapper
     type_inf_mapper = TypeInferenceMapper(kernel)
 
     # {{{ sequential
diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py
index 91c42c542..51cfc7fe6 100644
--- a/loopy/target/c/codegen/expression.py
+++ b/loopy/target/c/codegen/expression.py
@@ -36,7 +36,8 @@ import pymbolic.primitives as p
 from pymbolic import var
 
 
-from loopy.expression import dtype_to_type_context, TypeInferenceMapper
+from loopy.expression import dtype_to_type_context
+from loopy.type_inference import TypeInferenceMapper
 
 from loopy.diagnostic import LoopyError, LoopyWarning
 from loopy.tools import is_integer
diff --git a/loopy/target/python.py b/loopy/target/python.py
index 591161d81..036e60ab1 100644
--- a/loopy/target/python.py
+++ b/loopy/target/python.py
@@ -29,7 +29,7 @@ import numpy as np
 
 from pymbolic.mapper import Mapper
 from pymbolic.mapper.stringifier import StringifyMapper
-from loopy.expression import TypeInferenceMapper
+from loopy.type_inference import TypeInferenceMapper
 from loopy.kernel.data import ValueArg
 from loopy.diagnostic import LoopyError  # noqa
 from loopy.target import ASTBuilderBase
diff --git a/loopy/type_inference.py b/loopy/type_inference.py
new file mode 100644
index 000000000..b33917255
--- /dev/null
+++ b/loopy/type_inference.py
@@ -0,0 +1,486 @@
+from __future__ import division, absolute_import
+
+__copyright__ = "Copyright (C) 2012-16 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import six
+
+from pymbolic.mapper import CombineMapper
+import numpy as np
+
+from loopy.tools import is_integer
+from loopy.types import NumpyType
+
+from loopy.diagnostic import (
+        LoopyError,
+        TypeInferenceFailure, DependencyTypeInferenceFailure)
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+# {{{ type inference
+
+class TypeInferenceMapper(CombineMapper):
+    def __init__(self, kernel, new_assignments=None):
+        """
+        :arg new_assignments: mapping from names to either
+            :class:`loopy.kernel.data.TemporaryVariable`
+            or
+            :class:`loopy.kernel.data.KernelArgument`
+            instances
+        """
+        self.kernel = kernel
+        if new_assignments is None:
+            new_assignments = {}
+        self.new_assignments = new_assignments
+
+    # /!\ Introduce caches with care--numpy.float32(x) and numpy.float64(x)
+    # are Python-equal (for many common constants such as integers).
+
+    def with_assignments(self, names_to_vars):
+        new_ass = self.new_assignments.copy()
+        new_ass.update(names_to_vars)
+        return type(self)(self.kernel, new_ass)
+
+    @staticmethod
+    def combine(dtypes):
+        # dtypes may just be a generator expr
+        dtypes = list(dtypes)
+
+        from loopy.types import LoopyType, NumpyType
+        assert all(isinstance(dtype, LoopyType) for dtype in dtypes)
+
+        if not all(isinstance(dtype, NumpyType) for dtype in dtypes):
+            from pytools import is_single_valued, single_valued
+            if not is_single_valued(dtypes):
+                raise TypeInferenceFailure(
+                        "Nothing known about operations between '%s'"
+                        % ", ".join(str(dt) for dt in dtypes))
+
+            return single_valued(dtypes)
+
+        dtypes = [dtype.dtype for dtype in dtypes]
+
+        result = dtypes.pop()
+        while dtypes:
+            other = dtypes.pop()
+
+            if result.fields is None and other.fields is None:
+                if (result, other) in [
+                        (np.int32, np.float32), (np.float32, np.int32)]:
+                    # numpy makes this a double. I disagree.
+                    result = np.dtype(np.float32)
+                else:
+                    result = (
+                            np.empty(0, dtype=result)
+                            + np.empty(0, dtype=other)
+                            ).dtype
+
+            elif result.fields is None and other.fields is not None:
+                # assume the non-native type takes over
+                # (This is used for vector types.)
+                result = other
+            elif result.fields is not None and other.fields is None:
+                # assume the non-native type takes over
+                # (This is used for vector types.)
+                pass
+            else:
+                if result is not other:
+                    raise TypeInferenceFailure(
+                            "nothing known about result of operation on "
+                            "'%s' and '%s'" % (result, other))
+
+        return NumpyType(result)
+
+    def map_sum(self, expr):
+        dtypes = []
+        small_integer_dtypes = []
+        for child in expr.children:
+            dtype = self.rec(child)
+            if is_integer(child) and abs(child) < 1024:
+                small_integer_dtypes.append(dtype)
+            else:
+                dtypes.append(dtype)
+
+        from pytools import all
+        if all(dtype.is_integral() for dtype in dtypes):
+            dtypes.extend(small_integer_dtypes)
+
+        return self.combine(dtypes)
+
+    map_product = map_sum
+
+    def map_quotient(self, expr):
+        n_dtype = self.rec(expr.numerator)
+        d_dtype = self.rec(expr.denominator)
+
+        if n_dtype.is_integral() and d_dtype.is_integral():
+            # both integers
+            return NumpyType(np.dtype(np.float64))
+
+        else:
+            return self.combine([n_dtype, d_dtype])
+
+    def map_constant(self, expr):
+        if is_integer(expr):
+            for tp in [np.int32, np.int64]:
+                iinfo = np.iinfo(tp)
+                if iinfo.min <= expr <= iinfo.max:
+                    return NumpyType(np.dtype(tp))
+
+            else:
+                raise TypeInferenceFailure("integer constant '%s' too large" % expr)
+
+        dt = np.asarray(expr).dtype
+        if hasattr(expr, "dtype"):
+            return NumpyType(expr.dtype)
+        elif isinstance(expr, np.number):
+            # Numpy types are sized
+            return NumpyType(np.dtype(type(expr)))
+        elif dt.kind == "f":
+            # deduce the smaller type by default
+            return NumpyType(np.dtype(np.float32))
+        elif dt.kind == "c":
+            if np.complex64(expr) == np.complex128(expr):
+                # (COMPLEX_GUESS_LOGIC)
+                # No precision is lost by 'guessing' single precision, use that.
+                # This at least covers simple cases like '1j'.
+                return NumpyType(np.dtype(np.complex64))
+
+            # Codegen for complex types depends on exactly correct types.
+            # Refuse temptation to guess.
+            raise TypeInferenceFailure("Complex constant '%s' needs to "
+                    "be sized for type inference " % expr)
+        else:
+            raise TypeInferenceFailure("Cannot deduce type of constant '%s'" % expr)
+
+    def map_subscript(self, expr):
+        return self.rec(expr.aggregate)
+
+    def map_linear_subscript(self, expr):
+        return self.rec(expr.aggregate)
+
+    def map_call(self, expr, multiple_types_ok=False):
+        from pymbolic.primitives import Variable
+
+        identifier = expr.function
+        if isinstance(identifier, Variable):
+            identifier = identifier.name
+
+        if identifier in ["indexof", "indexof_vec"]:
+            return self.kernel.index_dtype
+
+        arg_dtypes = tuple(self.rec(par) for par in expr.parameters)
+
+        mangle_result = self.kernel.mangle_function(identifier, arg_dtypes)
+        if multiple_types_ok:
+            if mangle_result is not None:
+                return mangle_result.result_dtypes
+        else:
+            if mangle_result is not None:
+                if len(mangle_result.result_dtypes) != 1 and not multiple_types_ok:
+                    raise LoopyError("functions with more or fewer than one "
+                            "return value may only be used in direct assignments")
+
+                return mangle_result.result_dtypes[0]
+
+        raise RuntimeError("unable to resolve "
+                "function '%s' with %d given arguments"
+                % (identifier, len(arg_dtypes)))
+
+    def map_variable(self, expr):
+        if expr.name in self.kernel.all_inames():
+            return self.kernel.index_dtype
+
+        result = self.kernel.mangle_symbol(
+                self.kernel.target.get_device_ast_builder(),
+                expr.name)
+
+        if result is not None:
+            result_dtype, _ = result
+            return result_dtype
+
+        obj = self.new_assignments.get(expr.name)
+
+        if obj is None:
+            obj = self.kernel.arg_dict.get(expr.name)
+
+        if obj is None:
+            obj = self.kernel.temporary_variables.get(expr.name)
+
+        if obj is None:
+            raise TypeInferenceFailure("name not known in type inference: %s"
+                    % expr.name)
+
+        from loopy.kernel.data import TemporaryVariable, KernelArgument
+        import loopy as lp
+        if isinstance(obj, TemporaryVariable):
+            result = obj.dtype
+            if result is lp.auto:
+                raise DependencyTypeInferenceFailure(
+                        "temporary variable '%s'" % expr.name,
+                        expr.name)
+            else:
+                return result
+
+        elif isinstance(obj, KernelArgument):
+            result = obj.dtype
+            if result is None:
+                raise DependencyTypeInferenceFailure(
+                        "argument '%s'" % expr.name,
+                        expr.name)
+            else:
+                return result
+
+        else:
+            raise RuntimeError("unexpected type inference "
+                    "object type for '%s'" % expr.name)
+
+    map_tagged_variable = map_variable
+
+    def map_lookup(self, expr):
+        agg_result = self.rec(expr.aggregate)
+        field = agg_result.numpy_dtype.fields[expr.name]
+        dtype = field[0]
+        return NumpyType(dtype)
+
+    def map_comparison(self, expr):
+        # "bool" is unusable because OpenCL's bool has indeterminate memory
+        # format.
+        return NumpyType(np.dtype(np.int32))
+
+    map_logical_not = map_comparison
+    map_logical_and = map_comparison
+    map_logical_or = map_comparison
+
+    def map_group_hw_index(self, expr, *args):
+        return self.kernel.index_dtype
+
+    def map_local_hw_index(self, expr, *args):
+        return self.kernel.index_dtype
+
+    def map_reduction(self, expr, multiple_types_ok=False):
+        result = expr.operation.result_dtypes(
+                self.kernel, self.rec(expr.expr), expr.inames)
+
+        if multiple_types_ok:
+            return result
+
+        else:
+            if len(result) != 1 and not multiple_types_ok:
+                raise LoopyError("reductions with more or fewer than one "
+                        "return value may only be used in direct assignments")
+
+            return result[0]
+
+# }}}
+
+
+# {{{ infer types
+
+def _infer_var_type(kernel, var_name, type_inf_mapper, subst_expander):
+    if var_name in kernel.all_params():
+        return kernel.index_dtype, []
+
+    def debug(s):
+        logger.debug("%s: %s" % (kernel.name, s))
+
+    dtypes = []
+
+    import loopy as lp
+
+    symbols_with_unavailable_types = []
+
+    from loopy.diagnostic import DependencyTypeInferenceFailure
+    for writer_insn_id in kernel.writer_map().get(var_name, []):
+        writer_insn = kernel.id_to_insn[writer_insn_id]
+        if not isinstance(writer_insn, lp.MultiAssignmentBase):
+            continue
+
+        expr = subst_expander(writer_insn.expression)
+
+        try:
+            debug("             via expr %s" % expr)
+            if isinstance(writer_insn, lp.Assignment):
+                result = type_inf_mapper(expr)
+            elif isinstance(writer_insn, lp.CallInstruction):
+                result_dtypes = type_inf_mapper(expr, multiple_types_ok=True)
+
+                result = None
+                for assignee, comp_dtype in zip(
+                        writer_insn.assignee_var_names(), result_dtypes):
+                    if assignee == var_name:
+                        result = comp_dtype
+                        break
+
+                assert result is not None
+
+            debug("             result: %s" % result)
+
+            dtypes.append(result)
+
+        except DependencyTypeInferenceFailure as e:
+            debug("             failed: %s" % e)
+            symbols_with_unavailable_types.append(e.symbol)
+            #dtypes = None
+            #break
+
+    if not dtypes:
+        return None, symbols_with_unavailable_types
+
+    result = type_inf_mapper.combine(dtypes)
+
+    return result, []
+
+
+class _DictUnionView:
+    def __init__(self, children):
+        self.children = children
+
+    def get(self, key):
+        try:
+            return self[key]
+        except KeyError:
+            return None
+
+    def __getitem__(self, key):
+        for ch in self.children:
+            try:
+                return ch[key]
+            except KeyError:
+                pass
+
+        raise KeyError(key)
+
+
+def infer_unknown_types(kernel, expect_completion=False):
+    """Infer types on temporaries and arguments."""
+
+    logger.debug("%s: infer types" % kernel.name)
+
+    def debug(s):
+        logger.debug("%s: %s" % (kernel.name, s))
+
+    unexpanded_kernel = kernel
+    if kernel.substitutions:
+        from loopy.transform.subst import expand_subst
+        kernel = expand_subst(kernel)
+
+    new_temp_vars = kernel.temporary_variables.copy()
+    new_arg_dict = kernel.arg_dict.copy()
+
+    # {{{ fill queue
+
+    # queue contains temporary variables
+    queue = []
+
+    import loopy as lp
+    for tv in six.itervalues(kernel.temporary_variables):
+        if tv.dtype is lp.auto:
+            queue.append(tv)
+
+    for arg in kernel.args:
+        if arg.dtype is None:
+            queue.append(arg)
+
+    # }}}
+
+    type_inf_mapper = TypeInferenceMapper(kernel,
+            _DictUnionView([
+                new_temp_vars,
+                new_arg_dict
+                ]))
+
+    from loopy.symbolic import SubstitutionRuleExpander
+    subst_expander = SubstitutionRuleExpander(kernel.substitutions)
+
+    # {{{ work on type inference queue
+
+    from loopy.kernel.data import TemporaryVariable, KernelArgument
+
+    failed_names = set()
+    while queue:
+        item = queue.pop(0)
+
+        debug("inferring type for %s %s" % (type(item).__name__, item.name))
+
+        result, symbols_with_unavailable_types = \
+                _infer_var_type(kernel, item.name, type_inf_mapper, subst_expander)
+
+        failed = result is None
+        if not failed:
+            debug("     success: %s" % result)
+            if isinstance(item, TemporaryVariable):
+                new_temp_vars[item.name] = item.copy(dtype=result)
+            elif isinstance(item, KernelArgument):
+                new_arg_dict[item.name] = item.copy(dtype=result)
+            else:
+                raise LoopyError("unexpected item type in type inference")
+        else:
+            debug("     failure")
+
+        if failed:
+            if item.name in failed_names:
+                # this item has failed before, give up.
+                advice = ""
+                if symbols_with_unavailable_types:
+                    advice += (
+                            " (need type of '%s'--check for missing arguments)"
+                            % ", ".join(symbols_with_unavailable_types))
+
+                if expect_completion:
+                    raise LoopyError(
+                            "could not determine type of '%s'%s"
+                            % (item.name, advice))
+
+                else:
+                    # We're done here.
+                    break
+
+            # remember that this item failed
+            failed_names.add(item.name)
+
+            queue_names = set(qi.name for qi in queue)
+
+            if queue_names == failed_names:
+                # We did what we could...
+                print(queue_names, failed_names, item.name)
+                assert not expect_completion
+                break
+
+            # can't infer type yet, put back into queue
+            queue.append(item)
+        else:
+            # we've made progress, reset failure markers
+            failed_names = set()
+
+    # }}}
+
+    return unexpanded_kernel.copy(
+            temporary_variables=new_temp_vars,
+            args=[new_arg_dict[arg.name] for arg in kernel.args],
+            )
+
+# }}}
+
+# vim: foldmethod=marker
-- 
GitLab


From 2106baa3ae1f7af9f5fba2d473ffc3d41bd005e9 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 9 Nov 2016 17:31:58 -0600
Subject: [PATCH 2/2] Fix straggler type inf import

---
 loopy/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/loopy/statistics.py b/loopy/statistics.py
index 47abfe53a..a4662f8d7 100755
--- a/loopy/statistics.py
+++ b/loopy/statistics.py
@@ -114,7 +114,7 @@ class ExpressionOpCounter(CombineMapper):
 
     def __init__(self, knl):
         self.knl = knl
-        from loopy.expression import TypeInferenceMapper
+        from loopy.type_inference import TypeInferenceMapper
         self.type_inf = TypeInferenceMapper(knl)
 
     def combine(self, values):
@@ -245,7 +245,7 @@ class GlobalSubscriptCounter(CombineMapper):
 
     def __init__(self, knl):
         self.knl = knl
-        from loopy.expression import TypeInferenceMapper
+        from loopy.type_inference import TypeInferenceMapper
         self.type_inf = TypeInferenceMapper(knl)
 
     def combine(self, values):
-- 
GitLab