From 3c4aa273dfc3855d9675e9c41deaeddf8897d106 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 29 May 2013 10:56:28 -0400
Subject: [PATCH] Automatically deduce domain parameters, if possible.

---
 MEMO                     |  6 ++---
 loopy/compiled.py        |  3 +++
 loopy/kernel/__init__.py |  5 ++++
 loopy/kernel/tools.py    | 57 ++++++++++++++++++++++++++++++++++++++++
 loopy/symbolic.py        | 56 +++------------------------------------
 test/test_loopy.py       |  2 +-
 6 files changed, 73 insertions(+), 56 deletions(-)

diff --git a/MEMO b/MEMO
index 839979e9a..a92570307 100644
--- a/MEMO
+++ b/MEMO
@@ -107,9 +107,6 @@ Future ideas
 
 - Float4 joining on fetch/store?
 
-- How can one automatically generate something like microblocks?
-  -> Some sort of axis-adding transform?
-
 - Better for loop bound generation
   -> Try a triangular loop
 
@@ -134,6 +131,9 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- How can one automatically generate something like microblocks?
+  -> Some sort of axis-adding transform?
+
 - ExpandingIdentityMapper
   extract_subst -> needs WalkMapper [actually fine as is]
   padding [DONE]
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 2652d332f..ef6743570 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -298,6 +298,9 @@ class CompiledKernel:
 
         import loopy as lp
 
+        kwargs.update(
+                kernel.domain_parameter_finder()(kwargs))
+
         domain_parameters = dict((name, int(kwargs[name]))
                 for name in kernel.scalar_loop_args)
 
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index ac42438d4..175bf1d48 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -861,6 +861,11 @@ class LoopKernel(Record):
 
     # }}}
 
+    @memoize_method  # presumably builds the finder once per kernel (TODO confirm)
+    def domain_parameter_finder(self):  # -> DomainParameterFinder for this kernel
+        from loopy.kernel.tools import DomainParameterFinder
+        return DomainParameterFinder(self)
+
 # }}}
 
 # vim: foldmethod=marker
diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py
index 560745084..dadd2c04e 100644
--- a/loopy/kernel/tools.py
+++ b/loopy/kernel/tools.py
@@ -291,4 +291,61 @@ def get_dot_dependency_graph(kernel, iname_cluster=False, iname_edge=True):
 
 # }}}
 
+# {{{ domain parameter finder
+
+class DomainParameterFinder:
+    """Deduce kernel domain parameters from shapes of passed arguments.
+
+    :attr:`param_to_sources` maps each parameter name to a list of
+    ``(arg_name, axis_nr, function)`` tuples: *function* applied to axis
+    *axis_nr* of the shape of the array passed as *arg_name* gives the parameter.
+    """
+
+    def __init__(self, kernel):
+        """Record, for *kernel*, which argument shape axes determine a parameter."""
+        self.param_to_sources = param_to_sources = {}
+        param_names = kernel.all_params()
+
+        from loopy.kernel.data import GlobalArg
+        from loopy.symbolic import DependencyMapper
+        from pymbolic import compile, var
+        from pymbolic.algorithm import solve_affine_equations_for
+        dep_map = DependencyMapper()
+
+        for arg in kernel.args:
+            if not isinstance(arg, GlobalArg):
+                continue
+            for axis_nr, shape_i in enumerate(arg.shape):
+                deps = dep_map(shape_i)
+                # only axes depending on exactly one known parameter are usable
+                if len(deps) != 1:
+                    continue
+                dep, = deps
+                if dep.name not in param_names:
+                    continue
+                try:
+                    # invert shape_i == f(param) to obtain param == g(shape_i)
+                    param_expr = solve_affine_equations_for(
+                            [dep.name], [(shape_i, var("shape_i"))])[dep.name]
+                except Exception:
+                    # best effort: an axis that cannot be inverted is skipped
+                    continue
+                param_to_sources.setdefault(dep.name, []).append(
+                        (arg.name, axis_nr, compile(param_expr, ["shape_i"])))
+
+    def __call__(self, kwargs):
+        """Return a dict of parameters missing from *kwargs*, deduced from shapes."""
+        result = {}
+        for param_name, sources in self.param_to_sources.items():
+            if param_name in kwargs:
+                continue
+            for arg_name, axis_nr, shape_func in sources:
+                if arg_name in kwargs:
+                    result[param_name] = shape_func(kwargs[arg_name].shape[axis_nr])
+                    break  # the first available source wins
+
+        return result
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index 7267d7be6..b6a389e46 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -47,6 +47,8 @@ from pymbolic.mapper.stringifier import \
         StringifyMapper as StringifyMapperBase
 from pymbolic.mapper.dependency import \
         DependencyMapper as DependencyMapperBase
+from pymbolic.mapper.coefficient import \
+        CoefficientCollector as CoefficientCollectorBase
 from pymbolic.mapper.unifier import UnidirectionalUnifier \
         as UnidirectionalUnifierBase
 
@@ -666,58 +668,8 @@ def parse(expr_str):
 
 # {{{ coefficient collector
 
-class CoefficientCollector(RecursiveMapper):
-    def map_sum(self, expr):
-        stride_dicts = [self.rec(ch) for ch in expr.children]
-
-        result = {}
-        for stride_dict in stride_dicts:
-            for var, stride in stride_dict.iteritems():
-                if var in result:
-                    result[var] += stride
-                else:
-                    result[var] = stride
-
-        return result
-
-    def map_product(self, expr):
-        result = {}
-
-        children_coeffs = [self.rec(child) for child in expr.children]
-
-        idx_of_child_with_vars = None
-        for i, child_coeffs in enumerate(children_coeffs):
-            for k in child_coeffs:
-                if isinstance(k, str):
-                    if (idx_of_child_with_vars is not None
-                            and idx_of_child_with_vars != i):
-                        raise RuntimeError(
-                                "nonlinear expression")
-                    idx_of_child_with_vars = i
-
-        other_coeffs = 1
-        for i, child_coeffs in enumerate(children_coeffs):
-            if i != idx_of_child_with_vars:
-                assert len(child_coeffs) == 1
-                other_coeffs *= child_coeffs[1]
-
-        if idx_of_child_with_vars is None:
-            return {1: other_coeffs}
-        else:
-            return dict(
-                    (var, other_coeffs*coeff)
-                    for var, coeff in
-                    children_coeffs[idx_of_child_with_vars].iteritems())
-
-        return result
-
-    def map_constant(self, expr):
-        return {1: expr}
-
-    def map_variable(self, expr):
-        return {expr.name: 1}
-
-    map_tagged_variable = map_variable
+class CoefficientCollector(CoefficientCollectorBase):
+    map_tagged_variable = CoefficientCollectorBase.map_variable
 
     def map_subscript(self, expr):
         raise RuntimeError("cannot gather coefficients--indirect addressing in use")
diff --git a/test/test_loopy.py b/test/test_loopy.py
index bc90c6906..dd44dae52 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1201,7 +1201,7 @@ def test_array_with_offset(ctx_factory):
     a = a_full[3:10]
 
     print cknl.get_highlighted_code({"a": a.dtype}, {"a": True, "b": False})
-    evt, (b,) = cknl(queue, a=a, n=a.shape[0], m=a.shape[1])
+    evt, (b,) = cknl(queue, a=a)
 
     import numpy.linalg as la
     assert la.norm(b.get() - 2*a.get()) < 1e-13
-- 
GitLab