From 128d1c4cc58fa922c7413cc263fed37446db1c4d Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 21 Nov 2016 15:32:57 +0100 Subject: [PATCH 1/5] Add a first implementation of an FMA node * Add a node 'FusedMultiplyAdd' to loopy.symbolic * Implement handlers in the affected mappers (IdentityMapperMixin, WalkMapper, StringifyMapper, DependencyMapper) * In textual language, have a function fma(a,b,c) which is translated to the node * Throw away the node in favor of sum/product in ExpressionToCExpressionMapper Not yet implemented (only as a dummy): type inference --- loopy/symbolic.py | 49 +++++++++++++++++++++++++++- loopy/target/c/codegen/expression.py | 6 ++++ loopy/type_inference.py | 4 +++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 74bb5c1d1..e49309d84 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -109,6 +109,12 @@ class IdentityMapperMixin(object): map_rule_argument = map_group_hw_index + def map_fused_multiply_add(self, expr, *args): + return FusedMultiplyAdd(self.rec(expr.mul_op1), + self.rec(expr.mul_op2), + self.rec(expr.add_op), + ) + class IdentityMapper(IdentityMapperBase, IdentityMapperMixin): pass @@ -151,6 +157,11 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index + def map_fused_multiply_add(self, expr, *args): + self.rec(expr.mul_op1, *args) + self.rec(expr.mul_op2, *args) + self.rec(expr.add_op, *args) + class CallbackMapper(CallbackMapperBase, IdentityMapper): map_reduction = CallbackMapperBase.map_constant @@ -209,6 +220,12 @@ class StringifyMapper(StringifyMapperBase): def map_rule_argument(self, expr, enclosing_prec): return "<arg%d>" % expr.index + def map_fused_multiply_add(self, expr, enclosing_prec): + from pymbolic.mapper.stringifier import PREC_NONE + return "fma(%s*%s+%s)" % (self.rec(expr.mul_op1, PREC_NONE), + self.rec(expr.mul_op2, PREC_NONE), + self.rec(expr.add_op, PREC_NONE)) + class UnidirectionalUnifier(UnidirectionalUnifierBase): def map_reduction(self, expr, other, unis): @@ -263,6 +280,12 @@ class 
DependencyMapper(DependencyMapperBase): map_linear_subscript = DependencyMapperBase.map_subscript + def map_fused_multiply_add(self, expr): + return self.combine((self.rec(expr.mul_op1), + self.rec(expr.mul_op2), + self.rec(expr.add_op) + )) + class SubstitutionRuleExpander(IdentityMapper): def __init__(self, rules): @@ -540,6 +563,25 @@ class RuleArgument(Expression): mapper_method = intern("map_rule_argument") + +class FusedMultiplyAdd(Expression): + """ Represents an FMA operation """ + + init_arg_names = ("mul_op1", "mul_op2", "add_op") + + def __init__(self, mul_op1, mul_op2, add_op): + self.mul_op1 = mul_op1 + self.mul_op2 = mul_op2 + self.add_op = add_op + + def __getinitargs__(self): + return (self.mul_op1, self.mul_op2, self.add_op) + + def stringifier(self): + return StringifyMapper + + mapper_method = intern("map_fused_multiply_add") + # }}} @@ -914,7 +956,7 @@ class VarToTaggedVarMapper(IdentityMapper): class FunctionToPrimitiveMapper(IdentityMapper): - """Looks for invocations of a function called 'cse' or 'reduce' and + """Looks for invocations of a function called 'cse', 'reduce' or 'fma' and turns those into the actual pymbolic primitives used for that. 
""" @@ -982,6 +1024,11 @@ class FunctionToPrimitiveMapper(IdentityMapper): else: raise TypeError("if takes three arguments") + elif name == 'fma': + if len(expr.parameters) == 3: + return FusedMultiplyAdd(*tuple(self.rec(p) for p in expr.parameters)) + else: + raise TypeError("FMA takes 3 arguments: fma(a,b,c) -> a*b + c") else: # see if 'name' is an existing reduction op diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index bd5a74782..2a2621a90 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -658,6 +658,12 @@ class ExpressionToCExpressionMapper(IdentityMapper): return base_impl(expr, type_context) + def map_fused_multiply_add(self, expr, type_context): + mul_op1 = self.rec(expr.mul_op1, type_context) + mul_op2 = self.rec(expr.mul_op2, type_context) + add_op = self.rec(expr.add_op, type_context) + return mul_op1 * mul_op2 + add_op + # }}} def map_group_hw_index(self, expr, type_context): diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 16be9605c..40ee9474b 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -374,6 +374,10 @@ class TypeInferenceMapper(CombineMapper): return [result[0]] + # TODO This is a dummy implementation! 
+ def map_fused_multiply_add(self, expr): + return self.rec(expr.mul_op1) + # }}} -- GitLab From d48873a642cdc51db800eef0e1e55c27b768acce Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 21 Nov 2016 18:44:29 +0100 Subject: [PATCH 2/5] Call self.visit in WalkMapper handler for FMA node --- loopy/symbolic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index e49309d84..55081280c 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -158,6 +158,9 @@ class WalkMapper(WalkMapperBase): map_rule_argument = map_group_hw_index def map_fused_multiply_add(self, expr, *args): + if not self.visit(expr): + return + self.rec(expr.mul_op1, *args) self.rec(expr.mul_op2, *args) self.rec(expr.add_op, *args) -- GitLab From 42e33b69ea896e493e6b637f9b354632d4bd5a76 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Tue, 22 Nov 2016 14:33:24 +0100 Subject: [PATCH 3/5] Do not bypass map_sum and map_product of ExpressionToCExpressionMapper --- loopy/target/c/codegen/expression.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 2a2621a90..74d58c197 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -659,10 +659,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): return base_impl(expr, type_context) def map_fused_multiply_add(self, expr, type_context): - mul_op1 = self.rec(expr.mul_op1, type_context) - mul_op2 = self.rec(expr.mul_op2, type_context) - add_op = self.rec(expr.add_op, type_context) - return mul_op1 * mul_op2 + add_op + return self.rec(expr.mul_op1 * expr.mul_op2 + expr.add_op, type_context) # }}} -- GitLab From 74cf9f50ff0582b7d9a7c699a502f5868fce3cf3 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Thu, 9 Mar 2017 14:34:34 +0100 Subject: [PATCH 4/5] [bugfix] Pass arguments into the recursion in IdentityMapper.map_fused_multiply_add --- loopy/symbolic.py | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 55081280c..25f0e15bb 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -110,9 +110,9 @@ class IdentityMapperMixin(object): map_rule_argument = map_group_hw_index def map_fused_multiply_add(self, expr, *args): - return FusedMultiplyAdd(self.rec(expr.mul_op1), - self.rec(expr.mul_op2), - self.rec(expr.add_op), + return FusedMultiplyAdd(self.rec(expr.mul_op1, *args), + self.rec(expr.mul_op2, *args), + self.rec(expr.add_op, *args), ) -- GitLab From 869ebc8c8eb61f816ea8dabe0d62b93b56f21012 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Thu, 9 Mar 2017 14:36:02 +0100 Subject: [PATCH 5/5] Add VectorizabilityChecker mapper implementation of FMA node --- loopy/expression.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/expression.py b/loopy/expression.py index 3269bc09f..ec52cb9d2 100644 --- a/loopy/expression.py +++ b/loopy/expression.py @@ -175,6 +175,9 @@ class VectorizabilityChecker(RecursiveMapper): # FIXME: Do this more carefully raise Unvectorizable() + def vectorizability_map_fused_multiply_add(self, expr): + return all((self.rec(expr.mul_op1), self.rec(expr.mul_op2), self.rec(expr.add_op))) + # }}} # vim: fdm=marker -- GitLab