diff --git a/loopy/diagnostic.py b/loopy/diagnostic.py index 512e4ac8619f33856d0a8ed929de0b574f7da014..16e859e0be38808e5af9e26af5cf540547b704f3 100644 --- a/loopy/diagnostic.py +++ b/loopy/diagnostic.py @@ -107,6 +107,10 @@ class UnscheduledInstructionError(LoopyError): class ReductionIsNotTriangularError(LoopyError): pass + +class ExpressionNotAffineError(LoopyError): + pass + # }}} diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 96933f57a003aaca58ed00d2d73c3301b0c448c7..a4e6036cbac0235590d7cc66a201c47ac87d6030 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -473,7 +473,7 @@ class TemporaryVariable(ArrayBase): return False elif self.scope == temp_var_scope.GLOBAL: raise LoopyError("TemporaryVariable.is_local called on " - "global temporary variable '%s'" % self.name) + "global temporary variable '%s'" % self.name) else: raise LoopyError("unexpected value of TemporaryVariable.scope") diff --git a/loopy/statistics.py b/loopy/statistics.py index 21c8be307af15782ab116c9572062348a3c4b4a1..a2dcb684620e5cceb821990b5085f5283d252af1 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -30,7 +30,8 @@ import islpy as isl from pytools import memoize_in from pymbolic.mapper import CombineMapper from functools import reduce -from loopy.kernel.data import MultiAssignmentBase +from loopy.kernel.data import ( + MultiAssignmentBase, TemporaryVariable, temp_var_scope) from loopy.diagnostic import warn_with_kernel, LoopyError @@ -794,7 +795,8 @@ class LocalMemAccessCounter(MemAccessCounter): sub_map = ToCountMap() if name in self.knl.temporary_variables: array = self.knl.temporary_variables[name] - if array.is_local: + if isinstance(array, TemporaryVariable) and ( + array.scope == temp_var_scope.LOCAL): sub_map[MemAccess(mtype='local', dtype=dtype)] = 1 return sub_map @@ -902,7 +904,13 @@ class GlobalMemAccessCounter(MemAccessCounter): for idx, axis_tag in zip(index, array.dim_tags): from loopy.symbolic import simplify_using_aff - coeffs = CoefficientCollector()(simplify_using_aff(self.knl, idx)) + from loopy.diagnostic import ExpressionNotAffineError + try: + coeffs = CoefficientCollector()( + simplify_using_aff(self.knl, idx)) + except ExpressionNotAffineError: + total_stride = None + break # check if he contains the lid 0 guy try: coeff_min_lid = coeffs[Variable(min_lid)] @@ -1319,37 +1327,47 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False): access_counter_g = GlobalMemAccessCounter(knl) access_counter_l = LocalMemAccessCounter(knl) + from loopy.kernel.instruction import ( + CallInstruction, CInstruction, Assignment, + NoOpInstruction, BarrierInstruction) + for insn in knl.instructions: - access_expr = ( - access_counter_g(insn.expression) - + access_counter_l(insn.expression) - ).with_set_attributes(direction="load") - - access_assignee_g = access_counter_g(insn.assignee).with_set_attributes( - direction="store") - - # FIXME: (!!!!) for now, don't count writes to local mem - - # use count excluding local index tags for uniform accesses - for key, val in six.iteritems(access_expr.count_map): - is_uniform = (key.mtype == 'global' and - isinstance(key.stride, int) and - key.stride == 0) - access_map = ( - access_map - + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) - #currently not counting stride of local mem access - - for key, val in six.iteritems(access_assignee_g.count_map): - is_uniform = (key.mtype == 'global' and - isinstance(key.stride, int) and - key.stride == 0) - access_map = ( - access_map - + ToCountMap({key: val}) - * get_insn_count(knl, insn.id, is_uniform)) - # for now, don't count writes to local mem + if isinstance(insn, (CallInstruction, CInstruction, Assignment)): + access_expr = ( + access_counter_g(insn.expression) + + access_counter_l(insn.expression) + ).with_set_attributes(direction="load") + + access_assignee_g = access_counter_g(insn.assignee).with_set_attributes( + direction="store") + + # FIXME: (!!!!) for now, don't count writes to local mem + + # use count excluding local index tags for uniform accesses + for key, val in six.iteritems(access_expr.count_map): + is_uniform = (key.mtype == 'global' and + isinstance(key.stride, int) and + key.stride == 0) + access_map = ( + access_map + + ToCountMap({key: val}) + * get_insn_count(knl, insn.id, is_uniform)) + #currently not counting stride of local mem access + + for key, val in six.iteritems(access_assignee_g.count_map): + is_uniform = (key.mtype == 'global' and + isinstance(key.stride, int) and + key.stride == 0) + access_map = ( + access_map + + ToCountMap({key: val}) + * get_insn_count(knl, insn.id, is_uniform)) + # for now, don't count writes to local mem + elif isinstance(insn, (NoOpInstruction, BarrierInstruction)): + pass + else: + raise NotImplementedError("unexpected instruction item type: '%s'" + % type(insn).__name__) if numpy_types: # FIXME: Don't modify in-place diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 2d31c63ef13774599de27ae871be64bc5acb7514..9e16c3a598246aa71e125ce3d04f372d7c90f28e 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1177,7 +1177,9 @@ class CoefficientCollector(CoefficientCollectorBase): map_tagged_variable = CoefficientCollectorBase.map_variable def map_subscript(self, expr): - raise RuntimeError("cannot gather coefficients--indirect addressing in use") + from loopy.diagnostic import ExpressionNotAffineError + raise ExpressionNotAffineError("cannot gather coefficients--" + "indirect addressing in use") # }}}