diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 6224d9709f5f796f84c3fd177125e0703d92d173..3ef7c8f6ad6c8af09dd01bf9e1341179d2be0be7 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -126,10 +126,13 @@ def generate_assignment_instruction_code(codegen_state, insn): # }}} - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript lhs = insn.assignee + if isinstance(lhs, Lookup): + lhs = lhs.aggregate + if isinstance(lhs, Variable): assignee_var_name = lhs.name assignee_indices = () @@ -145,6 +148,8 @@ def generate_assignment_instruction_code(codegen_state, insn): else: raise RuntimeError("invalid lvalue '%s'" % lhs) + del lhs + result = codegen_state.ast_builder.emit_assignment(codegen_state, insn) # {{{ tracing diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index a334462049634fff1e3137ffd09acd3ef254bb51..0110a06095fa0bd690045f050136027d7bed3a28 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -465,12 +465,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): else: inner_ast = inner.current_ast(codegen_state) + + from loopy.isl_helpers import simplify_pw_aff + result.append( inner.with_new_ast( codegen_state, astb.emit_sequential_loop( codegen_state, loop_iname, kernel.index_dtype, - pw_aff_to_expr(lbound), pw_aff_to_expr(ubound), inner_ast))) + pw_aff_to_expr(simplify_pw_aff(lbound, kernel.assumptions)), + pw_aff_to_expr(simplify_pw_aff(ubound, kernel.assumptions)), + inner_ast))) return merge_codegen_results(codegen_state, result) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 602830de38e457c5ff4a55d7685dc346a7b4de35..0ebe90fbca0d31c05eaee64321e2b73709292331 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -142,6 +142,55 @@ def iname_rel_aff(space, iname, rel, aff): raise ValueError("unknown value of 'rel': %s" % rel) +# {{{ simplify_pw_aff + +def simplify_pw_aff(pw_aff, context=None): + if context is not None: + pw_aff = pw_aff.gist_params(context) + + old_pw_aff = pw_aff + + while True: + restart = False + did_something = False + + pieces = pw_aff.get_pieces() + for i, (dom_i, aff_i) in enumerate(pieces): + for j, (dom_j, aff_j) in enumerate(pieces): + if i == j: + continue + + if aff_i.gist(dom_j).is_equal(aff_j): + # aff_i is sufficient to conver aff_j, eliminate aff_j + new_pieces = pieces[:] + if i < j: + new_pieces.pop(j) + new_pieces.pop(i) + else: + new_pieces.pop(i) + new_pieces.pop(j) + + pw_aff = isl.PwAff.alloc(dom_i | dom_j, aff_i) + for dom, aff in new_pieces: + pw_aff = pw_aff.union_max(isl.PwAff.alloc(dom, aff)) + + restart = True + did_something = True + break + + if restart: + break + + if not did_something: + break + + assert pw_aff.get_aggregate_domain() <= pw_aff.eq_set(old_pw_aff) + + return pw_aff + +# }}} + + # {{{ static_*_of_pw_aff def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 6eedfcc20e7e59f129c8f19e2d96c07a80714533..14b18150f5b84218f39ba23662eb6106ffb596a0 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -448,7 +448,7 @@ def parse_insn(groups, insn_options): "the following error occurred:" % groups["rhs"]) raise - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import TypeAnnotation if not isinstance(lhs, tuple): @@ -469,11 +469,15 @@ def parse_insn(groups, insn_options): else: temp_var_types.append(None) + inner_lhs_i = lhs_i + if isinstance(inner_lhs_i, Lookup): + inner_lhs_i = inner_lhs_i.aggregate + from loopy.symbolic import LinearSubscript - if isinstance(lhs_i, Variable): - assignee_names.append(lhs_i.name) - elif isinstance(lhs_i, (Subscript, LinearSubscript)): - assignee_names.append(lhs_i.aggregate.name) + if isinstance(inner_lhs_i, Variable): + assignee_names.append(inner_lhs_i.name) + elif isinstance(inner_lhs_i, (Subscript, LinearSubscript)): + assignee_names.append(inner_lhs_i.aggregate.name) else: raise LoopyError("left hand side of assignment '%s' must " "be variable or subscript" % (lhs_i,)) @@ -1638,11 +1642,11 @@ def _resolve_dependencies(knl, insn, deps): new_deps.append(other_insn.id) found_any = True - if not found_any: + if not found_any and knl.options.check_dep_resolution: raise LoopyError("instruction '%s' declared a depency on '%s', " "which did not resolve to any instruction present in the " - "kernel '%s'" - % (insn.id, dep, knl.name)) + "kernel '%s'. Set the kernel option 'check_dep_resolution'" + "to False to disable this check." % (insn.id, dep, knl.name)) for dep_id in new_deps: if dep_id not in knl.id_to_insn: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index fdd8f1d3764ec03ca40a8338dc512b8cd2ae38cf..0d22dbb88ed99c7c92480d1d39b924cc2198cc3f 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -455,9 +455,12 @@ class InstructionBase(ImmutableRecord): def _get_assignee_var_name(expr): - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript + if isinstance(expr, Lookup): + expr = expr.aggregate + if isinstance(expr, Variable): return expr.name @@ -477,9 +480,12 @@ def _get_assignee_var_name(expr): def _get_assignee_subscript_deps(expr): - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript, get_dependencies + if isinstance(expr, Lookup): + expr = expr.aggregate + if isinstance(expr, Variable): return frozenset() elif isinstance(expr, Subscript): @@ -770,9 +776,9 @@ class Assignment(MultiAssignmentBase): if isinstance(expression, str): expression = parse(expression) - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript - if not isinstance(assignee, (Variable, Subscript, LinearSubscript)): + if not isinstance(assignee, (Variable, Subscript, LinearSubscript, Lookup)): raise LoopyError("invalid lvalue '%s'" % assignee) self.assignee = assignee @@ -993,6 +999,11 @@ class CallInstruction(MultiAssignmentBase): if field_name in ["assignees", "expression"]: key_builder.update_for_pymbolic_expression( key_hash, getattr(self, field_name)) + elif field_name == "predicates": + preds = sorted(self.predicates, key=str) + for pred in preds: + key_builder.update_for_pymbolic_expression( + key_hash, pred) else: key_builder.rec(key_hash, getattr(self, field_name)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 539bfbed06572b07491c215770a0330963764d1d..2033425236836ecf000d6c341c46dcb8b087a29a 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1341,17 +1341,30 @@ def draw_dependencies_as_unicode_arrows( uniform_length = min(n_columns[0], max_columns) + added_ellipsis = [False] + def conform_to_uniform_length(s): if len(s) <= uniform_length: return s + " "*(uniform_length-len(s)) else: - return s[:uniform_length] + "..." + added_ellipsis[0] = True + return s[:uniform_length] + u"…" - return [ + rows = [ (conform_to_uniform_length(row), conform_to_uniform_length(extender)) for row, extender in rows] + if added_ellipsis[0]: + uniform_length += 1 + + rows = [ + (conform_to_uniform_length(row), + conform_to_uniform_length(extender)) + for row, extender in rows] + + return rows + # }}} # vim: foldmethod=marker diff --git a/loopy/options.py b/loopy/options.py index 7c778681dced904b31e0cc39cff529b8c026640d..25bb7014ce07a30c49f7f78d5a6325eaba36291d 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -95,6 +95,11 @@ class Options(ImmutableRecord): determining whether an iname duplication is necessary for the kernel to be schedulable. + .. attribute:: check_dep_resolution + + Whether loopy should issue an error if a dependency + expression does not match any instructions in the kernel. + .. rubric:: Invocation-related options .. attribute:: skip_arg_checks @@ -200,6 +205,7 @@ class Options(ImmutableRecord): allow_terminal_colors_def), disable_global_barriers=kwargs.get("disable_global_barriers", False), + check_dep_resolution=kwargs.get("check_dep_resolution", True), ) # {{{ legacy compatibility diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 2b6d97c38a12b47e5b4653297c18b24c40ed938b..0d8e771954cf26cc11747e745946389420fa5e1b 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -539,7 +539,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on.add(prev_id) new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(stage_exec_iname or base_exec_iname) + new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 88e3898e2cceeeb62edea306283fcb718c3b088d..e7a86300f9d040cba1688e5bb0f3dcbbd926f783 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -63,7 +63,7 @@ class _BatchVariableChanger(RuleAwareIdentityMapper): if not self.needs_batch_subscript(expr.aggregate.name): return super(_BatchVariableChanger, self).map_subscript(expr, expn_state) - idx = expr.index + idx = self.rec(expr.index, expn_state) if not isinstance(idx, tuple): idx = (idx,) @@ -73,7 +73,7 @@ class _BatchVariableChanger(RuleAwareIdentityMapper): if not self.needs_batch_subscript(expr.name): return super(_BatchVariableChanger, self).map_variable(expr, expn_state) - return expr.aggregate[self.batch_iname_expr] + return expr[self.batch_iname_expr] def _add_unique_dim_name(name, dim_names): diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c35b5064365293ac78cdd01af537c9d28bd67193..ea90abfe27c8de69daf39021b3d0ea5463a2e4c8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -986,8 +986,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Emit a warning that we needed boostable_into from warnings import warn from loopy.diagnostic import LoopyWarning - warn("Kernel '%s' required the deprecated 'boostable_into" - "field in order to be schedulable!" % knl.name, LoopyWarning) + warn("Kernel '%s' required the deprecated 'boostable_into' " + "instruction attribute in order to be schedulable!" % knl.name, + LoopyWarning) # Return to avoid yielding the duplication # options without boostable_into @@ -1198,7 +1199,8 @@ class _ReductionSplitter(RuleAwareIdentityMapper): return Reduction(expr.operation, tuple(self.inames), Reduction(expr.operation, tuple(leftover_inames), self.rec(expr.expr, expn_state), - expr.allow_simultaneous)) + expr.allow_simultaneous), + expr.allow_simultaneous) else: assert False else: diff --git a/loopy/version.py b/loopy/version.py index 5c6ad47f8571ceb9100f4f7f8dece9d80a35d10c..77d0e21bdd2ef5383c5f874656c25fe1ede21a70 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v59-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v60-islpy%s" % _islpy_version diff --git a/test/test_loopy.py b/test/test_loopy.py index ec6dd5d3fc802918297942a2c907af9df615f239..82994c386062cf4eb4bf691f8464c56ae0d398c0 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2152,6 +2152,41 @@ def test_global_barrier_error_if_unordered(): knl.global_barrier_order +def test_struct_assignment(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + bbhit = np.dtype([ + ("tmin", np.float32), + ("tmax", np.float32), + ("bi", np.int32), + ("hit", np.int32)]) + + bbhit, bbhit_c_decl = cl.tools.match_dtype_to_c_struct( + ctx.devices[0], "bbhit", bbhit) + bbhit = cl.tools.get_or_register_dtype('bbhit', bbhit) + + preamble = bbhit_c_decl + + knl = lp.make_kernel( + "{ [i]: 0<=i<N }", + """ + for i + result[i].hit = i % 2 + result[i].tmin = i + result[i].tmax = i+10 + result[i].bi = i + end + """, + [ + lp.GlobalArg("result", shape=("N",), dtype=bbhit), + "..."], + preambles=[("000", preamble)]) + + knl = lp.set_options(knl, write_cl=True) + knl(queue, N=200) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) diff --git a/test/test_reduction.py b/test/test_reduction.py index 5887df7a628c46fbf09539fdd48c08aaacd8e409..86e72c0c6644b7b9837a6d74da756c58344b1d6f 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -181,7 +181,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory): # FIXME: Actually test functionality. -@pytest.mark.parametrize("size", [128, 5, 113, 67]) +@pytest.mark.parametrize("size", [128, 5, 113, 67, 1]) def test_local_parallel_reduction(ctx_factory, size): ctx = ctx_factory()