diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 6224d9709f5f796f84c3fd177125e0703d92d173..3ef7c8f6ad6c8af09dd01bf9e1341179d2be0be7 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -126,10 +126,13 @@ def generate_assignment_instruction_code(codegen_state, insn): # }}} - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript lhs = insn.assignee + if isinstance(lhs, Lookup): + lhs = lhs.aggregate + if isinstance(lhs, Variable): assignee_var_name = lhs.name assignee_indices = () @@ -145,6 +148,8 @@ def generate_assignment_instruction_code(codegen_state, insn): else: raise RuntimeError("invalid lvalue '%s'" % lhs) + del lhs + result = codegen_state.ast_builder.emit_assignment(codegen_state, insn) # {{{ tracing diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index a334462049634fff1e3137ffd09acd3ef254bb51..0110a06095fa0bd690045f050136027d7bed3a28 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -465,12 +465,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index): else: inner_ast = inner.current_ast(codegen_state) + + from loopy.isl_helpers import simplify_pw_aff + result.append( inner.with_new_ast( codegen_state, astb.emit_sequential_loop( codegen_state, loop_iname, kernel.index_dtype, - pw_aff_to_expr(lbound), pw_aff_to_expr(ubound), inner_ast))) + pw_aff_to_expr(simplify_pw_aff(lbound, kernel.assumptions)), + pw_aff_to_expr(simplify_pw_aff(ubound, kernel.assumptions)), + inner_ast))) return merge_codegen_results(codegen_state, result) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 602830de38e457c5ff4a55d7685dc346a7b4de35..0ebe90fbca0d31c05eaee64321e2b73709292331 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -142,6 +142,55 @@ def iname_rel_aff(space, iname, rel, aff): raise ValueError("unknown value of 'rel': %s" % rel) +# {{{ 
simplify_pw_aff + +def simplify_pw_aff(pw_aff, context=None): + if context is not None: + pw_aff = pw_aff.gist_params(context) + + old_pw_aff = pw_aff + + while True: + restart = False + did_something = False + + pieces = pw_aff.get_pieces() + for i, (dom_i, aff_i) in enumerate(pieces): + for j, (dom_j, aff_j) in enumerate(pieces): + if i == j: + continue + + if aff_i.gist(dom_j).is_equal(aff_j): + # aff_i is sufficient to cover aff_j, eliminate aff_j + new_pieces = pieces[:] + if i < j: + new_pieces.pop(j) + new_pieces.pop(i) + else: + new_pieces.pop(i) + new_pieces.pop(j) + + pw_aff = isl.PwAff.alloc(dom_i | dom_j, aff_i) + for dom, aff in new_pieces: + pw_aff = pw_aff.union_max(isl.PwAff.alloc(dom, aff)) + + restart = True + did_something = True + break + + if restart: + break + + if not did_something: + break + + assert pw_aff.get_aggregate_domain() <= pw_aff.eq_set(old_pw_aff) + + return pw_aff + +# }}} + + # {{{ static_*_of_pw_aff def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 6eedfcc20e7e59f129c8f19e2d96c07a80714533..14b18150f5b84218f39ba23662eb6106ffb596a0 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -448,7 +448,7 @@ def parse_insn(groups, insn_options): "the following error occurred:" % groups["rhs"]) raise - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import TypeAnnotation if not isinstance(lhs, tuple): @@ -469,11 +469,15 @@ def parse_insn(groups, insn_options): else: temp_var_types.append(None) + inner_lhs_i = lhs_i + if isinstance(inner_lhs_i, Lookup): + inner_lhs_i = inner_lhs_i.aggregate + from loopy.symbolic import LinearSubscript - if isinstance(lhs_i, Variable): - assignee_names.append(lhs_i.name) - elif isinstance(lhs_i, (Subscript, LinearSubscript)): - assignee_names.append(lhs_i.aggregate.name) + if isinstance(inner_lhs_i, 
Variable): + assignee_names.append(inner_lhs_i.name) + elif isinstance(inner_lhs_i, (Subscript, LinearSubscript)): + assignee_names.append(inner_lhs_i.aggregate.name) else: raise LoopyError("left hand side of assignment '%s' must " "be variable or subscript" % (lhs_i,)) @@ -1638,11 +1642,11 @@ def _resolve_dependencies(knl, insn, deps): new_deps.append(other_insn.id) found_any = True - if not found_any: + if not found_any and knl.options.check_dep_resolution: raise LoopyError("instruction '%s' declared a depency on '%s', " "which did not resolve to any instruction present in the " - "kernel '%s'" - % (insn.id, dep, knl.name)) + "kernel '%s'. Set the kernel option 'check_dep_resolution' " + "to False to disable this check." % (insn.id, dep, knl.name)) for dep_id in new_deps: if dep_id not in knl.id_to_insn: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 581f090547370ca1b8cc4752dc70e9408e6ab37c..85c5019293c6aa79ad853cb938cbe5fe5267a351 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -455,9 +455,12 @@ class InstructionBase(ImmutableRecord): def _get_assignee_var_name(expr): - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript + if isinstance(expr, Lookup): + expr = expr.aggregate + if isinstance(expr, Variable): return expr.name @@ -477,9 +480,12 @@ def _get_assignee_var_name(expr): def _get_assignee_subscript_deps(expr): - from pymbolic.primitives import Variable, Subscript + from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript, get_dependencies + if isinstance(expr, Lookup): + expr = expr.aggregate + if isinstance(expr, Variable): return frozenset() elif isinstance(expr, Subscript): @@ -770,9 +776,9 @@ class Assignment(MultiAssignmentBase): if isinstance(expression, str): expression = parse(expression) - from pymbolic.primitives import Variable, Subscript 
+ from pymbolic.primitives import Variable, Subscript, Lookup from loopy.symbolic import LinearSubscript - if not isinstance(assignee, (Variable, Subscript, LinearSubscript)): + if not isinstance(assignee, (Variable, Subscript, LinearSubscript, Lookup)): raise LoopyError("invalid lvalue '%s'" % assignee) self.assignee = assignee @@ -993,6 +999,11 @@ class CallInstruction(MultiAssignmentBase): if field_name in ["assignees", "expression"]: key_builder.update_for_pymbolic_expression( key_hash, getattr(self, field_name)) + elif field_name == "predicates": + preds = sorted(self.predicates, key=str) + for pred in preds: + key_builder.update_for_pymbolic_expression( + key_hash, pred) else: key_builder.rec(key_hash, getattr(self, field_name)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 539bfbed06572b07491c215770a0330963764d1d..2033425236836ecf000d6c341c46dcb8b087a29a 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1341,17 +1341,30 @@ def draw_dependencies_as_unicode_arrows( uniform_length = min(n_columns[0], max_columns) + added_ellipsis = [False] + def conform_to_uniform_length(s): if len(s) <= uniform_length: return s + " "*(uniform_length-len(s)) else: - return s[:uniform_length] + "..." + added_ellipsis[0] = True + return s[:uniform_length] + u"…" - return [ + rows = [ (conform_to_uniform_length(row), conform_to_uniform_length(extender)) for row, extender in rows] + if added_ellipsis[0]: + uniform_length += 1 + + rows = [ + (conform_to_uniform_length(row), + conform_to_uniform_length(extender)) + for row, extender in rows] + + return rows + # }}} # vim: foldmethod=marker diff --git a/loopy/options.py b/loopy/options.py index 7c778681dced904b31e0cc39cff529b8c026640d..25bb7014ce07a30c49f7f78d5a6325eaba36291d 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -95,6 +95,11 @@ class Options(ImmutableRecord): determining whether an iname duplication is necessary for the kernel to be schedulable. + .. 
attribute:: check_dep_resolution + + Whether loopy should issue an error if a dependency + expression does not match any instructions in the kernel. + .. rubric:: Invocation-related options .. attribute:: skip_arg_checks @@ -200,6 +205,7 @@ class Options(ImmutableRecord): allow_terminal_colors_def), disable_global_barriers=kwargs.get("disable_global_barriers", False), + check_dep_resolution=kwargs.get("check_dep_resolution", True), ) # {{{ legacy compatibility diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 5ece0db1dffd2cde118bc3104b90ce6faa14a448..5f62d1a9d3eb40b4a5e9ac29212916b33b04d844 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -806,7 +806,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on.add(prev_id) new_insn_add_no_sync_with.add((prev_id, "any")) - new_insn_add_within_inames.add(stage_exec_iname or base_exec_iname) + new_insn_add_within_inames.add(base_exec_iname or stage_exec_iname) if nresults == 1: assert len(acc_vars) == 1 diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 88e3898e2cceeeb62edea306283fcb718c3b088d..e7a86300f9d040cba1688e5bb0f3dcbbd926f783 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -63,7 +63,7 @@ class _BatchVariableChanger(RuleAwareIdentityMapper): if not self.needs_batch_subscript(expr.aggregate.name): return super(_BatchVariableChanger, self).map_subscript(expr, expn_state) - idx = expr.index + idx = self.rec(expr.index, expn_state) if not isinstance(idx, tuple): idx = (idx,) @@ -73,7 +73,7 @@ class _BatchVariableChanger(RuleAwareIdentityMapper): if not self.needs_batch_subscript(expr.name): return super(_BatchVariableChanger, self).map_variable(expr, expn_state) - return expr.aggregate[self.batch_iname_expr] + return expr[self.batch_iname_expr] def _add_unique_dim_name(name, dim_names): diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 
b9a386b2b69ab1c3136f5f91075bc0129e320748..35c12700806ccf4c0cb56f0ac458d98631cfdc19 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -986,8 +986,9 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Emit a warning that we needed boostable_into from warnings import warn from loopy.diagnostic import LoopyWarning - warn("Kernel '%s' required the deprecated 'boostable_into" - "field in order to be schedulable!" % knl.name, LoopyWarning) + warn("Kernel '%s' required the deprecated 'boostable_into' " + "instruction attribute in order to be schedulable!" % knl.name, + LoopyWarning) # Return to avoid yielding the duplication # options without boostable_into diff --git a/test/test_loopy.py b/test/test_loopy.py index 1cd025c99a311184587272af05b5b41e659b18df..7719607833872127aa1878fbe735d73da1e48bac 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2140,6 +2140,41 @@ def test_multi_argument_reduction_type_inference(): == [(int32, int32)]) +def test_struct_assignment(ctx_factory): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + bbhit = np.dtype([ + ("tmin", np.float32), + ("tmax", np.float32), + ("bi", np.int32), + ("hit", np.int32)]) + + bbhit, bbhit_c_decl = cl.tools.match_dtype_to_c_struct( + ctx.devices[0], "bbhit", bbhit) + bbhit = cl.tools.get_or_register_dtype('bbhit', bbhit) + + preamble = bbhit_c_decl + + knl = lp.make_kernel( + "{ [i]: 0<=i<N }", + """ + for i + result[i].hit = i % 2 + result[i].tmin = i + result[i].tmax = i+10 + result[i].bi = i + end + """, + [ + lp.GlobalArg("result", shape=("N",), dtype=bbhit), + "..."], + preambles=[("000", preamble)]) + + knl = lp.set_options(knl, write_cl=True) + knl(queue, N=200) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) diff --git a/test/test_reduction.py b/test/test_reduction.py index 1dd11b492bf6fd12bb9a5173f7f75f75fd0cd4c4..be11d7c8cada94596dceb1a8e0e678f8adb582e9 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ 
-181,7 +181,7 @@ def test_recursive_nested_dependent_reduction(ctx_factory): # FIXME: Actually test functionality. -@pytest.mark.parametrize("size", [128, 5, 113, 67]) +@pytest.mark.parametrize("size", [128, 5, 113, 67, 1]) def test_local_parallel_reduction(ctx_factory, size): ctx = ctx_factory()