diff --git a/MEMO b/MEMO index 675b8f05793c5b57f6200f753042430b08072434..f8746ea6c72fb0b7d6cf200150731d1da2d236ad 100644 --- a/MEMO +++ b/MEMO @@ -62,9 +62,6 @@ Things to consider TODO ^^^^ -- implemented_domain may end up being smaller than requested in cse - evaluations--check that! - - FIXME: Deal with insns losing a seq iname dep in a CSE realization a <- cse(reduce(stuff)) @@ -88,6 +85,9 @@ TODO Dealt with ^^^^^^^^^^ +- implemented_domain may end up being smaller than requested in cse + evaluations--check that! + - Allow prioritization of loops in scheduling. - Make axpy better. diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index eb0de5869c3dc80efd69ca1e4674b821eae7ecbe..47f0b16a7d8e0ef57a6978d056daad9efafdfd2e 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -9,68 +9,82 @@ import islpy as isl # {{{ support code for AST wrapper objects +class GeneratedInstruction(Record): + """Objects of this type are wrapped around ASTs upon + return from generation calls to collect information about them. + + :ivar implemented_domains: A map from an insn id to a list of + implemented domains, with the purpose of checking that + each instruction's exact iteration space has been covered. + """ + __slots__ = ["insn_id", "implemented_domain", "ast"] + class GeneratedCode(Record): """Objects of this type are wrapped around ASTs upon return from generation calls to collect information about them. + + :ivar implemented_domains: A map from an insn id to a list of + implemented domains, with the purpose of checking that + each instruction's exact iteration space has been covered. """ - __slots__ = ["ast", "num_conditionals"] + __slots__ = ["ast", "implemented_domains"] -def gen_code_block(elements, is_alternatives=False, denest=False): +def gen_code_block(elements): """ :param is_alternatives: a :class:`bool` indicating that only one of the *elements* will effectively be executed. """ - from cgen import Generable, Block + from cgen import Block, Comment, Line - conditional_counts = [] block_els = [] + implemented_domains = {} + for el in elements: if isinstance(el, GeneratedCode): - conditional_counts.append(el.num_conditionals) - if isinstance(el.ast, Block) and denest: + for insn_id, idoms in el.implemented_domains.iteritems(): + implemented_domains.setdefault(insn_id, []).extend(idoms) + + if isinstance(el.ast, Block): block_els.extend(el.ast.contents) else: block_els.append(el.ast) - elif isinstance(el, Generable): + + elif isinstance(el, Comment): block_els.append(el) - else: - raise ValueError("unidentifiable object in block") - if is_alternatives: - num_conditionals = min(conditional_counts) - else: - num_conditionals = sum(conditional_counts) + elif isinstance(el, Line): + assert not el.text + block_els.append(el) + + elif isinstance(el, GeneratedInstruction): + block_els.append(el.ast) + implemented_domains.setdefault(el.insn_id, []).append( + el.implemented_domain) + + else: + raise ValueError("unrecognized object of type '%s' in block" + % type(el)) if len(block_els) == 1: ast, = block_els else: ast = Block(block_els) - return GeneratedCode(ast=ast, num_conditionals=num_conditionals) + return GeneratedCode(ast=ast, implemented_domains=implemented_domains) def wrap_in(cls, *args): inner = args[-1] args = args[:-1] - from cgen import If, Generable - - if isinstance(inner, GeneratedCode): - num_conditionals = inner.num_conditionals - ast = inner.ast - elif isinstance(inner, Generable): - num_conditionals = 0 - ast = inner + if not isinstance(inner, GeneratedCode): + raise ValueError("unrecognized object of type '%s' in block" + % type(inner)) - args = args + (ast,) - ast = cls(*args) + args = args + (inner.ast,) - if isinstance(ast, If): - import re - cond_joiner_re = re.compile(r"\|\||\&\&") - num_conditionals += len(cond_joiner_re.split(ast.condition)) - - return GeneratedCode(ast=ast, num_conditionals=num_conditionals) + return GeneratedCode(ast=cls(*args), + implemented_domains=inner.implemented_domains) def wrap_in_if(condition_codelets, inner): from cgen import If @@ -86,10 +100,12 @@ def add_comment(cmt, code): if cmt is None: return code - from cgen import add_comment, Block - block_with_comment = add_comment(cmt, code.ast) - assert isinstance(block_with_comment, Block) - return gen_code_block(block_with_comment.contents) + from cgen import add_comment + assert isinstance(code, GeneratedCode) + + return GeneratedCode( + ast=add_comment(cmt, code.ast), + implemented_domains=code.implemented_domains) # }}} @@ -109,7 +125,7 @@ class CodeGenerationState(object): def intersect(self, set): return CodeGenerationState( - self.implemented_domain.intersect(set), + self.implemented_domain & set, self.c_code_mapper) def fix(self, iname, aff, space): @@ -160,6 +176,32 @@ def make_initial_assignments(kernel): # }}} +# {{{ sanity-check for implemented domains of each instruction + +def check_implemented_domains(kernel, implemented_domains): + for insn_id, idomains in implemented_domains.iteritems(): + assert idomains + + insn_impl_domain = idomains[0] + for idomain in idomains[1:]: + insn_impl_domain = insn_impl_domain | idomain + insn_impl_domain = insn_impl_domain.coalesce() + + insn = kernel.id_to_insn[insn_id] + desired_domain = (kernel.domain + .eliminate_except(insn.all_inames(), [isl.dim_type.set])) + + if insn_impl_domain != desired_domain: + raise RuntimeError("sanity check failed--implemented and desired " + "domain for insn '%s' do not match\n implemented: %s\n" + " desired:%s" + % (insn_id, insn_impl_domain, desired_domain)) + + # placate the assert at the call site + return True + +# }}} + # {{{ main code generation entrypoint def generate_code(kernel): @@ -301,6 +343,8 @@ def generate_code(kernel): # }}} + assert check_implemented_domains(kernel, gen_code.implemented_domains) + return str(mod) # }}} diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 3fd65c4351996678e4f60a0cda0c237392b93c79..60be2e1dcb607d04481b5253fc7407b9329d7461 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -67,6 +67,7 @@ def generate_ilp_instances(kernel, insn, codegen_state): def generate_instruction_code(kernel, insn, codegen_state): result = [] + from loopy.codegen import GeneratedInstruction for ilpi in generate_ilp_instances(kernel, insn, codegen_state): ccm = codegen_state.c_code_mapper.copy_and_assign_many(ilpi.assignments) @@ -80,7 +81,10 @@ def generate_instruction_code(kernel, insn, codegen_state): ccm, kernel.domain, insn.all_inames(), ilpi.implemented_domain, insn_code) - result.append(insn_code) + result.append(GeneratedInstruction( + insn_id=insn.id, + implemented_domain=ilpi.implemented_domain, + ast=insn_code)) from loopy.codegen import gen_code_block return gen_code_block(result) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 3e18e997c517749a61eb6fc616806e07d682137c..3e5d484aeb987b24b5feaf7a51438db0682d8925 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -221,13 +221,13 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left= if len(slabs) == 1: cmt = None - new_kernel = kernel.copy(domain=kernel.domain.intersect(slab)) + new_kernel = kernel.copy(domain=kernel.domain & slab) inner = set_up_hw_parallel_loops( new_kernel, sched_index, codegen_state, hw_inames_left) result.append(add_comment(cmt, inner)) from loopy.codegen import gen_code_block - return gen_code_block(result, is_alternatives=True) + return gen_code_block(result) # }}} diff --git a/loopy/kernel.py b/loopy/kernel.py index 1f9b3ac23825658e80e2fb55e4db72806953a4af..0ee99b860b1cf6829d44865dc8a4cf09f4c89dec 100644 --- a/loopy/kernel.py +++ b/loopy/kernel.py @@ -590,12 +590,12 @@ class LoopKernel(Record): @memoize_method def get_iname_bounds(self, iname): - lower_bound_pw_aff = (self.domain - .intersect(self.assumptions) + lower_bound_pw_aff = ( + (self.domain & self.assumptions) .dim_min(self.iname_to_dim[iname][1]) .coalesce()) - upper_bound_pw_aff = (self.domain - .intersect(self.assumptions) + upper_bound_pw_aff = ( + (self.domain & self.assumptions) .dim_max(self.iname_to_dim[iname][1]) .coalesce())