diff --git a/examples/fortran/matmul.floopy b/examples/fortran/matmul.floopy new file mode 100644 index 0000000000000000000000000000000000000000..ea85b0c6b198c8bac6270098696504fc9006fd14 --- /dev/null +++ b/examples/fortran/matmul.floopy @@ -0,0 +1,26 @@ +subroutine dgemm(m,n,l,alpha,a,b,c) + implicit none + real*8 temp, a(m,l),b(l,n),c(m,n), alpha + integer m,n,k,i,j,l + + do j = 1,n + do k = 1,l + do i = 1,m + c(i,j) = c(i,j) + alpha*b(k,j)*a(i,k) + end do + end do + end do +end subroutine + +!$loopy begin transform +! dgemm = lp.split_iname(dgemm, "i", 16, +! outer_tag="g.0", inner_tag="l.1") +! dgemm = lp.split_iname(dgemm, "j", 8, +! outer_tag="g.1", inner_tag="l.0") +! dgemm = lp.split_iname(dgemm, "k", 32) +! +! dgemm = lp.extract_subst(dgemm, "a_acc", "a[i1,i2]", parameters="i1, i2") +! dgemm = lp.extract_subst(dgemm, "b_acc", "b[i1,i2]", parameters="i1, i2") +! dgemm = lp.precompute(dgemm, "a_acc", "k_inner,i_inner") +! dgemm = lp.precompute(dgemm, "b_acc", "j_inner,k_inner") +!$loopy end transform diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index d3089fc6a6baad741c1cfc2d1843aa541192cfb3..68f7f131c4c40d2f647fb6c6095959b4884a3c41 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -392,10 +392,9 @@ class LoopKernel(RecordWithoutPickling): iname_set_stack = [] result = [] - writer_map = self.writer_map() + from loopy.kernel.tools import is_domain_dependent_on_inames - for dom in self.domains: - parameters = set(dom.get_var_names(dim_type.param)) + for dom_idx, dom in enumerate(self.domains): inames = set(dom.get_var_names(dim_type.set)) # This next domain may be nested inside the previous domain. @@ -405,37 +404,11 @@ class LoopKernel(RecordWithoutPickling): discard_level_count = 0 while discard_level_count < len(iname_set_stack): - # {{{ check for parenthood by loop bound iname - last_inames = iname_set_stack[-1-discard_level_count] - if last_inames & parameters: - break - - # }}} - - # {{{ check for parenthood by written variable - is_parent_by_variable = False - for par in parameters: - if par in self.temporary_variables: - writer_insns = writer_map[par] - - if len(writer_insns) > 1: - raise RuntimeError("loop bound '%s' " - "may only be written to once" % par) - - writer_insn, = writer_insns - writer_inames = self.insn_inames(writer_insn) - - if writer_inames & last_inames: - is_parent_by_variable = True - break - - if is_parent_by_variable: + if is_domain_dependent_on_inames(self, dom_idx, last_inames): break - # }}} - discard_level_count += 1 if discard_level_count: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index bd1fcb449320b5bab98b5d69cd0239621a61076a..2973cd9e69e46c875e15e6c8674150373be8be1e 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -539,4 +539,41 @@ class DomainParameterFinder(object): # }}} + +# {{{ is domain dependent on inames + +def is_domain_dependent_on_inames(kernel, domain_index, inames): + dom = kernel.domains[domain_index] + dom_parameters = set(dom.get_var_names(dim_type.param)) + + # {{{ check for parenthood by loop bound iname + + if inames & dom_parameters: + return True + + # }}} + + # {{{ check for parenthood by written variable + + for par in dom_parameters: + if par in kernel.temporary_variables: + writer_insns = kernel.writer_map()[par] + + if len(writer_insns) > 1: + raise RuntimeError("loop bound '%s' " + "may only be written to once" % par) + + writer_insn, = writer_insns + writer_inames = kernel.insn_inames(writer_insn) + + if writer_inames & inames: + return True + + # }}} + + return False + +# }}} + + # vim: foldmethod=marker diff --git a/loopy/loop.py b/loopy/loop.py index d2ddece1980ec41c9e0062ccd4a091f3e3d5c301..4592463822a2321745aaf48a316d16c98d4efca3 100644 --- a/loopy/loop.py +++ b/loopy/loop.py @@ -23,6 +23,7 @@ THE SOFTWARE. """ +import islpy as isl import six @@ -39,7 +40,6 @@ def potential_loop_nest_map(kernel): iname_to_insns = kernel.iname_to_insns() # examine pairs of all inames--O(n**2), I know. - from loopy.kernel.data import IlpBaseTag for inner_iname in all_inames: inner_result = set() for outer_iname in all_inames: @@ -56,22 +56,67 @@ def potential_loop_nest_map(kernel): def fuse_loop_domains(kernel): - did_something = False + from loopy.kernel.tools import is_domain_dependent_on_inames + while True: lnm = potential_loop_nest_map(kernel) + parents_per_domain = kernel.parents_per_domain() + all_parents_per_domain = kernel.all_parents_per_domain() + + new_domains = None for inner_iname, outer_inames in six.iteritems(lnm): for outer_iname in outer_inames: - inner_do + # {{{ check if it's safe to fuse + + inner_domain_idx = kernel.get_home_domain_index(inner_iname) + outer_domain_idx = kernel.get_home_domain_index(outer_iname) + + if inner_domain_idx == outer_domain_idx: + break + + if ( + outer_domain_idx in all_parents_per_domain[inner_domain_idx] + and not + outer_domain_idx == parents_per_domain[inner_domain_idx]): + # Outer domain is not a direct parent of the inner + # domain. Unable to fuse. + continue + + outer_dom = kernel.domains[outer_domain_idx] + inner_dom = kernel.domains[inner_domain_idx] + outer_inames = set(outer_dom.get_var_names(isl.dim_type.set)) + if is_domain_dependent_on_inames(kernel, inner_domain_idx, + outer_inames): + # Bounds of inner domain depend on outer domain. + # Unable to fuse. + continue + # }}} - print kernel - print lnm - 1/0 + new_domains = kernel.domains[:] + min_idx = min(inner_domain_idx, outer_domain_idx) + max_idx = max(inner_domain_idx, outer_domain_idx) - if not did_something: + del new_domains[max_idx] + del new_domains[min_idx] + + outer_dom, inner_dom = isl.align_two(outer_dom, inner_dom) + + new_domains.insert(min_idx, inner_dom & outer_dom) + break + + if new_domains: + break + + if not new_domains: + # Nothing was accomplished in the last loop trip, time to quit. break + kernel = kernel.copy(domains=new_domains) + return kernel + +# vim: fdm=marker diff --git a/loopy/target/pyopencl/__init__.py b/loopy/target/pyopencl/__init__.py index f1d3bfdde83735a34b9f86f14c561e9aa1bb2124..a1b323d7cf0deaef308952d87b1523b0a020ad68 100644 --- a/loopy/target/pyopencl/__init__.py +++ b/loopy/target/pyopencl/__init__.py @@ -34,6 +34,9 @@ from loopy.target.opencl import OpenCLTarget import pyopencl as cl import pyopencl.characterize as cl_char +# This ensures the dtype registry is populated. +import pyopencl.tools # noqa + import logging logger = logging.getLogger(__name__) @@ -228,7 +231,7 @@ def pyopencl_preamble_generator(target, seen_dtypes, seen_functions): # }}} -# {{{ +# {{{ pyopencl tools class PyOpenCLTarget(OpenCLTarget): def __init__(self, device=None): @@ -236,9 +239,6 @@ class PyOpenCLTarget(OpenCLTarget): self.device = device - # This ensures the dtype registry is populated. - import pyopencl.tools # noqa - hash_fields = ["device"] comparison_fields = ["device"] diff --git a/test/test_fortran.py b/test/test_fortran.py index 9a207bf5fb3bfce216809317356730bf521a1ea8..33826e4063190f7d7d48b1bfcb4122931868bd1e 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -272,6 +272,51 @@ def test_tagged(ctx_factory): assert sum(1 for insn in lp.find_instructions(knl, "*$input")) == 2 + +def test_matmul(ctx_factory): + fortran_src = """ + subroutine dgemm(m,n,l,a,b,c) + implicit none + real*8 temp, a(m,l),b(l,n),c(m,n) + integer m,n,k,i,j,l + + do j = 1,n + do i = 1,m + temp = 0 + do k = 1,l + temp = temp + b(k,j)*a(i,k) + end do + c(i,j) = temp + end do + end do + end subroutine + + !$loopy begin transform + !$loopy end transform + """ + + from loopy.frontend.fortran import f2loopy + knl, = f2loopy(fortran_src) + + assert len(knl.domains) == 1 + + ref_knl = knl + + knl = lp.split_iname(knl, "i", 16, + outer_tag="g.0", inner_tag="l.1") + knl = lp.split_iname(knl, "j", 8, + outer_tag="g.1", inner_tag="l.0") + knl = lp.split_iname(knl, "k", 32) + + knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2") + knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2") + knl = lp.precompute(knl, "a_acc", "k_inner,i_inner") + knl = lp.precompute(knl, "b_acc", "j_inner,k_inner") + + ctx = ctx_factory() + lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5, m=7, l=10)) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1])