Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Showing
with 4014 additions and 2573 deletions
This diff is collapsed.
from __future__ import division
from __future__ import annotations
__copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
......@@ -23,17 +24,26 @@ THE SOFTWARE.
"""
from typing import TYPE_CHECKING
import islpy as isl
from islpy import dim_type
if TYPE_CHECKING:
from loopy.codegen.tools import CodegenOperationCacheManager
from loopy.kernel import LoopKernel
# {{{ approximate, convex bounds check generator
def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domain):
def get_approximate_convex_bounds_checks(domain, check_inames,
implemented_domain, op_cache_manager):
if isinstance(domain, isl.BasicSet):
domain = isl.Set.from_basic_set(domain)
domain = domain.remove_redundancies()
result = domain.eliminate_except(check_inames, [dim_type.set])
result = op_cache_manager.eliminate_except(domain, check_inames,
(dim_type.set,))
# This is ok, because we're really looking for the
# projection, with no remaining constraints from
......@@ -55,55 +65,39 @@ def get_approximate_convex_bounds_checks(domain, check_inames, implemented_domai
# {{{ on which inames may a conditional depend?
def get_usable_inames_for_conditional(
        kernel: LoopKernel, sched_index: int,
        op_cache_manager: CodegenOperationCacheManager) -> frozenset[str]:
    """Return the inames a conditional at *sched_index* may depend on.

    :arg kernel: the kernel being generated; queried for iname tags and
        for the inames used within *sched_index*.
    :arg sched_index: index of the schedule item at which the conditional
        would be emitted. It is also the index into the per-schedule-item
        tables held by *op_cache_manager*.
    :arg op_cache_manager: provides ``active_inames``,
        ``has_barrier_within`` and ``callkernel_index`` (each indexed by
        *sched_index*) as well as
        ``get_concurrent_inames_in_a_callkernel``.
    :returns: the active inames at *sched_index*. When *sched_index* has an
        enclosing subkernel, the result also includes that subkernel's
        concurrent inames that pass the tag filter below and are used
        within *sched_index*.
    """
    active_inames = op_cache_manager.active_inames[sched_index]
    crosses_barrier = op_cache_manager.has_barrier_within[sched_index]

    # Find our containing subkernel. Grab inames for all insns from there.
    subkernel_index = op_cache_manager.callkernel_index[sched_index]

    if subkernel_index is None:
        # Outside all subkernels - use only inames available to host.
        assert isinstance(active_inames, frozenset)
        return active_inames

    concurrent_inames_in_subkernel = (
        op_cache_manager.get_concurrent_inames_in_a_callkernel(
            subkernel_index))

    # not all parallel inames are usable:
    # - local indices may not be used in conditionals that cross barriers.
    # - ILP indices and vector lane indices are not available in loop
    #   bounds, they only get defined at the innermost level of nesting.
    from loopy.kernel.data import IlpBaseTag, LocalInameTagBase, VectorizeTag
    from loopy.schedule import find_used_inames_within

    usable_concurrent_inames_in_subkernel = frozenset(
        iname for iname in concurrent_inames_in_subkernel
        if (not (kernel.iname_tags_of_type(iname, LocalInameTagBase)
                 and crosses_barrier)
            and not kernel.iname_tags_of_type(iname, VectorizeTag)
            and not kernel.iname_tags_of_type(iname, IlpBaseTag))
    ) & find_used_inames_within(kernel, sched_index)

    return active_inames | usable_concurrent_inames_in_subkernel
# }}}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.