Skip to content
Snippets Groups Projects
Commit 164ad102 authored by Matt Wala
Browse files

[WIP] Improve save and reload towards global scan (see: #62).

* Closes #40 by changing the way storage is computed for save and
reload: it now uses the tags of the accessing instructions, not the inames.
* Allows a single representative per base_storage equivalence class to
be saved and reloaded (see also: #42).
* Removes InstructionQuery class from schedule tools.

Still needs: tests
parent bd12a464
No related branches found
No related tags found
1 merge request: !106 Improve save and reload towards global scan (see: #62).
Pipeline #
...@@ -505,22 +505,23 @@ def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel): ...@@ -505,22 +505,23 @@ def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel):
# {{{ check that temporaries are defined in subkernels where used # {{{ check that temporaries are defined in subkernels where used
def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
from loopy.schedule.tools import InstructionQuery
from loopy.kernel.data import temp_var_scope from loopy.kernel.data import temp_var_scope
from loopy.kernel.tools import get_subkernels
insn_query = InstructionQuery(kernel) for subkernel in get_subkernels(kernel):
for subkernel in insn_query.subkernels():
defined_base_storage = set() defined_base_storage = set()
for temporary in insn_query.temporaries_written_in_subkernel(subkernel): from loopy.schedule.tools import (
temporaries_written_in_subkernel, temporaries_read_in_subkernel)
for temporary in temporaries_written_in_subkernel(kernel, subkernel):
tval = kernel.temporary_variables[temporary] tval = kernel.temporary_variables[temporary]
if tval.base_storage is not None: if tval.base_storage is not None:
defined_base_storage.add(tval.base_storage) defined_base_storage.add(tval.base_storage)
for temporary in ( for temporary in (
insn_query.temporaries_read_in_subkernel(subkernel) - temporaries_read_in_subkernel(kernel, subkernel) -
insn_query.temporaries_written_in_subkernel(subkernel)): temporaries_written_in_subkernel(kernel, subkernel)):
tval = kernel.temporary_variables[temporary] tval = kernel.temporary_variables[temporary]
if tval.initializer is not None: if tval.initializer is not None:
...@@ -530,16 +531,17 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): ...@@ -530,16 +531,17 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel):
if tval.base_storage is not None: if tval.base_storage is not None:
if tval.base_storage not in defined_base_storage: if tval.base_storage not in defined_base_storage:
from loopy.diagnostic import MissingDefinitionError from loopy.diagnostic import MissingDefinitionError
raise MissingDefinitionError("temporary variable '%s' gets used " raise MissingDefinitionError("temporary variable '%s' gets "
"in subkernel '%s' and neither it nor its aliases have a " "used in subkernel '%s' and neither it nor its "
"definition" % (temporary, subkernel)) "aliases have a definition" % (temporary, subkernel))
continue continue
if tval.scope in (temp_var_scope.PRIVATE, temp_var_scope.LOCAL): if tval.scope in (temp_var_scope.PRIVATE, temp_var_scope.LOCAL):
from loopy.diagnostic import MissingDefinitionError from loopy.diagnostic import MissingDefinitionError
raise MissingDefinitionError("temporary variable '%s' gets used in " raise MissingDefinitionError("temporary variable '%s' gets used "
"subkernel '%s' without a definition (maybe you forgot to call " "in subkernel '%s' without a definition (maybe you forgot "
"loopy.save_and_reload_temporaries?)" % (temporary, subkernel)) "to call loopy.save_and_reload_temporaries?)"
% (temporary, subkernel))
# }}} # }}}
......
...@@ -23,10 +23,6 @@ THE SOFTWARE. ...@@ -23,10 +23,6 @@ THE SOFTWARE.
""" """
from loopy.kernel.data import temp_var_scope from loopy.kernel.data import temp_var_scope
from loopy.schedule import (BeginBlockItem, CallKernel, EndBlockItem,
RunInstruction, Barrier)
from pytools import memoize_method
# {{{ block boundary finder # {{{ block boundary finder
...@@ -37,6 +33,7 @@ def get_block_boundaries(schedule): ...@@ -37,6 +33,7 @@ def get_block_boundaries(schedule):
:class:`loopy.schedule.BlockBeginItem`s to :class:`loopy.schedule.BlockBeginItem`s to
:class:`loopy.schedule.BlockEndItem`s and vice versa. :class:`loopy.schedule.BlockEndItem`s and vice versa.
""" """
from loopy.schedule import (BeginBlockItem, EndBlockItem)
block_bounds = {} block_bounds = {}
active_blocks = [] active_blocks = []
for idx, sched_item in enumerate(schedule): for idx, sched_item in enumerate(schedule):
...@@ -51,109 +48,24 @@ def get_block_boundaries(schedule): ...@@ -51,109 +48,24 @@ def get_block_boundaries(schedule):
# }}} # }}}
# {{{ instruction query utility # {{{ subkernel tools
class InstructionQuery(object): def temporaries_read_in_subkernel(kernel, subkernel):
from loopy.kernel.tools import get_subkernel_to_insn_id_map
def __init__(self, kernel): insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel]
self.kernel = kernel return frozenset(tv
block_bounds = get_block_boundaries(kernel.schedule) for insn_id in insn_ids
subkernel_slices = {} for tv in kernel.id_to_insn[insn_id].read_dependency_names()
from six import iteritems if tv in kernel.temporary_variables)
for start, end in iteritems(block_bounds):
sched_item = kernel.schedule[start]
if isinstance(sched_item, CallKernel): def temporaries_written_in_subkernel(kernel, subkernel):
subkernel_slices[sched_item.kernel_name] = slice(start, end + 1) from loopy.kernel.tools import get_subkernel_to_insn_id_map
self.subkernel_slices = subkernel_slices insn_ids = get_subkernel_to_insn_id_map(kernel)[subkernel]
return frozenset(tv
@memoize_method for insn_id in insn_ids
def subkernels(self): for tv in kernel.id_to_insn[insn_id].write_dependency_names()
return frozenset(self.subkernel_slices.keys()) if tv in kernel.temporary_variables)
@memoize_method
def insns_reading_or_writing(self, var):
return frozenset(insn.id for insn in self.kernel.instructions
if var in insn.read_dependency_names()
or var in insn.assignee_var_names())
@memoize_method
def insns_in_subkernel(self, subkernel):
return frozenset(sched_item.insn_id for sched_item
in self.kernel.schedule[self.subkernel_slices[subkernel]]
if isinstance(sched_item, RunInstruction))
@memoize_method
def temporaries_read_in_subkernel(self, subkernel):
return frozenset(
var
for insn in self.insns_in_subkernel(subkernel)
for var in self.kernel.id_to_insn[insn].read_dependency_names()
if var in self.kernel.temporary_variables)
@memoize_method
def temporaries_written_in_subkernel(self, subkernel):
return frozenset(
var
for insn in self.insns_in_subkernel(subkernel)
for var in self.kernel.id_to_insn[insn].assignee_var_names()
if var in self.kernel.temporary_variables)
@memoize_method
def temporaries_read_or_written_in_subkernel(self, subkernel):
return (
self.temporaries_read_in_subkernel(subkernel) |
self.temporaries_written_in_subkernel(subkernel))
@memoize_method
def inames_in_subkernel(self, subkernel):
subkernel_start = self.subkernel_slices[subkernel].start
return frozenset(self.kernel.schedule[subkernel_start].extra_inames)
@memoize_method
def pre_and_post_barriers(self, subkernel):
subkernel_start = self.subkernel_slices[subkernel].start
subkernel_end = self.subkernel_slices[subkernel].stop
def is_global_barrier(item):
return isinstance(item, Barrier) and item.kind == "global"
try:
pre_barrier = next(item for item in
self.kernel.schedule[subkernel_start::-1]
if is_global_barrier(item)).originating_insn_id
except StopIteration:
pre_barrier = None
try:
post_barrier = next(item for item in
self.kernel.schedule[subkernel_end:]
if is_global_barrier(item)).originating_insn_id
except StopIteration:
post_barrier = None
return (pre_barrier, post_barrier)
@memoize_method
def hw_inames(self, insn_id):
"""
Return the inames that insn runs in and that are tagged as hardware
parallel.
"""
from loopy.kernel.data import HardwareParallelTag
return set(iname for iname in self.kernel.insn_inames(insn_id)
if isinstance(self.kernel.iname_to_tag.get(iname),
HardwareParallelTag))
@memoize_method
def common_hw_inames(self, insn_ids):
"""
Return the common set of hardware parallel tagged inames among
the list of instructions.
"""
# Get the list of hardware inames in which the temporary is defined.
if len(insn_ids) == 0:
return set()
return set.intersection(*(self.hw_inames(id) for id in insn_ids))
# }}} # }}}
...@@ -166,23 +78,27 @@ def add_extra_args_to_schedule(kernel): ...@@ -166,23 +78,27 @@ def add_extra_args_to_schedule(kernel):
instructions in the schedule with global temporaries. instructions in the schedule with global temporaries.
""" """
new_schedule = [] new_schedule = []
from loopy.schedule import CallKernel
insn_query = InstructionQuery(kernel)
for sched_item in kernel.schedule: for sched_item in kernel.schedule:
if isinstance(sched_item, CallKernel): if isinstance(sched_item, CallKernel):
subrange_temporaries = (insn_query subkernel = sched_item.kernel_name
.temporaries_read_or_written_in_subkernel(sched_item.kernel_name))
used_temporaries = (
temporaries_read_in_subkernel(kernel, subkernel)
| temporaries_written_in_subkernel(kernel, subkernel))
more_args = set(tv more_args = set(tv
for tv in subrange_temporaries for tv in used_temporaries
if if
kernel.temporary_variables[tv].scope == temp_var_scope.GLOBAL kernel.temporary_variables[tv].scope == temp_var_scope.GLOBAL
and and
kernel.temporary_variables[tv].initializer is None kernel.temporary_variables[tv].initializer is None
and and
tv not in sched_item.extra_args) tv not in sched_item.extra_args)
new_schedule.append(sched_item.copy( new_schedule.append(sched_item.copy(
extra_args=sched_item.extra_args + sorted(more_args))) extra_args=sched_item.extra_args + sorted(more_args)))
else: else:
new_schedule.append(sched_item) new_schedule.append(sched_item)
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment