Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tasmith4/loopy
  • ben_sepanski/loopy
  • arghdos/loopy
  • inducer/loopy
  • wence-/loopy
  • isuruf/loopy
  • fikl2/loopy
  • xywei/loopy
  • kaushikcfd/loopy
  • zweiner2/loopy
10 results
Show changes
Showing
with 203 additions and 221 deletions
...@@ -39,13 +39,13 @@ def defines_to_python_code(defines_str): ...@@ -39,13 +39,13 @@ def defines_to_python_code(defines_str):
import re import re
define_re = re.compile(r"^\#define\s+([a-zA-Z0-9_]+)\s+(.*)$") define_re = re.compile(r"^\#define\s+([a-zA-Z0-9_]+)\s+(.*)$")
result = [] result = []
for l in defines_str.split("\n"): for line in defines_str.split("\n"):
if not l.strip(): if not line.strip():
continue continue
match = define_re.match(l) match = define_re.match(line)
if match is None: if match is None:
raise RuntimeError("#define not understood: '%s'" % l) raise RuntimeError("#define not understood: '%s'" % line)
result.append( result.append(
"%s = %s" % (match.group(1), to_python_literal(match.group(2)))) "%s = %s" % (match.group(1), to_python_literal(match.group(2))))
......
...@@ -388,7 +388,7 @@ def generate_code_v2(kernel): ...@@ -388,7 +388,7 @@ def generate_code_v2(kernel):
from loopy.schedule import get_one_scheduled_kernel from loopy.schedule import get_one_scheduled_kernel
kernel = get_one_scheduled_kernel(kernel) kernel = get_one_scheduled_kernel(kernel)
if kernel.state != KernelState.SCHEDULED: if kernel.state != KernelState.LINEARIZED:
raise LoopyError("cannot generate code for a kernel that has not been " raise LoopyError("cannot generate code for a kernel that has not been "
"scheduled") "scheduled")
......
...@@ -59,6 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): ...@@ -59,6 +59,7 @@ def get_usable_inames_for_conditional(kernel, sched_index):
from loopy.schedule import ( from loopy.schedule import (
find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within) find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within)
from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase, from loopy.kernel.data import (ConcurrentTag, LocalIndexTagBase,
VectorizeTag,
IlpBaseTag) IlpBaseTag)
result = find_active_inames_at(kernel, sched_index) result = find_active_inames_at(kernel, sched_index)
...@@ -67,7 +68,7 @@ def get_usable_inames_for_conditional(kernel, sched_index): ...@@ -67,7 +68,7 @@ def get_usable_inames_for_conditional(kernel, sched_index):
# Find our containing subkernel. Grab inames for all insns from there. # Find our containing subkernel. Grab inames for all insns from there.
within_subkernel = False within_subkernel = False
for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index+1]): for sched_item_index, sched_item in enumerate(kernel.schedule[:sched_index]):
from loopy.schedule import CallKernel, ReturnFromKernel from loopy.schedule import CallKernel, ReturnFromKernel
if isinstance(sched_item, CallKernel): if isinstance(sched_item, CallKernel):
within_subkernel = True within_subkernel = True
...@@ -92,11 +93,12 @@ def get_usable_inames_for_conditional(kernel, sched_index): ...@@ -92,11 +93,12 @@ def get_usable_inames_for_conditional(kernel, sched_index):
# #
# - local indices may not be used in conditionals that cross barriers. # - local indices may not be used in conditionals that cross barriers.
# #
# - ILP indices are not available in loop bounds, they only get defined # - ILP indices and vector lane indices are not available in loop
# at the innermost level of nesting. # bounds, they only get defined at the innermost level of nesting.
if ( if (
kernel.iname_tags_of_type(iname, ConcurrentTag) kernel.iname_tags_of_type(iname, ConcurrentTag)
and not kernel.iname_tags_of_type(iname, VectorizeTag)
and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase) and not (kernel.iname_tags_of_type(iname, LocalIndexTagBase)
and crosses_barrier) and crosses_barrier)
and not kernel.iname_tags_of_type(iname, IlpBaseTag) and not kernel.iname_tags_of_type(iname, IlpBaseTag)
......
...@@ -24,7 +24,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ...@@ -24,7 +24,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. THE SOFTWARE.
""" """
import six
from loopy.codegen.result import merge_codegen_results, wrap_in_if from loopy.codegen.result import merge_codegen_results, wrap_in_if
import islpy as isl import islpy as isl
from loopy.schedule import ( from loopy.schedule import (
...@@ -33,30 +32,6 @@ from loopy.schedule import ( ...@@ -33,30 +32,6 @@ from loopy.schedule import (
from loopy.diagnostic import LoopyError from loopy.diagnostic import LoopyError
def get_admissible_conditional_inames_for(codegen_state, sched_index):
"""This function disallows conditionals on local-idx tagged
inames if there is a barrier nested somewhere within.
"""
kernel = codegen_state.kernel
from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag,
filter_iname_tags_by_type)
from loopy.schedule import find_active_inames_at, has_barrier_within
result = find_active_inames_at(kernel, sched_index)
has_barrier = has_barrier_within(kernel, sched_index)
for iname, tags in six.iteritems(kernel.iname_to_tags):
if (filter_iname_tags_by_type(tags, HardwareConcurrentTag)
and codegen_state.is_generating_device_code):
if not has_barrier or not filter_iname_tags_by_type(tags, LocalIndexTag):
result.add(iname)
return frozenset(result)
def synthesize_idis_for_extra_args(kernel, schedule_index): def synthesize_idis_for_extra_args(kernel, schedule_index):
""" """
:returns: A list of :class:`loopy.codegen.ImplementedDataInfo` :returns: A list of :class:`loopy.codegen.ImplementedDataInfo`
...@@ -222,14 +197,14 @@ def get_required_predicates(kernel, sched_index): ...@@ -222,14 +197,14 @@ def get_required_predicates(kernel, sched_index):
return result return result
def group_by(l, key, merge): def group_by(entry, key, merge):
if not l: if not entry:
return l return entry
result = [] result = []
previous = l[0] previous = entry[0]
for item in l[1:]: for item in entry[1:]:
if key(previous) == key(item): if key(previous) == key(item):
previous = merge(previous, item) previous = merge(previous, item)
...@@ -302,11 +277,13 @@ def build_loop_nest(codegen_state, schedule_index): ...@@ -302,11 +277,13 @@ def build_loop_nest(codegen_state, schedule_index):
""" """
from loopy.schedule import find_used_inames_within from loopy.schedule import find_used_inames_within
from loopy.codegen.bounds import get_usable_inames_for_conditional
sched_index_info_entries = [ sched_index_info_entries = [
ScheduleIndexInfo( ScheduleIndexInfo(
schedule_indices=[i], schedule_indices=[i],
admissible_cond_inames=( admissible_cond_inames=(
get_admissible_conditional_inames_for(codegen_state, i)), get_usable_inames_for_conditional(kernel, i)),
required_predicates=get_required_predicates(kernel, i), required_predicates=get_required_predicates(kernel, i),
used_inames_within=find_used_inames_within(kernel, i) used_inames_within=find_used_inames_within(kernel, i)
) )
......
...@@ -274,7 +274,7 @@ def generate_c_instruction_code(codegen_state, insn): ...@@ -274,7 +274,7 @@ def generate_c_instruction_code(codegen_state, insn):
if body: if body:
body.append(Line()) body.append(Line())
body.extend(Line(l) for l in insn.code.split("\n")) body.extend(Line(line) for line in insn.code.split("\n"))
return Block(body) return Block(body)
......
...@@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, ...@@ -231,7 +231,7 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func,
kernel = codegen_state.kernel kernel = codegen_state.kernel
from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag, from loopy.kernel.data import (UniqueTag, HardwareConcurrentTag,
LocalIndexTag, GroupIndexTag) LocalIndexTag, GroupIndexTag, VectorizeTag)
from loopy.schedule import get_insn_ids_for_block_at from loopy.schedule import get_insn_ids_for_block_at
insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index) insn_ids_for_block = get_insn_ids_for_block_at(kernel.schedule, schedule_index)
...@@ -242,7 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func, ...@@ -242,7 +242,8 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func,
all_inames_by_insns |= kernel.insn_inames(insn_id) all_inames_by_insns |= kernel.insn_inames(insn_id)
hw_inames_left = [iname for iname in all_inames_by_insns hw_inames_left = [iname for iname in all_inames_by_insns
if kernel.iname_tags_of_type(iname, HardwareConcurrentTag)] if kernel.iname_tags_of_type(iname, HardwareConcurrentTag)
and not kernel.iname_tags_of_type(iname, VectorizeTag)]
if not hw_inames_left: if not hw_inames_left:
return next_func(codegen_state) return next_func(codegen_state)
......
...@@ -86,17 +86,17 @@ def _extract_loopy_lines(source): ...@@ -86,17 +86,17 @@ def _extract_loopy_lines(source):
loopy_lines = [] loopy_lines = []
in_loopy_code = False in_loopy_code = False
for l in lines: for line in lines:
comment_match = comment_re.match(l) comment_match = comment_re.match(line)
if comment_match is None: if comment_match is None:
if in_loopy_code: if in_loopy_code:
raise LoopyError("non-comment source line in loopy block") raise LoopyError("non-comment source line in loopy block")
remaining_lines.append(l) remaining_lines.append(line)
# Preserves line numbers in loopy code, for debuggability # Preserves line numbers in loopy code, for debuggability
loopy_lines.append("# "+l) loopy_lines.append("# "+line)
continue continue
cmt = comment_match.group(1) cmt = comment_match.group(1)
...@@ -108,7 +108,7 @@ def _extract_loopy_lines(source): ...@@ -108,7 +108,7 @@ def _extract_loopy_lines(source):
in_loopy_code = True in_loopy_code = True
# Preserves line numbers in loopy code, for debuggability # Preserves line numbers in loopy code, for debuggability
loopy_lines.append("# "+l) loopy_lines.append("# "+line)
elif cmt_stripped == "$loopy end": elif cmt_stripped == "$loopy end":
if not in_loopy_code: if not in_loopy_code:
...@@ -116,16 +116,16 @@ def _extract_loopy_lines(source): ...@@ -116,16 +116,16 @@ def _extract_loopy_lines(source):
in_loopy_code = False in_loopy_code = False
# Preserves line numbers in loopy code, for debuggability # Preserves line numbers in loopy code, for debuggability
loopy_lines.append("# "+l) loopy_lines.append("# "+line)
elif in_loopy_code: elif in_loopy_code:
loopy_lines.append(cmt) loopy_lines.append(cmt)
else: else:
remaining_lines.append(l) remaining_lines.append(line)
# Preserves line numbers in loopy code, for debuggability # Preserves line numbers in loopy code, for debuggability
loopy_lines.append("# "+l) loopy_lines.append("# "+line)
return "\n".join(remaining_lines), "\n".join(loopy_lines) return "\n".join(remaining_lines), "\n".join(loopy_lines)
......
...@@ -339,11 +339,11 @@ class F2LoopyTranslator(FTreeWalkerBase): ...@@ -339,11 +339,11 @@ class F2LoopyTranslator(FTreeWalkerBase):
return [] return []
map_Logical = map_type_decl map_Logical = map_type_decl # noqa: N815
map_Integer = map_type_decl map_Integer = map_type_decl # noqa: N815
map_Real = map_type_decl map_Real = map_type_decl # noqa: N815
map_Complex = map_type_decl map_Complex = map_type_decl # noqa: N815
map_DoublePrecision = map_type_decl map_DoublePrecision = map_type_decl # noqa: N815
def map_Dimension(self, node): def map_Dimension(self, node):
scope = self.scope_stack[-1] scope = self.scope_stack[-1]
......
...@@ -35,14 +35,13 @@ import islpy as isl ...@@ -35,14 +35,13 @@ import islpy as isl
from islpy import dim_type from islpy import dim_type
import re import re
from pytools import UniqueNameGenerator, generate_unique_names from pytools import UniqueNameGenerator, generate_unique_names, natsorted
from loopy.library.function import ( from loopy.library.function import (
default_function_mangler, default_function_mangler,
single_arg_function_mangler) single_arg_function_mangler)
from loopy.diagnostic import CannotBranchDomainTree, LoopyError from loopy.diagnostic import CannotBranchDomainTree, LoopyError
from loopy.tools import natsorted
from loopy.diagnostic import StaticValueFindingError from loopy.diagnostic import StaticValueFindingError
from loopy.kernel.data import filter_iname_tags_by_type from loopy.kernel.data import filter_iname_tags_by_type
from warnings import warn from warnings import warn
...@@ -99,10 +98,25 @@ class _UniqueVarNameGenerator(UniqueNameGenerator): ...@@ -99,10 +98,25 @@ class _UniqueVarNameGenerator(UniqueNameGenerator):
# {{{ loop kernel object # {{{ loop kernel object
class _deprecated_KernelState_SCHEDULED(object): # noqa
def __init__(self, f):
self.f = f
def __get__(self, obj, klass):
warn(
"'KernelState.SCHEDULED' is deprecated. "
"Use 'KernelState.LINEARIZED'.",
DeprecationWarning, stacklevel=2)
return self.f()
class KernelState: # noqa class KernelState: # noqa
INITIAL = 0 INITIAL = 0
PREPROCESSED = 1 PREPROCESSED = 1
SCHEDULED = 2 LINEARIZED = 2
@_deprecated_KernelState_SCHEDULED
def SCHEDULED(): # pylint:disable=no-method-argument
return KernelState.LINEARIZED
# {{{ kernel_state, KernelState compatibility # {{{ kernel_state, KernelState compatibility
...@@ -228,7 +242,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): ...@@ -228,7 +242,9 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# {{{ constructor # {{{ constructor
def __init__(self, domains, instructions, args=None, schedule=None, def __init__(self, domains, instructions, args=None,
schedule=None,
linearization=None,
name="loopy_kernel", name="loopy_kernel",
preambles=None, preambles=None,
preamble_generators=None, preamble_generators=None,
...@@ -333,10 +349,27 @@ class LoopKernel(ImmutableRecordWithoutPickling): ...@@ -333,10 +349,27 @@ class LoopKernel(ImmutableRecordWithoutPickling):
if state not in [ if state not in [
KernelState.INITIAL, KernelState.INITIAL,
KernelState.PREPROCESSED, KernelState.PREPROCESSED,
KernelState.SCHEDULED, KernelState.LINEARIZED,
]: ]:
raise ValueError("invalid value for 'state'") raise ValueError("invalid value for 'state'")
# `linearization` is replacing `schedule`, but we're not changing
# this under the hood yet, so for now, store it inside `schedule`
# and raise deprecation warning anyway
if schedule is not None:
if linearization is not None:
# these should not both be present
raise ValueError(
"received both `schedule` and `linearization` args, "
"'LoopKernel.schedule' is deprecated. "
"Use 'LoopKernel.linearization'.")
warn(
"'LoopKernel.schedule' is deprecated. "
"Use 'LoopKernel.linearization'.",
DeprecationWarning, stacklevel=2)
elif linearization is not None:
schedule = linearization
from collections import defaultdict from collections import defaultdict
assert not isinstance(iname_to_tags, defaultdict) assert not isinstance(iname_to_tags, defaultdict)
...@@ -1345,7 +1378,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): ...@@ -1345,7 +1378,7 @@ class LoopKernel(ImmutableRecordWithoutPickling):
if "schedule" in what and kernel.schedule is not None: if "schedule" in what and kernel.schedule is not None:
lines.extend(sep) lines.extend(sep)
if show_labels: if show_labels:
lines.append("SCHEDULE:") lines.append("LINEARIZATION:")
from loopy.schedule import dump_schedule from loopy.schedule import dump_schedule
lines.append(dump_schedule(kernel, kernel.schedule)) lines.append(dump_schedule(kernel, kernel.schedule))
...@@ -1395,6 +1428,14 @@ class LoopKernel(ImmutableRecordWithoutPickling): ...@@ -1395,6 +1428,14 @@ class LoopKernel(ImmutableRecordWithoutPickling):
# }}} # }}}
# {{{ handle linearization variable that doesn't yet exist
@property
def linearization(self):
return self.schedule
# }}}
# {{{ direct execution # {{{ direct execution
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
......
...@@ -186,7 +186,7 @@ class LoopedIlpTag(IlpBaseTag): ...@@ -186,7 +186,7 @@ class LoopedIlpTag(IlpBaseTag):
# }}} # }}}
class VectorizeTag(UniqueTag): class VectorizeTag(UniqueTag, HardwareConcurrentTag):
def __str__(self): def __str__(self):
return "vec" return "vec"
......
...@@ -66,7 +66,8 @@ class InstructionBase(ImmutableRecord): ...@@ -66,7 +66,8 @@ class InstructionBase(ImmutableRecord):
.. attribute:: depends_on_is_final .. attribute:: depends_on_is_final
A :class:`bool` determining whether :attr:`depends_on` constitutes A :class:`bool` determining whether :attr:`depends_on` constitutes
the *entire* list of iname dependencies. the *entire* list of iname dependencies. If *not* marked final,
various semi-broken heuristics will try to add further dependencies.
Defaults to *False*. Defaults to *False*.
...@@ -344,10 +345,13 @@ class InstructionBase(ImmutableRecord): ...@@ -344,10 +345,13 @@ class InstructionBase(ImmutableRecord):
""" """
raise NotImplementedError raise NotImplementedError
def with_transformed_expressions(self, f, *args): def with_transformed_expressions(self, f, assignee_f=None):
"""Return a new copy of *self* where *f* has been applied to every """Return a new copy of *self* where *f* has been applied to every
expression occurring in *self*. *args* will be passed as extra expression occurring in *self*. *args* will be passed as extra
arguments (in addition to the expression) to *f*. arguments (in addition to the expression) to *f*.
If *assignee_f* is passed, then left-hand sides of assignments are
passed to it. If it is not given, it defaults to the same as *f*.
""" """
raise NotImplementedError raise NotImplementedError
...@@ -959,12 +963,15 @@ class Assignment(MultiAssignmentBase): ...@@ -959,12 +963,15 @@ class Assignment(MultiAssignmentBase):
def assignee_subscript_deps(self): def assignee_subscript_deps(self):
return (_get_assignee_subscript_deps(self.assignee),) return (_get_assignee_subscript_deps(self.assignee),)
def with_transformed_expressions(self, f, *args): def with_transformed_expressions(self, f, assignee_f=None):
if assignee_f is None:
assignee_f = f
return self.copy( return self.copy(
assignee=f(self.assignee, *args), assignee=assignee_f(self.assignee),
expression=f(self.expression, *args), expression=f(self.expression),
predicates=frozenset( predicates=frozenset(
f(pred, *args) for pred in self.predicates)) f(pred) for pred in self.predicates))
# }}} # }}}
...@@ -1114,12 +1121,15 @@ class CallInstruction(MultiAssignmentBase): ...@@ -1114,12 +1121,15 @@ class CallInstruction(MultiAssignmentBase):
_get_assignee_subscript_deps(a) _get_assignee_subscript_deps(a)
for a in self.assignees) for a in self.assignees)
def with_transformed_expressions(self, f, *args): def with_transformed_expressions(self, f, assignee_f=None):
if assignee_f is None:
assignee_f = f
return self.copy( return self.copy(
assignees=f(self.assignees, *args), assignees=assignee_f(self.assignees),
expression=f(self.expression, *args), expression=f(self.expression),
predicates=frozenset( predicates=frozenset(
f(pred, *args) for pred in self.predicates)) f(pred) for pred in self.predicates))
# }}} # }}}
...@@ -1315,14 +1325,17 @@ class CInstruction(InstructionBase): ...@@ -1315,14 +1325,17 @@ class CInstruction(InstructionBase):
_get_assignee_subscript_deps(a) _get_assignee_subscript_deps(a)
for a in self.assignees) for a in self.assignees)
def with_transformed_expressions(self, f, *args): def with_transformed_expressions(self, f, assignee_f=None):
if assignee_f is None:
assignee_f = f
return self.copy( return self.copy(
iname_exprs=[ iname_exprs=[
(name, f(expr, *args)) (name, f(expr))
for name, expr in self.iname_exprs], for name, expr in self.iname_exprs],
assignees=[f(a, *args) for a in self.assignees], assignees=[assignee_f(a) for a in self.assignees],
predicates=frozenset( predicates=frozenset(
f(pred, *args) for pred in self.predicates)) f(pred) for pred in self.predicates))
# }}} # }}}
...@@ -1357,7 +1370,7 @@ class _DataObliviousInstruction(InstructionBase): ...@@ -1357,7 +1370,7 @@ class _DataObliviousInstruction(InstructionBase):
def assignee_subscript_deps(self): def assignee_subscript_deps(self):
return frozenset() return frozenset()
def with_transformed_expressions(self, f, *args): def with_transformed_expressions(self, f, assignee_f=None):
return self.copy( return self.copy(
predicates=frozenset( predicates=frozenset(
f(pred) for pred in self.predicates)) f(pred) for pred in self.predicates))
......
...@@ -34,8 +34,7 @@ import numpy as np ...@@ -34,8 +34,7 @@ import numpy as np
import islpy as isl import islpy as isl
from islpy import dim_type from islpy import dim_type
from loopy.diagnostic import LoopyError, warn_with_kernel from loopy.diagnostic import LoopyError, warn_with_kernel
from pytools import memoize_on_first_arg from pytools import memoize_on_first_arg, natsorted
from loopy.tools import natsorted
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -1381,7 +1380,7 @@ def draw_dependencies_as_unicode_arrows( ...@@ -1381,7 +1380,7 @@ def draw_dependencies_as_unicode_arrows(
.replace(style.RESET_ALL, "")) .replace(style.RESET_ALL, ""))
return len(s) return len(s)
def truncate_without_color_escapes(s, l): def truncate_without_color_escapes(s, length):
# FIXME: This is a bit dumb--it removes color escapes when truncation # FIXME: This is a bit dumb--it removes color escapes when truncation
# is needed. # is needed.
...@@ -1389,7 +1388,7 @@ def draw_dependencies_as_unicode_arrows( ...@@ -1389,7 +1388,7 @@ def draw_dependencies_as_unicode_arrows(
.replace(fore.RED, "") .replace(fore.RED, "")
.replace(style.RESET_ALL, "")) .replace(style.RESET_ALL, ""))
return s[:l] + u"" return s[:length] + u""
def conform_to_uniform_length(s): def conform_to_uniform_length(s):
len_s = len_without_color_escapes(s) len_s = len_without_color_escapes(s)
...@@ -1428,6 +1427,8 @@ def stringify_instruction_list(kernel): ...@@ -1428,6 +1427,8 @@ def stringify_instruction_list(kernel):
def insert_insn_into_order(insn): def insert_insn_into_order(insn):
if insn.id in printed_insn_ids: if insn.id in printed_insn_ids:
# Note: dependency cycles are deliberately ignored so that printing
# succeeds.
return return
printed_insn_ids.add(insn.id) printed_insn_ids.add(insn.id)
...@@ -1511,7 +1512,7 @@ def stringify_instruction_list(kernel): ...@@ -1511,7 +1512,7 @@ def stringify_instruction_list(kernel):
", ".join("%s=%s" % (name, expr) ", ".join("%s=%s" % (name, expr)
for name, expr in insn.iname_exprs)) for name, expr in insn.iname_exprs))
trailing = [l for l in insn.code.split("\n")] trailing = insn.code.split("\n")
elif isinstance(insn, lp.BarrierInstruction): elif isinstance(insn, lp.BarrierInstruction):
lhs = "" lhs = ""
rhs = "... %sbarrier" % insn.synchronization_kind[0] rhs = "... %sbarrier" % insn.synchronization_kind[0]
...@@ -1583,6 +1584,13 @@ def stringify_instruction_list(kernel): ...@@ -1583,6 +1584,13 @@ def stringify_instruction_list(kernel):
# {{{ global barrier order finding # {{{ global barrier order finding
def _is_global_barrier(kernel, insn_id):
insn = kernel.id_to_insn[insn_id]
from loopy.kernel.instruction import BarrierInstruction
return isinstance(insn, BarrierInstruction) and \
insn.synchronization_kind == "global"
@memoize_on_first_arg @memoize_on_first_arg
def get_global_barrier_order(kernel): def get_global_barrier_order(kernel):
"""Return a :class:`tuple` of the listing the ids of global barrier instructions """Return a :class:`tuple` of the listing the ids of global barrier instructions
...@@ -1590,49 +1598,27 @@ def get_global_barrier_order(kernel): ...@@ -1590,49 +1598,27 @@ def get_global_barrier_order(kernel):
See also :class:`loopy.instruction.BarrierInstruction`. See also :class:`loopy.instruction.BarrierInstruction`.
""" """
barriers = [] dep_graph = {insn.id: set() for insn in kernel.instructions}
visiting = set() for insn in kernel.instructions:
visited = set() for dep in insn.depends_on:
dep_graph[dep].add(insn.id)
unvisited = set(insn.id for insn in kernel.instructions)
def is_barrier(my_insn_id):
insn = kernel.id_to_insn[my_insn_id]
from loopy.kernel.instruction import BarrierInstruction
return isinstance(insn, BarrierInstruction) and \
insn.synchronization_kind == "global"
while unvisited:
stack = [unvisited.pop()]
while stack:
top = stack[-1]
if top in visiting:
visiting.remove(top)
if is_barrier(top):
barriers.append(top)
if top in visited: from pytools.graph import compute_topological_order
stack.pop() order = compute_topological_order(dep_graph)
continue
visited.add(top) barriers = [
visiting.add(top) insn_id for insn_id in order
if _is_global_barrier(kernel, insn_id)]
for child in kernel.id_to_insn[top].depends_on: del order
# Check for no cycles.
assert child not in visiting
stack.append(child)
# Ensure this is the only possible order. # Ensure this is the only possible order.
# #
# We do this by looking at the barriers in order. # We do this by looking at the barriers in order.
# We check for each adjacent pair (a,b) in the order if a < b, # We check for each adjacent pair (a,b) in the order if a < b,
# i.e. if a is reachable by a chain of dependencies from b. # i.e. if a is reachable by a chain of dependencies from b.
visited = set()
visiting.clear() visiting = set()
visited.clear()
for prev_barrier, barrier in zip(barriers, barriers[1:]): for prev_barrier, barrier in zip(barriers, barriers[1:]):
# Check if prev_barrier is reachable from barrier. # Check if prev_barrier is reachable from barrier.
...@@ -1690,12 +1676,6 @@ def find_most_recent_global_barrier(kernel, insn_id): ...@@ -1690,12 +1676,6 @@ def find_most_recent_global_barrier(kernel, insn_id):
if len(insn.depends_on) == 0: if len(insn.depends_on) == 0:
return None return None
def is_barrier(my_insn_id):
insn = kernel.id_to_insn[my_insn_id]
from loopy.kernel.instruction import BarrierInstruction
return isinstance(insn, BarrierInstruction) and \
insn.synchronization_kind == "global"
global_barrier_to_ordinal = dict( global_barrier_to_ordinal = dict(
(b, i) for i, b in enumerate(global_barrier_order)) (b, i) for i, b in enumerate(global_barrier_order))
...@@ -1705,7 +1685,7 @@ def find_most_recent_global_barrier(kernel, insn_id): ...@@ -1705,7 +1685,7 @@ def find_most_recent_global_barrier(kernel, insn_id):
else -1) else -1)
direct_barrier_dependencies = set( direct_barrier_dependencies = set(
dep for dep in insn.depends_on if is_barrier(dep)) dep for dep in insn.depends_on if _is_global_barrier(kernel, dep))
if len(direct_barrier_dependencies) > 0: if len(direct_barrier_dependencies) > 0:
return max(direct_barrier_dependencies, key=get_barrier_ordinal) return max(direct_barrier_dependencies, key=get_barrier_ordinal)
...@@ -1727,8 +1707,8 @@ def get_subkernels(kernel): ...@@ -1727,8 +1707,8 @@ def get_subkernels(kernel):
See also :class:`loopy.schedule.CallKernel`. See also :class:`loopy.schedule.CallKernel`.
""" """
from loopy.kernel import KernelState from loopy.kernel import KernelState
if kernel.state != KernelState.SCHEDULED: if kernel.state != KernelState.LINEARIZED:
raise LoopyError("Kernel must be scheduled") raise LoopyError("Kernel must be linearized")
from loopy.schedule import CallKernel from loopy.schedule import CallKernel
...@@ -1744,7 +1724,7 @@ def get_subkernel_to_insn_id_map(kernel): ...@@ -1744,7 +1724,7 @@ def get_subkernel_to_insn_id_map(kernel):
kernel must be scheduled. kernel must be scheduled.
""" """
from loopy.kernel import KernelState from loopy.kernel import KernelState
if kernel.state != KernelState.SCHEDULED: if kernel.state != KernelState.LINEARIZED:
raise LoopyError("Kernel must be scheduled") raise LoopyError("Kernel must be scheduled")
from loopy.schedule import ( from loopy.schedule import (
......
...@@ -37,6 +37,7 @@ from loopy.version import DATA_MODEL_VERSION ...@@ -37,6 +37,7 @@ from loopy.version import DATA_MODEL_VERSION
from loopy.kernel.data import make_assignment, filter_iname_tags_by_type from loopy.kernel.data import make_assignment, filter_iname_tags_by_type
# for the benefit of loopy.statistics, for now # for the benefit of loopy.statistics, for now
from loopy.type_inference import infer_unknown_types from loopy.type_inference import infer_unknown_types
from loopy.transform.iname import remove_any_newly_unused_inames
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -289,7 +290,7 @@ def _classify_reduction_inames(kernel, inames): ...@@ -289,7 +290,7 @@ def _classify_reduction_inames(kernel, inames):
nonlocal_par = [] nonlocal_par = []
from loopy.kernel.data import ( from loopy.kernel.data import (
LocalIndexTagBase, UnrolledIlpTag, UnrollTag, VectorizeTag, LocalIndexTagBase, UnrolledIlpTag, UnrollTag,
ConcurrentTag, filter_iname_tags_by_type) ConcurrentTag, filter_iname_tags_by_type)
for iname in inames: for iname in inames:
...@@ -303,7 +304,7 @@ def _classify_reduction_inames(kernel, inames): ...@@ -303,7 +304,7 @@ def _classify_reduction_inames(kernel, inames):
elif filter_iname_tags_by_type(iname_tags, LocalIndexTagBase): elif filter_iname_tags_by_type(iname_tags, LocalIndexTagBase):
local_par.append(iname) local_par.append(iname)
elif filter_iname_tags_by_type(iname_tags, (ConcurrentTag, VectorizeTag)): elif filter_iname_tags_by_type(iname_tags, ConcurrentTag):
nonlocal_par.append(iname) nonlocal_par.append(iname)
else: else:
...@@ -882,6 +883,7 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain): ...@@ -882,6 +883,7 @@ def _insert_subdomain_into_domain_tree(kernel, domains, subdomain):
# }}} # }}}
@remove_any_newly_unused_inames
def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
automagic_scans_ok=False, force_scan=False, automagic_scans_ok=False, force_scan=False,
force_outer_iname_for_scan=None): force_outer_iname_for_scan=None):
...@@ -1370,7 +1372,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, ...@@ -1370,7 +1372,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
track_iname = var_name_gen( track_iname = var_name_gen(
"{sweep_iname}__seq_scan" "{sweep_iname}__seq_scan"
.format(scan_iname=scan_iname, sweep_iname=sweep_iname)) .format(sweep_iname=sweep_iname))
get_or_add_sweep_tracking_iname_and_domain( get_or_add_sweep_tracking_iname_and_domain(
scan_iname, sweep_iname, sweep_min_value, scan_min_value, scan_iname, sweep_iname, sweep_min_value, scan_min_value,
...@@ -1480,7 +1482,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, ...@@ -1480,7 +1482,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
track_iname = var_name_gen( track_iname = var_name_gen(
"{sweep_iname}__pre_scan" "{sweep_iname}__pre_scan"
.format(scan_iname=scan_iname, sweep_iname=sweep_iname)) .format(sweep_iname=sweep_iname))
get_or_add_sweep_tracking_iname_and_domain( get_or_add_sweep_tracking_iname_and_domain(
scan_iname, sweep_iname, sweep_min_value, scan_min_value, stride, scan_iname, sweep_iname, sweep_min_value, scan_min_value, stride,
...@@ -1924,8 +1926,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, ...@@ -1924,8 +1926,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True,
kernel = lp.tag_inames(kernel, new_iname_tags) kernel = lp.tag_inames(kernel, new_iname_tags)
# TODO: remove unused inames...
kernel = ( kernel = (
_hackily_ensure_multi_assignment_return_values_are_scoped_private( _hackily_ensure_multi_assignment_return_values_are_scoped_private(
kernel)) kernel))
...@@ -1979,7 +1979,7 @@ def find_idempotence(kernel): ...@@ -1979,7 +1979,7 @@ def find_idempotence(kernel):
# Find SCCs of dep_graph. These are used for checking if the instruction is # Find SCCs of dep_graph. These are used for checking if the instruction is
# in a dependency cycle. # in a dependency cycle.
from loopy.tools import compute_sccs from pytools.graph import compute_sccs
sccs = dict((item, scc) sccs = dict((item, scc)
for scc in compute_sccs(dep_graph) for scc in compute_sccs(dep_graph)
......
...@@ -212,12 +212,12 @@ def find_loop_nest_with_map(kernel): ...@@ -212,12 +212,12 @@ def find_loop_nest_with_map(kernel):
""" """
result = {} result = {}
from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag from loopy.kernel.data import ConcurrentTag, IlpBaseTag
all_nonpar_inames = set( all_nonpar_inames = set(
iname for iname in kernel.all_inames() iname for iname in kernel.all_inames()
if not kernel.iname_tags_of_type(iname, if not kernel.iname_tags_of_type(iname,
(ConcurrentTag, IlpBaseTag, VectorizeTag))) (ConcurrentTag, IlpBaseTag)))
iname_to_insns = kernel.iname_to_insns() iname_to_insns = kernel.iname_to_insns()
...@@ -276,7 +276,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): ...@@ -276,7 +276,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
result = {} result = {}
from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag from loopy.kernel.data import ConcurrentTag, IlpBaseTag
for insn in kernel.instructions: for insn in kernel.instructions:
for iname in kernel.insn_inames(insn): for iname in kernel.insn_inames(insn):
if kernel.iname_tags_of_type(iname, ConcurrentTag): if kernel.iname_tags_of_type(iname, ConcurrentTag):
...@@ -310,7 +310,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): ...@@ -310,7 +310,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
continue continue
if kernel.iname_tags_of_type(dep_insn_iname, if kernel.iname_tags_of_type(dep_insn_iname,
(ConcurrentTag, IlpBaseTag, VectorizeTag)): (ConcurrentTag, IlpBaseTag)):
# Parallel tags don't really nest, so we'll disregard # Parallel tags don't really nest, so we'll disregard
# them here. # them here.
continue continue
...@@ -1841,7 +1841,7 @@ def generate_loop_schedules(kernel, debug_args={}): ...@@ -1841,7 +1841,7 @@ def generate_loop_schedules(kernel, debug_args={}):
def generate_loop_schedules_inner(kernel, debug_args={}): def generate_loop_schedules_inner(kernel, debug_args={}):
from loopy.kernel import KernelState from loopy.kernel import KernelState
if kernel.state not in (KernelState.PREPROCESSED, KernelState.SCHEDULED): if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED):
raise LoopyError("cannot schedule a kernel that has not been " raise LoopyError("cannot schedule a kernel that has not been "
"preprocessed") "preprocessed")
...@@ -1852,7 +1852,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ...@@ -1852,7 +1852,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
debug = ScheduleDebugger(**debug_args) debug = ScheduleDebugger(**debug_args)
preschedule = kernel.schedule if kernel.state == KernelState.SCHEDULED else () preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else ()
prescheduled_inames = set( prescheduled_inames = set(
insn.iname insn.iname
...@@ -1904,7 +1904,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ...@@ -1904,7 +1904,7 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
unscheduled_insn_ids=set(insn.id for insn in kernel.instructions), unscheduled_insn_ids=set(insn.id for insn in kernel.instructions),
scheduled_insn_ids=frozenset(), scheduled_insn_ids=frozenset(),
within_subkernel=kernel.state != KernelState.SCHEDULED, within_subkernel=kernel.state != KernelState.LINEARIZED,
may_schedule_global_barriers=True, may_schedule_global_barriers=True,
preschedule=preschedule, preschedule=preschedule,
...@@ -1973,11 +1973,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): ...@@ -1973,11 +1973,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}):
new_kernel = kernel.copy( new_kernel = kernel.copy(
schedule=gen_sched, schedule=gen_sched,
state=KernelState.SCHEDULED) state=KernelState.LINEARIZED)
from loopy.schedule.device_mapping import \ from loopy.schedule.device_mapping import \
map_schedule_onto_host_or_device map_schedule_onto_host_or_device
if kernel.state != KernelState.SCHEDULED: if kernel.state != KernelState.LINEARIZED:
# Device mapper only gets run once. # Device mapper only gets run once.
new_kernel = map_schedule_onto_host_or_device(new_kernel) new_kernel = map_schedule_onto_host_or_device(new_kernel)
...@@ -2029,6 +2029,15 @@ def _get_one_scheduled_kernel_inner(kernel): ...@@ -2029,6 +2029,15 @@ def _get_one_scheduled_kernel_inner(kernel):
def get_one_scheduled_kernel(kernel): def get_one_scheduled_kernel(kernel):
warn_with_kernel(
kernel, "get_one_scheduled_kernel_deprecated",
"get_one_scheduled_kernel is deprecated. "
"Use get_one_linearized_kernel instead.",
DeprecationWarning)
return get_one_linearized_kernel(kernel)
def get_one_linearized_kernel(kernel):
from loopy import CACHING_ENABLED from loopy import CACHING_ENABLED
sched_cache_key = kernel sched_cache_key = kernel
......
...@@ -31,7 +31,7 @@ from loopy.schedule.tools import get_block_boundaries ...@@ -31,7 +31,7 @@ from loopy.schedule.tools import get_block_boundaries
def map_schedule_onto_host_or_device(kernel): def map_schedule_onto_host_or_device(kernel):
# FIXME: Should be idempotent. # FIXME: Should be idempotent.
from loopy.kernel import KernelState from loopy.kernel import KernelState
assert kernel.state == KernelState.SCHEDULED assert kernel.state == KernelState.LINEARIZED
from functools import partial from functools import partial
device_prog_name_gen = partial( device_prog_name_gen = partial(
......
...@@ -1863,75 +1863,4 @@ def gather_access_footprint_bytes(kernel, ignore_uncountable=False): ...@@ -1863,75 +1863,4 @@ def gather_access_footprint_bytes(kernel, ignore_uncountable=False):
# }}} # }}}
# {{{ compat goop
def get_lmem_access_poly(knl):
"""Count the number of local memory accesses in a loopy kernel.
get_lmem_access_poly is deprecated. Use get_mem_access_map and filter the
result with the mtype=['local'] option.
"""
warn_with_kernel(knl, "deprecated_get_lmem_access_poly",
"get_lmem_access_poly is deprecated. Use "
"get_mem_access_map and filter the result with the "
"mtype=['local'] option.")
return get_mem_access_map(knl).filter_by(mtype=['local'])
def get_DRAM_access_poly(knl):
"""Count the number of global memory accesses in a loopy kernel.
get_DRAM_access_poly is deprecated. Use get_mem_access_map and filter the
result with the mtype=['global'] option.
"""
warn_with_kernel(knl, "deprecated_get_DRAM_access_poly",
"get_DRAM_access_poly is deprecated. Use "
"get_mem_access_map and filter the result with the "
"mtype=['global'] option.")
return get_mem_access_map(knl).filter_by(mtype=['global'])
def get_gmem_access_poly(knl):
"""Count the number of global memory accesses in a loopy kernel.
get_DRAM_access_poly is deprecated. Use get_mem_access_map and filter the
result with the mtype=['global'] option.
"""
warn_with_kernel(knl, "deprecated_get_gmem_access_poly",
"get_DRAM_access_poly is deprecated. Use "
"get_mem_access_map and filter the result with the "
"mtype=['global'] option.")
return get_mem_access_map(knl).filter_by(mtype=['global'])
def get_synchronization_poly(knl):
"""Count the number of synchronization events each work-item encounters in
a loopy kernel.
get_synchronization_poly is deprecated. Use get_synchronization_map
instead.
"""
warn_with_kernel(knl, "deprecated_get_synchronization_poly",
"get_synchronization_poly is deprecated. Use "
"get_synchronization_map instead.")
return get_synchronization_map(knl)
def get_op_poly(knl, numpy_types=True):
"""Count the number of operations in a loopy kernel.
get_op_poly is deprecated. Use get_op_map instead.
"""
warn_with_kernel(knl, "deprecated_get_op_poly",
"get_op_poly is deprecated. Use get_op_map instead.")
return get_op_map(knl, numpy_types)
# }}}
# vim: foldmethod=marker # vim: foldmethod=marker
...@@ -273,8 +273,7 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase): ...@@ -273,8 +273,7 @@ class UnidirectionalUnifier(UnidirectionalUnifierBase):
if not isinstance(other, type(expr)): if not isinstance(other, type(expr)):
return self.treat_mismatch(expr, other, unis) return self.treat_mismatch(expr, other, unis)
if (expr.inames != other.inames if (expr.inames != other.inames
or type(expr.operation) != type(other.operation) # noqa or type(expr.operation) != type(other.operation)): # noqa
):
return [] return []
return self.rec(expr.expr, other.expr, unis) return self.rec(expr.expr, other.expr, unis)
...@@ -971,7 +970,8 @@ class RuleAwareIdentityMapper(IdentityMapper): ...@@ -971,7 +970,8 @@ class RuleAwareIdentityMapper(IdentityMapper):
# may perform tasks entirely unrelated to subst rules, so # may perform tasks entirely unrelated to subst rules, so
# we must map assignees, too. # we must map assignees, too.
self.map_instruction(kernel, self.map_instruction(kernel,
insn.with_transformed_expressions(self, kernel, insn)) insn.with_transformed_expressions(
lambda expr: self(expr, kernel, insn)))
for insn in kernel.instructions] for insn in kernel.instructions]
return kernel.copy(instructions=new_insns) return kernel.copy(instructions=new_insns)
......
...@@ -80,6 +80,11 @@ class DTypeRegistryWrapper(object): ...@@ -80,6 +80,11 @@ class DTypeRegistryWrapper(object):
def c99_preamble_generator(preamble_info): def c99_preamble_generator(preamble_info):
if any(dtype.is_integral() for dtype in preamble_info.seen_dtypes): if any(dtype.is_integral() for dtype in preamble_info.seen_dtypes):
yield("10_stdint", "#include <stdint.h>") yield("10_stdint", "#include <stdint.h>")
if any(dtype.numpy_dtype == np.dtype("bool")
for dtype in preamble_info.seen_dtypes):
yield("10_stdbool", "#include <stdbool.h>")
if any(dtype.is_complex() for dtype in preamble_info.seen_dtypes):
yield("10_complex", "#include <complex.h>")
def _preamble_generator(preamble_info): def _preamble_generator(preamble_info):
...@@ -436,7 +441,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): ...@@ -436,7 +441,7 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True):
arg_dtypes=arg_dtypes) arg_dtypes=arg_dtypes)
# binary functions # binary functions
if (name in ["fmax", "fmin"] if (name in ["fmax", "fmin", "copysign"]
and len(arg_dtypes) == 2): and len(arg_dtypes) == 2):
dtype = np.find_common_type( dtype = np.find_common_type(
...@@ -1079,9 +1084,11 @@ class CTarget(CFamilyTarget): ...@@ -1079,9 +1084,11 @@ class CTarget(CFamilyTarget):
@memoize_method @memoize_method
def get_dtype_registry(self): def get_dtype_registry(self):
from loopy.target.c.compyte.dtypes import ( from loopy.target.c.compyte.dtypes import (
DTypeRegistry, fill_registry_with_c99_stdint_types) DTypeRegistry, fill_registry_with_c99_stdint_types,
fill_registry_with_c99_complex_types)
result = DTypeRegistry() result = DTypeRegistry()
fill_registry_with_c99_stdint_types(result) fill_registry_with_c99_stdint_types(result)
fill_registry_with_c99_complex_types(result)
return DTypeRegistryWrapper(result) return DTypeRegistryWrapper(result)
......
Subproject commit 25ee8b48fd0c7d9f0bd987c6862cdb1884fb1372 Subproject commit 7e48e1166a13cfbb7b60f909b071f088034ffda1
# coding: utf-8
"""OpenCL target integrated with PyOpenCL.""" """OpenCL target integrated with PyOpenCL."""
from __future__ import division, absolute_import from __future__ import division, absolute_import
...@@ -285,6 +286,9 @@ class PyOpenCLTarget(OpenCLTarget): ...@@ -285,6 +286,9 @@ class PyOpenCLTarget(OpenCLTarget):
warnings) and support for complex numbers. warnings) and support for complex numbers.
""" """
# FIXME make prefixes conform to naming rules
# (see Reference: Loopy’s Model of a Kernel)
host_program_name_prefix = "_lpy_host_" host_program_name_prefix = "_lpy_host_"
host_program_name_suffix = "" host_program_name_suffix = ""
...@@ -299,7 +303,26 @@ class PyOpenCLTarget(OpenCLTarget): ...@@ -299,7 +303,26 @@ class PyOpenCLTarget(OpenCLTarget):
self.device = device self.device = device
self.pyopencl_module_name = pyopencl_module_name self.pyopencl_module_name = pyopencl_module_name
comparison_fields = ["device"] # NB: Not including 'device', as that is handled specially here.
hash_fields = OpenCLTarget.hash_fields + (
"pyopencl_module_name",)
comparison_fields = OpenCLTarget.comparison_fields + (
"pyopencl_module_name",)
def __eq__(self, other):
if not super(PyOpenCLTarget, self).__eq__(other):
return False
if (self.device is None) != (other.device is None):
return False
if self.device is not None:
assert other.device is not None
return (self.device.persistent_unique_id
== other.device.persistent_unique_id)
else:
assert other.device is None
return True
def update_persistent_hash(self, key_hash, key_builder): def update_persistent_hash(self, key_hash, key_builder):
super(PyOpenCLTarget, self).update_persistent_hash(key_hash, key_builder) super(PyOpenCLTarget, self).update_persistent_hash(key_hash, key_builder)
......