diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 583f7e7bf2b211e07aa29064ca9928c92f95f3c5..a41af3cf1b7559f7813edc478854471bfe09dbac 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -37,9 +37,11 @@ from loopy.kernel.data import ( from loopy.diagnostic import LoopyError, warn_with_kernel import islpy as isl from islpy import dim_type +from pytools import ProcessLogger import six from six.moves import range, zip, intern +import loopy.version import re @@ -1944,10 +1946,9 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): *seq_dependencies* added. """ - from time import time - logger.debug( - "%s: kernel creation start" % kwargs.get("name", "(unnamed)")) - kernel_creation_start_time = time() + creation_plog = ProcessLogger( + logger, + "%s: instantiate" % kwargs.get("name", "(unnamed)")) defines = kwargs.pop("defines", {}) default_order = kwargs.pop("default_order", "C") @@ -1981,12 +1982,18 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): from loopy.options import make_options options = make_options(options) + # {{{ handle kernel language version + + from loopy.version import LANGUAGE_VERSION_SYMBOLS + + version_to_symbol = dict( + (getattr(loopy.version, lvs), lvs) + for lvs in LANGUAGE_VERSION_SYMBOLS) + lang_version = kwargs.pop("lang_version", None) if lang_version is None: # {{{ peek into caller's module to look for LOOPY_KERNEL_LANGUAGE_VERSION - from loopy.version import LANGUAGE_VERSION_SYMBOLS - # This *is* gross. But it seems like the right thing interface-wise. import inspect caller_globals = inspect.currentframe().f_back.f_globals @@ -2000,11 +2007,6 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): # }}} - import loopy.version - version_to_symbol = dict( - (getattr(loopy.version, lvs), lvs) - for lvs in LANGUAGE_VERSION_SYMBOLS) - if lang_version is None: from warnings import warn from loopy.diagnostic import LoopyWarning @@ -2025,14 +2027,15 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): lang_version = FALLBACK_LANGUAGE_VERSION - if lang_version not in version_to_symbol: - raise LoopyError("Language version '%s' is not known." % lang_version) - + if lang_version not in version_to_symbol: + raise LoopyError("Language version '%s' is not known." % (lang_version,)) if lang_version >= (2018, 1): options = options.copy(enforce_variable_access_ordered=True) if lang_version >= (2018, 2): options = options.copy(ignore_boostable_into=True) + # }}} + if isinstance(silenced_warnings, str): silenced_warnings = silenced_warnings.split(";") @@ -2170,14 +2173,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): from loopy.preprocess import prepare_for_caching knl = prepare_for_caching(knl) - creation_elapsed = time() - kernel_creation_start_time - if creation_elapsed > 0.1: - time_logger = logger.info - else: - time_logger = logger.debug - - time_logger( - "%s: kernel creation done after %g s", knl.name, creation_elapsed) + creation_plog.done() return knl diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 2c9964b11ad30c5b6b6ffacacc7f67ef239b50a1..3c9a6baed99b9e3267608570aaa8dac5c73ddff7 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -29,7 +29,7 @@ import sys import islpy as isl from loopy.diagnostic import warn_with_kernel, LoopyError # noqa -from pytools import MinRecursionLimit +from pytools import MinRecursionLimit, ProcessLogger from pytools.persistent_dict import WriteOncePersistentDict from loopy.tools import LoopyKeyBuilder @@ -1930,14 +1930,9 @@ def generate_loop_schedules_inner(kernel, debug_args={}): uses_of_boostability=[]) - generators = [] - - if not kernel.options.ignore_boostable_into: - generators.append(generate_loop_schedules_internal(sched_state, - debug=debug, allow_boost=None)) - - generators.append(generate_loop_schedules_internal(sched_state, - debug=debug)) + schedule_gen_kwargs = {} + if kernel.options.ignore_boostable_into: + schedule_gen_kwargs["allow_boost"] = None def print_longest_dead_end(): if debug.interactive: @@ -1957,8 +1952,8 @@ def generate_loop_schedules_inner(kernel, debug_args={}): debug.debug_length = len(debug.longest_rejected_schedule) while True: try: - for _ in generate_loop_schedules_internal(sched_state, - debug=debug): + for _ in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): pass except ScheduleDebugInput as e: @@ -1968,48 +1963,44 @@ def generate_loop_schedules_inner(kernel, debug_args={}): break try: - for gen in generators: - for gen_sched in gen: - debug.stop() + for gen_sched in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): + debug.stop() - gen_sched = filter_nops_from_schedule(kernel, gen_sched) - gen_sched = convert_barrier_instructions_to_barriers( - kernel, gen_sched) + gen_sched = filter_nops_from_schedule(kernel, gen_sched) + gen_sched = convert_barrier_instructions_to_barriers( + kernel, gen_sched) - gsize, lsize = kernel.get_grid_size_upper_bounds() + gsize, lsize = kernel.get_grid_size_upper_bounds() - if (gsize or lsize): - if not kernel.options.disable_global_barriers: - logger.debug("%s: barrier insertion: global" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, - synchronization_kind="global", verify_only=True) - - logger.debug("%s: barrier insertion: local" % kernel.name) + if (gsize or lsize): + if not kernel.options.disable_global_barriers: + logger.debug("%s: barrier insertion: global" % kernel.name) gen_sched = insert_barriers(kernel, gen_sched, - synchronization_kind="local", verify_only=False) - logger.debug("%s: barrier insertion: done" % kernel.name) + synchronization_kind="global", verify_only=True) - new_kernel = kernel.copy( - schedule=gen_sched, - state=kernel_state.SCHEDULED) + logger.debug("%s: barrier insertion: local" % kernel.name) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="local", verify_only=False) + logger.debug("%s: barrier insertion: done" % kernel.name) - from loopy.schedule.device_mapping import \ - map_schedule_onto_host_or_device - if kernel.state != kernel_state.SCHEDULED: - # Device mapper only gets run once. - new_kernel = map_schedule_onto_host_or_device(new_kernel) + new_kernel = kernel.copy( + schedule=gen_sched, + state=kernel_state.SCHEDULED) - from loopy.schedule.tools import add_extra_args_to_schedule - new_kernel = add_extra_args_to_schedule(new_kernel) - yield new_kernel + from loopy.schedule.device_mapping import \ + map_schedule_onto_host_or_device + if kernel.state != kernel_state.SCHEDULED: + # Device mapper only gets run once. + new_kernel = map_schedule_onto_host_or_device(new_kernel) - debug.start() + from loopy.schedule.tools import add_extra_args_to_schedule + new_kernel = add_extra_args_to_schedule(new_kernel) + yield new_kernel - schedule_count += 1 + debug.start() - # if no-boost mode yielded a viable schedule, stop now - if schedule_count: - break + schedule_count += 1 except KeyboardInterrupt: print() @@ -2066,16 +2057,9 @@ def get_one_scheduled_kernel(kernel): pass if not from_cache: - from time import time - start_time = time() - - logger.info("%s: schedule start" % kernel.name) - - with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel) - - logger.info("%s: scheduling done after %.2f s" % ( - kernel.name, time()-start_time)) + with ProcessLogger(logger, "%s: schedule" % kernel.name): + with MinRecursionLimitForScheduling(kernel): + result = _get_one_scheduled_kernel_inner(kernel) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/setup.py b/setup.py index c84a63fe30b6c623090bab9c9b88da77d05ac62d..4229aeb45084cb30b0b8942d0c05d27a1aab2144 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ setup(name="loo.py", ], install_requires=[ - "pytools>=2018.3", + "pytools>=2018.4", "pymbolic>=2016.2", "genpy>=2016.1.2", "cgen>=2016.1",