diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d8101763de864e20bd92c6be0d1fef0e31d1b31..05b2e323793ee19e202f6e89425e26f5f9fb2582 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,8 +4,6 @@ on: branches: - master pull_request: - paths-ignore: - - 'doc/*.rst' schedule: - cron: '17 3 * * 0' diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d69f0b8c489c07d3aa1512f6f1cbb8ced0f6a2e9..f0e9aa0e593784742a9c2587c6e037f0b111d127 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -89,6 +89,8 @@ Python 3 POCL Examples: - python3 - pocl - large-node + # For examples/python/ispc-stream-harness.py + - avx2 except: - tags diff --git a/doc/conf.py b/doc/conf.py index 942afcd3ce11056c65c6a7500bb5ed312dc40187..9b8cf81e11dbbaee53110c36b1e601a80ae0104b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,52 +1,35 @@ -# -# loopy documentation build configuration file, created by -# sphinx-quickstart on Tue Aug 9 13:40:49 2011. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -#import sys import os -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) - # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +#needs_sphinx = "1.0" # Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +# coming with Sphinx (named "sphinx.ext.*") or your custom ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - #'sphinx.ext.viewcode', - 'sphinx.ext.doctest', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + #"sphinx.ext.viewcode", + "sphinx.ext.doctest", + "sphinx_copybutton", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +#source_encoding = "utf-8-sig" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'loopy' -copyright = '2016, Andreas Klöckner' +project = "loopy" +copyright = "2016, Andreas Klöckner" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -59,7 +42,7 @@ with open(_version_source) as vpy_file: version_py = vpy_file.read() os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] = "1" -exec(compile(version_py, _version_source, 'exec'), ver_dic) +exec(compile(version_py, _version_source, "exec"), ver_dic) version = ".".join(str(x) for x in ver_dic["VERSION"]) # The full version, including alpha/beta/rc tags. release = ver_dic["VERSION_TEXT"] @@ -77,7 +60,7 @@ del os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
-exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None @@ -94,7 +77,7 @@ exclude_patterns = ['_build'] #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] @@ -102,135 +85,16 @@ pygments_style = 'sphinx' # -- Options for HTML output --------------------------------------------------- -html_theme = "alabaster" +html_theme = "furo" html_theme_options = { - "extra_nav_links": { - "🚀 Github": "https://github.com/inducer/loopy", - "💾 Download Releases": "https://pypi.org/project/loopy", - } } html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', - 'searchbox.html', - ] -} - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False + } # If true, links to the reST sources are added to the pages. -html_show_sourcelink = False - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. 
-htmlhelp_basename = 'loopydoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'loopy.tex', 'loopy Documentation', - 'Andreas Kloeckner', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True +html_show_sourcelink = True # -- Options for manual page output -------------------------------------------- @@ -238,20 +102,21 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'loopy', 'loopy Documentation', - ['Andreas Kloeckner'], 1) + ("index", "loopy", "loopy Documentation", + ["Andreas Kloeckner"], 1) ] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'https://docs.python.org/3': None, - 'https://documen.tician.de/islpy': None, - 'https://documen.tician.de/pyopencl': None, - 'https://documen.tician.de/cgen': None, - 'https://docs.scipy.org/doc/numpy/': None, - 'https://documen.tician.de/pymbolic': None, - 'https://documen.tician.de/pytools': None, + "https://docs.python.org/3": None, + "https://numpy.org/doc/stable/": None, + "https://documen.tician.de/islpy": None, + "https://documen.tician.de/pyopencl": None, + "https://documen.tician.de/cgen": None, + "https://documen.tician.de/pymbolic": None, + "https://documen.tician.de/pytools": None, } autoclass_content = "class" +autodoc_typehints = "description" diff --git a/doc/index.rst b/doc/index.rst index 8eb996f6b48b4b2526b2114c10fbe94669f87b44..7baff3249a25e69019c06802901538500c1af971 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -46,6 +46,8 @@ Please check :ref:`installation` to get started. ref_other misc ref_internals + 🚀 Github <https://github.com/inducer/loopy> + 💾 Download Releases <https://pypi.org/project/loopy> Indices and tables ================== diff --git a/doc/misc.rst b/doc/misc.rst index 4c8c9867f3ceee2447f9249097c7c30f4d6f501d..e8bcefc65ea5519eafb3ad8b1ec34774b64314ed 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -49,21 +49,18 @@ MacOS support computers: Everywhere else, just making sure you have the ``g++`` package should be enough. -#. Install `miniconda <https://conda.io/miniconda.html>`_. - (Both Python 2 and 3 should work. In the absence of other constraints, prefer Python 3.) +#. Install `miniforge <https://github.com/conda-forge/miniforge>`_. -#. ``export CONDA=/WHERE/YOU/INSTALLED/miniconda3`` +#. ``export CONDA=/WHERE/YOU/INSTALLED/miniforge3`` If you accepted the default location, this should work: - ``export CONDA=$HOME/miniconda3`` + ``export CONDA=$HOME/miniforge3`` #. ``$CONDA/bin/conda create -n dev`` #. ``source $CONDA/bin/activate dev`` -#. ``conda config --add channels conda-forge`` - #.
``conda install git pip pocl islpy pyopencl`` (Linux) or @@ -76,7 +73,7 @@ MacOS support computers: Next time you want to use :mod:`loopy`, just run the following command:: - source /WHERE/YOU/INSTALLED/miniconda3/bin/activate dev + source /WHERE/YOU/INSTALLED/miniforge3/bin/activate dev You may also like to add this to a startup file (like :file:`$HOME/.bashrc`) or create an alias for it. diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index efe147493df36df2f7afa4bac4241b88bb5ce598..2b496c77deeaa58be05ce13021a42dd78d2f9ded 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -3,6 +3,72 @@ Reference: Loopy's Model of a Kernel ==================================== +What Types of Computation can a Loopy Program Express? +------------------------------------------------------ + +Loopy programs consist of an a-priori unordered set of statements, operating +on :math:`n`-dimensional array variables. + +Arrays consist of "plain old data" and structures thereof, as describable +by a :class:`numpy.dtype`. The n-dimensional shape of these arrays is +given by a tuple of expressions at most affine in parameters that are +fixed for the duration of program execution. +Each array variable in the program is either an argument or a temporary +variable. A temporary variable is only live within the program, while +argument variables are accessible outside the program and constitute the +program's inputs and outputs. + +A statement (still called 'instruction' in some places, cf. +:class:`loopy.InstructionBase`) encodes an assignment to an entry of an array. +The right-hand side of an assignment consists of an expression that may +consist of arithmetic operations and calls to functions. +If the outermost operation of the RHS expression is a function call, +the RHS value may be a tuple, and multiple (still scalar) arrays appear +as LHS values. (This is the only sense in which tuple types are supported.) +Each statement is parametrized by zero or more loop variables ("inames"). +A statement is executed once for each integer point defined by the domain +forest for the iname tuple given for that statement +(:attr:`loopy.InstructionBase.within_inames`). Each execution of a +statement (with specific values of the inames) is called a *statement +instance*. Dependencies between these instances as well as instances of +other statements are encoded in the program representation and specify permissible +execution orderings. (The semantics of the dependencies are `being +sharpened `__.) Assignments +(comprising the evaluation of the RHS and the assignment to the LHS) may +be specified to be atomic. + +The basic building blocks of the domain forest are sets given as +conjunctions of equalities and inequalities of quasi-affine expressions on +integer tuples, called domains, and represented as instances of +:class:`islpy.BasicSet`. The entries of each integer tuple are +either *parameters* or *inames*. Each domain may optionally have a *parent +domain*. Parameters of parent-less domains are given by value arguments +supplied to the program that will remain unchanged during program +execution. Parameters of domains with parents may be + +- run-time-constant value arguments to the program, or +- inames from parent domains, or +- scalar, integer temporary variables that are written by statements + with iteration domains controlled by a parent domain. + +For each tuple of concrete parameter values, the set of iname tuples must be +finite. Each iname is defined by exactly one domain. 
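+For concreteness, here is a minimal sketch of such a program (an
+illustrative example, not part of the formal model above; it uses only
+the public ``loopy.make_kernel`` entry point, and the names ``a``,
+``out``, and ``n`` are placeholders). The single domain defines the
+iname ``i`` in terms of the fixed parameter ``n``, and the single
+statement assigns one entry of the argument array ``out`` per
+statement instance::
+
+    import numpy as np
+    import loopy as lp
+
+    knl = lp.make_kernel(
+        "{ [i]: 0 <= i < n }",      # one domain, defining the iname "i"
+        "out[i] = 2*a[i]",          # one statement
+        [lp.GlobalArg("a", np.float64, shape="n"),
+         lp.GlobalArg("out", np.float64, shape="n"),
+         lp.ValueArg("n", np.int32)])  # run-time-constant parameter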
+ +For a tuple of inames, the domain forest defines an iteration domain +by finding all the domains defining the inames involved, along with their +parent domains. The resulting tree of domains may contain multiple roots, +but no branches. The iteration domain is then constructed by intersecting +these domains and taking the projection of that set onto the space +given by the required iname tuple. Observe that, via the parent-child +domain mechanism, imperfectly-nested and data-dependent loops become +expressible. + +The set of functions callable from the language is predefined by the system. +Additional functions may be defined by the user by registering them. It is +not currently possible to define functions from within Loopy; however, work +is progressing on permitting this. Even once this is allowed, recursion +will not be permitted. + .. _domain-tree: Loop Domain Forest diff --git a/loopy/__init__.py b/loopy/__init__.py index 66ba75024f1cbd80349025991af6927346db88b6..134daf9cc40cbcff04d37def32a3c5219b51e7f4 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -120,6 +120,9 @@ from loopy.type_inference import infer_unknown_types from loopy.preprocess import preprocess_kernel, realize_reduction from loopy.schedule import ( generate_loop_schedules, get_one_scheduled_kernel, get_one_linearized_kernel) +from loopy.schedule.checker import ( + create_dependencies_from_legacy_knl, + check_linearization_validity) from loopy.statistics import (ToCountMap, CountGranularity, stringify_stats_mapping, Op, MemAccess, get_op_map, get_mem_access_map, get_synchronization_map, gather_access_footprints, @@ -245,6 +248,8 @@ __all__ = [ "preprocess_kernel", "realize_reduction", "generate_loop_schedules", "get_one_scheduled_kernel", "get_one_linearized_kernel", + "create_dependencies_from_legacy_knl", + "check_linearization_validity", "GeneratedProgram", "CodeGenerationResult", "PreambleInfo", "generate_code", "generate_code_v2", "generate_body", diff --git a/loopy/check.py b/loopy/check.py index e66af04d2fe4dfc2e1f5a99281783feecec2bee7..0bf02f7cf7425f0a277a200a1bdc51c60347fd57 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -24,8 +24,7 @@ THE SOFTWARE.
from islpy import dim_type import islpy as isl from loopy.symbolic import WalkMapper -from loopy.diagnostic import (LoopyError, WriteRaceConditionWarning, - warn_with_kernel, ExpressionToAffineConversionError) +from loopy.diagnostic import LoopyError, WriteRaceConditionWarning, warn_with_kernel from loopy.type_inference import TypeInferenceMapper from loopy.kernel.instruction import (MultiAssignmentBase, CallInstruction, CInstruction, _DataObliviousInstruction) @@ -216,7 +215,7 @@ def check_for_double_use_of_hw_axes(kernel): for insn in kernel.instructions: insn_tag_keys = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: for tag in kernel.iname_tags_of_type(iname, UniqueTag): key = tag.key if key in insn_tag_keys: @@ -233,12 +232,12 @@ def check_for_inactive_iname_access(kernel): for insn in kernel.instructions: expression_inames = insn.read_dependency_names() & kernel.all_inames() - if not expression_inames <= kernel.insn_inames(insn): + if not expression_inames <= insn.within_inames: raise LoopyError( "instruction '%s' references " "inames '%s' that the instruction does not depend on" % (insn.id, - ", ".join(expression_inames - kernel.insn_inames(insn)))) + ", ".join(expression_inames - insn.within_inames))) def check_for_unused_inames(kernel): @@ -294,7 +293,7 @@ def check_for_write_races(kernel): insn.assignee_var_names(), insn.assignee_subscript_deps()): assignee_inames = assignee_indices & kernel.all_inames() - if not assignee_inames <= kernel.insn_inames(insn): + if not assignee_inames <= insn.within_inames: raise LoopyError( "assignee of instructions '%s' references " "iname that the instruction does not depend on" @@ -305,13 +304,13 @@ def check_for_write_races(kernel): # will cause write races. raceable_parallel_insn_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, ConcurrentTag)} elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] raceable_parallel_insn_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if any(_is_racing_iname_tag(temp_var, tag) for tag in kernel.iname_tags(iname))} @@ -445,19 +444,14 @@ class _AccessCheckMapper(WalkMapper): % (expr, self.insn_id, access_range, shape_domain)) def map_if(self, expr, domain): - from loopy.symbolic import get_dependencies - if get_dependencies(expr.condition) <= frozenset( - domain.space.get_var_dict()): - try: - from loopy.symbolic import isl_set_from_expr - then_set = isl_set_from_expr(domain.space, expr.condition) - else_set = then_set.complement() - except ExpressionToAffineConversionError: - # non-affine condition: can't do much - then_set = else_set = isl.BasicSet.universe(domain.space) - else: - # data-dependent condition: can't do much + from loopy.symbolic import condition_to_set + then_set = condition_to_set(domain.space, expr.condition) + if then_set is None: + # condition cannot be inferred as ISL expression => ignore + # for domain contributions enforced by it then_set = else_set = isl.BasicSet.universe(domain.space) + else: + else_set = then_set.complement() self.rec(expr.then, domain & then_set) self.rec(expr.else_, domain & else_set) @@ -467,9 +461,10 @@ def check_bounds(kernel): """ Performs out-of-bound check for every array access. 
""" + from loopy.kernel.instruction import get_insn_domain temp_var_names = set(kernel.temporary_variables) for insn in kernel.instructions: - domain = kernel.get_inames_domain(kernel.insn_inames(insn)) + domain = get_insn_domain(insn, kernel) # data-dependent bounds? can't do much if set(domain.get_var_names(dim_type.param)) & temp_var_names: @@ -496,7 +491,7 @@ def check_write_destinations(kernel): if wvar in kernel.all_inames(): raise LoopyError("iname '%s' may not be written" % wvar) - insn_domain = kernel.get_inames_domain(kernel.insn_inames(insn)) + insn_domain = kernel.get_inames_domain(insn.within_inames) insn_params = set(insn_domain.get_var_names(dim_type.param)) if wvar in kernel.all_params(): @@ -941,7 +936,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): group_axes_used = set() local_axes_used = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: ltags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) gtags = kernel.iname_tags_of_type(iname, GroupIndexTag, max_num=1) altags = kernel.iname_tags_of_type( @@ -1197,7 +1192,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None): assert idomains - insn_inames = kernel.insn_inames(insn) + insn_inames = insn.within_inames # {{{ if we've checked the same thing before, no need to check it again @@ -1274,7 +1269,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None): iname_to_dim = pt.get_space().get_var_dict() point_axes = [] - for iname in kernel.insn_inames(insn) | parameter_inames: + for iname in insn_inames | parameter_inames: tp, dim = iname_to_dim[iname] point_axes.append("%s=%d" % ( iname, pt.get_coordinate_val(tp, dim).to_python())) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index cbae4eac5ed796090c52c40a7fde4b6ebeed36a0..0f5d824cc752a372023cc177c780b2606593a0f7 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -146,13 +146,18 @@ class SeenFunction(ImmutableRecord): .. attribute:: arg_dtypes a tuple of arg dtypes + + .. 
attribute:: result_dtypes + + a tuple of result dtypes """ - def __init__(self, name, c_name, arg_dtypes): + def __init__(self, name, c_name, arg_dtypes, result_dtypes): ImmutableRecord.__init__(self, name=name, c_name=c_name, - arg_dtypes=arg_dtypes) + arg_dtypes=arg_dtypes, + result_dtypes=result_dtypes) class CodeGenerationState: diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 71133ef7cf2a29be1a8673e99a81f21544f5404a..14efb64f4618c025a319564ebef3e0232800aecc 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -89,7 +89,7 @@ def generate_instruction_code(codegen_state, insn): else: raise RuntimeError("unexpected instruction type") - insn_inames = kernel.insn_inames(insn) + insn_inames = insn.within_inames return to_codegen_result( codegen_state, diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9088f3bfe5f56c42884ab196c4cfc04d8341e3ef..b24cde2c419cc3fb549473cb620e040520a29a07 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -824,7 +824,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): result = { iname: set() for iname in self.all_inames()} for insn in self.instructions: - for iname in self.insn_inames(insn): + for iname in insn.within_inames: result[iname].add(insn.id) return result @@ -1561,10 +1561,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): for field_name in self.hash_fields: key_builder.rec(key_hash, getattr(self, field_name)) + @memoize_method def __hash__(self): from loopy.tools import LoopyKeyBuilder - from pytools.persistent_dict import new_hash - key_hash = new_hash() + import hashlib + key_hash = hashlib.sha256() self.update_persistent_hash(key_hash, LoopyKeyBuilder()) return hash(key_hash.digest()) diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 6b0248f4f9c18001ef23b0c1551316d9cb6ad065..9fd166ab8f15bdc97006c94c7d03977b64c08292 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -26,6 +26,7 @@ THE SOFTWARE. import re from pytools import ImmutableRecord, memoize_method +from pytools.tag import Taggable import numpy as np # noqa @@ -136,6 +137,12 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase): return self.stringify(True) def map_expr(self, mapper): + from loopy.kernel.data import auto + + if self.stride is auto: + # lp.auto not an expr => do not map + return self + return self.copy(stride=mapper(self.stride)) @@ -557,7 +564,7 @@ def _parse_shape_or_strides(x): return tuple(_pymbolic_parse_if_necessary(xi) for xi in x) -class ArrayBase(ImmutableRecord): +class ArrayBase(ImmutableRecord, Taggable): """ .. attribute :: name @@ -600,7 +607,8 @@ class ArrayBase(ImmutableRecord): .. attribute:: offset Offset from the beginning of the buffer to the point from - which the strides are counted. May be one of + which the strides are counted, in units of the :attr:`dtype`. + May be one of * 0 or None * a string (that is interpreted as an argument name). @@ -636,6 +644,14 @@ class ArrayBase(ImmutableRecord): .. versionadded:: 2018.1 + .. attribute:: tags + + A (possibly empty) frozenset of instances of + :class:`pytools.tag.Tag` intended for + consumption by an application. + + .. versionadded:: 2020.2.2 + .. automethod:: __init__ .. automethod:: __eq__ .. 
automethod:: num_user_axes @@ -652,8 +668,7 @@ class ArrayBase(ImmutableRecord): def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, dim_names=None, strides=None, order=None, for_atomic=False, - target=None, alignment=None, - **kwargs): + target=None, alignment=None, tags=None, **kwargs): """ All of the following (except *name*) are optional. Specify either strides or shape. @@ -691,7 +706,8 @@ class ArrayBase(ImmutableRecord): using atomic-capable data types. :arg offset: (See :attr:`offset`) :arg alignment: memory alignment in bytes - + :arg tags: An instance of or an Iterable of instances of + :class:`pytools.tag.Tag`. """ for kwarg_name in kwargs: @@ -848,6 +864,7 @@ class ArrayBase(ImmutableRecord): order=order, alignment=alignment, for_atomic=for_atomic, + tags=tags, **kwargs) def __eq__(self, other): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a22fef9e8021d55759a9f0a2c0f4f23bfe35df80..94534382f19790936152661b48d4d515e9e0e129 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1523,7 +1523,7 @@ def determine_shapes_of_temporaries(knl): def feed_all_expressions(receiver): for insn in knl.instructions: insn.with_transformed_expressions( - lambda expr: receiver(expr, knl.insn_inames(insn))) + lambda expr: receiver(expr, insn.within_inames)) var_to_base_indices, var_to_shape, var_to_error = ( find_shapes_of_vars( @@ -1543,7 +1543,7 @@ def determine_shapes_of_temporaries(knl): def feed_assignee_of_instruction(receiver): for insn in knl.instructions: for assignee in insn.assignees: - receiver(assignee, knl.insn_inames(insn)) + receiver(assignee, insn.within_inames) var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = ( find_shapes_of_vars( diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 43770ffb6d0f2ae08d8967baa03fedea669343ed..6e454d925167fd6344a7d4cd30c83f28f6ac2e23 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -27,6 +27,7 @@ THE SOFTWARE. from sys import intern import numpy as np # noqa from pytools import ImmutableRecord +from pytools.tag import Taggable from loopy.kernel.array import ArrayBase from loopy.diagnostic import LoopyError from loopy.kernel.instruction import ( # noqa @@ -357,7 +358,6 @@ class KernelArgument(ImmutableRecord): DeprecationWarning, stacklevel=2) dtype = None - kwargs["dtype"] = dtype ImmutableRecord.__init__(self, **kwargs) @@ -379,13 +379,13 @@ class ArrayArg(ArrayBase, KernelArgument): allowed_extra_kwargs = [ "address_space", - "is_output_only"] + "is_output_only", + "tags"] def __init__(self, *args, **kwargs): if "address_space" not in kwargs: raise TypeError("'address_space' must be specified") kwargs["is_output_only"] = kwargs.pop("is_output_only", False) - super().__init__(*args, **kwargs) min_target_axes = 0 @@ -451,15 +451,29 @@ class ImageArg(ArrayBase, KernelArgument): self.num_target_axes(), dtype, is_written) -class ValueArg(KernelArgument): +""" + .. attribute:: tags + + A (possibly empty) frozenset of instances of + :class:`pytools.tag.Tag` intended for consumption by an + application. + + .. versionadded:: 2020.2.2 +""" + + +class ValueArg(KernelArgument, Taggable): def __init__(self, name, dtype=None, approximately=1000, target=None, - is_output_only=False): + is_output_only=False, tags=None): + """ + :arg tags: An instance of or an Iterable of instances of + :class:`pytools.tag.Tag` intended for consumption by an + application.
+ """ KernelArgument.__init__(self, name=name, dtype=dtype, approximately=approximately, target=target, - is_output_only=is_output_only) + is_output_only=is_output_only, tags=tags) def __str__(self): import loopy as lp diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 791ea89a6521c58cfe9281723ea8d83f83baf84a..101d16624c6698bf6f8ac45c5154b0fab4e6e9f5 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -25,6 +25,7 @@ from pytools import ImmutableRecord, memoize_method from loopy.diagnostic import LoopyError from loopy.tools import Optional from warnings import warn +import islpy as isl # {{{ instructions: base class @@ -1438,4 +1439,49 @@ def _check_and_fix_temp_var_type(temp_var_type, stacklevel=2): # }}} +def get_insn_domain(insn, kernel): + """ + Returns an instance of :class:`islpy.Set` for the *insn*'s domain. + + .. note:: + + Does not take into account additional hints available through + :attr:`loopy.LoopKernel.assumptions`. + """ + domain = kernel.get_inames_domain(insn.within_inames) + + # {{{ add read-only ValueArgs to domain + + from loopy.kernel.data import ValueArg + + valueargs_to_add = ({arg.name for arg in kernel.args + if isinstance(arg, ValueArg) + and arg.name not in kernel.get_written_variables()} + - set(domain.get_var_names(isl.dim_type.param))) + + # only consider valueargs relevant to *insn* + valueargs_to_add = valueargs_to_add & insn.read_dependency_names() + + for arg_to_add in valueargs_to_add: + idim = domain.dim(isl.dim_type.param) + domain = domain.add_dims(isl.dim_type.param, 1) + domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add) + + # }}} + + # {{{ enforce restriction from predicates + + insn_preds_set = isl.BasicSet.universe(domain.space) + + for predicate in insn.predicates: + from loopy.symbolic import condition_to_set + predicate_as_isl_set = condition_to_set(domain.space, predicate) + if predicate_as_isl_set is not None: + insn_preds_set = insn_preds_set & predicate_as_isl_set + + # }}} + + return domain & insn_preds_set + + # vim: foldmethod=marker diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 0b8d9841ee77020149a1f246a301e9c422b202e6..541bb45ce52821d00e3e255ad600c392f535d303 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -685,7 +685,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)} # }}} @@ -744,7 +744,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): if aggregate_strides: very_large_stride = int(np.iinfo(np.int32).max) - return sorted((iname for iname in kernel.insn_inames(insn)), + return sorted((iname for iname in insn.within_inames), key=lambda iname: ( aggregate_strides.get(iname, very_large_stride), iname)) @@ -885,7 +885,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): continue auto_axis_inames = [ - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)] if not auto_axis_inames: @@ -893,7 +893,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: tags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) if tags: tag, = tags @@ -1000,7 +1000,7 @@ def guess_var_shape(kernel, 
var_name): submap = SubstitutionRuleExpander(kernel.substitutions) def run_through_armap(expr): - armap(submap(expr), kernel.insn_inames(insn)) + armap(submap(expr), insn.within_inames) return expr try: @@ -1533,7 +1533,7 @@ def stringify_instruction_list(kernel): raise LoopyError("unexpected instruction type: %s" % type(insn).__name__) - adapt_to_new_inames_list(kernel.insn_inames(insn)) + adapt_to_new_inames_list(insn.within_inames) options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL] if insn.priority: diff --git a/loopy/options.py b/loopy/options.py index 2dc8f22cd8a205da89d86b5157af8792a37111ed..46ff37947b66c02e3751a815ab660d9807e86724 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -98,6 +98,12 @@ class Options(ImmutableRecord): Do not do any checking (data type, data layout, shape, etc.) on arguments for a minor performance gain. + .. versionchanged:: 2021.1 + + This now defaults to the same value as the ``optimize`` + sub-flag from :data:`sys.flags`. This flag can be controlled + (i.e. set to *True*) by running Python with the ``-O`` flag. + .. attribute:: no_numpy Do not check for or accept :mod:`numpy` arrays as @@ -196,6 +202,7 @@ class Options(ImmutableRecord): allow_terminal_colors_def = ( ALLOW_TERMINAL_COLORS and allow_terminal_colors_def) + import sys ImmutableRecord.__init__( self, @@ -203,7 +210,7 @@ class Options(ImmutableRecord): trace_assignments=kwargs.get("trace_assignments", False), trace_assignment_values=kwargs.get("trace_assignment_values", False), - skip_arg_checks=kwargs.get("skip_arg_checks", False), + skip_arg_checks=kwargs.get("skip_arg_checks", sys.flags.optimize), no_numpy=kwargs.get("no_numpy", False), cl_exec_manage_array_events=kwargs.get("no_numpy", True), return_dict=kwargs.get("return_dict", False), diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 12f1cb4691cf749ebd147c65582900c9ce3dce04..40b5827343ae7c4cf2fb2886d88a5324c930285a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -256,7 +256,6 @@ def find_temporary_address_space(kernel): overall_aspace = max(desired_aspace_per_insn) - from pytools import all if not all(iaspace == overall_aspace for iaspace in desired_aspace_per_insn): raise LoopyError("not all instructions agree on the " "the desired address space (private/local/global) of the " @@ -1004,7 +1003,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames from loopy.kernel.data import AddressSpace acc_var_names = make_temporaries( @@ -1041,7 +1040,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, update_id = insn_id_gen( based_on="{}_{}_update".format(insn.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) + update_insn_iname_deps = insn.within_inames | set(expr.inames) if insn.within_inames_is_final: update_insn_iname_deps = insn.within_inames | set(expr.inames) @@ -1126,7 +1125,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, size = _get_int_iname_size(red_iname) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames @@ -1363,7 +1362,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_scan_seq(expr, rec, nresults, arg_dtypes, 
reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames inames_to_remove.add(scan_iname) track_iname = var_name_gen( @@ -1417,7 +1416,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, update_id = insn_id_gen( based_on="{}_{}_update".format(insn.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | {track_iname} + update_insn_iname_deps = insn.within_inames | {track_iname} if insn.within_inames_is_final: update_insn_iname_deps = insn.within_inames | {track_iname} @@ -1461,7 +1460,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return map_reduction_seq( expr, rec, nresults, arg_dtypes, reduction_dtypes) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames @@ -1668,7 +1667,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, infer_arg_and_reduction_dtypes_for_reduction_expression( temp_kernel, expr, unknown_types_ok)) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames bad_inames = frozenset(expr.inames) & outer_insn_inames if bad_inames: raise LoopyError("reduction used within loop(s) that it was " @@ -1854,7 +1853,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, no_sync_with=insn.no_sync_with | frozenset(new_insn_add_no_sync_with), within_inames=( - temp_kernel.insn_inames(insn) + insn.within_inames | new_insn_add_within_inames)) kwargs.pop("id") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 936c7c4d605cfcaebe57d4d61b862000b0b3bc3c..ccfe0d5ff9b403b9ed68bfabf7d69ec36bd66b57 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -182,7 +182,6 @@ def has_barrier_within(kernel, sched_index): if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( kernel.schedule, sched_index) - from pytools import any return any(isinstance(subsched_item, Barrier) for subsched_item in loop_contents) elif isinstance(sched_item, Barrier): @@ -296,7 +295,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = kernel.insn_inames(dep_insn) + dep_insn_inames = dep_insn.within_inames if iname in dep_insn_inames: # Nothing to be learned, dependency is in loop over iname @@ -940,7 +939,7 @@ def generate_loop_schedules_internal( if not is_ready: continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames + want = insn.within_inames - sched_state.parallel_inames have = active_inames_set - sched_state.parallel_inames if want != have: @@ -1046,8 +1045,9 @@ def generate_loop_schedules_internal( sched_state.active_group_counts.keys()): new_insn_ids_to_try = None - new_toposorted_insns = sched_state.insns_in_topologically_sorted_order[:] - new_toposorted_insns.remove(insn) + # explicitly use id to compare to avoid performance issues like #199 + new_toposorted_insns = [x for x in + sched_state.insns_in_topologically_sorted_order if x.id != insn.id] # }}} @@ -1106,7 +1106,7 @@ def generate_loop_schedules_internal( for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + if last_entered_loop in 
insn.within_inames: if debug_mode: print("cannot leave '%s' because '%s' still depends on it" % (last_entered_loop, format_insn(kernel, insn.id))) @@ -1294,7 +1294,7 @@ generate_loop_schedules_internal( for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = kernel.insn_inames(insn) + want = insn.within_inames if hypothetically_active_loops <= want: if usefulness is None: diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index f9e9933c61783272b23b555fe3a0bddf8585c20a..a1cc64cf700eb21976d2b9c4bf10795c55b87fb7 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -139,3 +139,240 @@ def get_schedules_for_statement_pairs( # }}} # }}} + + +def create_dependencies_from_legacy_knl(knl): + """Return a mapping from pairs of instruction ids to lists of + :class:`loopy.schedule.checker.dependency.TBD` + instances created for a :class:`loopy.LoopKernel` containing legacy + dependencies. + + Create the new dependencies according to the following rules: + + (1) If a dependency exists between ``insn0`` and ``insn1``, create the + dependency ``SAME(SNC)`` where ``SNC`` is the set of non-concurrent inames + used by both ``insn0`` and ``insn1``, and ``SAME`` is the relationship + specified by the ``SAME`` attribute of + :class:`loopy.schedule.checker.dependency.LegacyDependencyType`. + + (2) For each subset of non-concurrent inames used by any instruction, + + (a), find the set of all instructions using those inames, + + (b), create a directed graph with these instructions as nodes and + edges representing a 'happens before' relationship specified by + each dependency, + + (c), find the sources and sinks within this graph, and + + (d), connect each sink to each source (sink happens before source) + with a ``PRIOR(SNC)`` dependency, where ``PRIOR`` is the + relationship specified by the ``PRIOR`` attribute of + :class:`loopy.schedule.checker.dependency.LegacyDependencyType`. + + """ + + from loopy.schedule.checker.dependency import ( + create_legacy_dependency_constraint, + get_dependency_sources_and_sinks, + LegacyDependencyType, + ) + from loopy.schedule.checker.utils import ( + partition_inames_by_concurrency, + get_all_nonconcurrent_insn_iname_subsets, + get_linearization_item_ids_within_inames, + ) + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + # TODO instead of keeping these in a set, attach each one to depender insn + + # Create constraint maps from kernel dependencies + #dep_maps = set() # TODO update other stuff after this change + dep_maps = {} + + # Introduce SAME dep for set of shared, non-concurrent inames + + conc_inames, non_conc_inames = partition_inames_by_concurrency( + preprocessed_knl) + for insn_after in preprocessed_knl.instructions: + for insn_before_id in insn_after.depends_on: + insn_before = preprocessed_knl.id_to_insn[insn_before_id] + insn_before_inames = insn_before.within_inames + insn_after_inames = insn_after.within_inames + shared_non_conc_inames = ( + insn_before_inames & insn_after_inames & non_conc_inames) + + # TODO what to do if there is already a dep from insn_before->insn_after?
+ # (currently just add a new one) + + # create a map representing constraints from the dependency, + # which maps statement instance to all stmt instances that must occur + # later and is acquired from the non-preprocessed kernel + constraint_map = create_legacy_dependency_constraint( + preprocessed_knl, + insn_before_id, + insn_after.id, + {LegacyDependencyType.SAME: shared_non_conc_inames}, + ) + + """ + dep_maps.add(( + insn_before_id, + insn_after.id, + constraint_map, + )) + """ + dep_maps.setdefault( + (insn_before_id, insn_after.id), [] + ).append(constraint_map) + + # loop-carried deps ------------------------------------------ + + # Go through insns and get all unique insn.depends_on iname sets + non_conc_iname_subsets = get_all_nonconcurrent_insn_iname_subsets( + preprocessed_knl, exclude_empty=True, non_conc_inames=non_conc_inames) + + # For each set of insns within a given iname set, find sources and sinks. + # Then make PRIOR dep from all sinks to all sources at previous iterations + for iname_subset in non_conc_iname_subsets: + # find items within this iname set + linearization_item_ids = get_linearization_item_ids_within_inames( + preprocessed_knl, iname_subset) + + # find sources and sinks + sources, sinks = get_dependency_sources_and_sinks( + preprocessed_knl, linearization_item_ids) + + # create prior deps + + # in future, consider inserting single no-op source and sink + for source_id in sources: + for sink_id in sinks: + sink_insn_inames = preprocessed_knl.id_to_insn[sink_id].within_inames + source_insn_inames = preprocessed_knl.id_to_insn[ + source_id].within_inames + shared_non_conc_inames = ( + sink_insn_inames & source_insn_inames & non_conc_inames) + + # create a map representing constraints from the dependency, + # which maps statement instance to all stmt instances that must occur + # later and is acquired from the non-preprocessed kernel + constraint_map = create_legacy_dependency_constraint( + preprocessed_knl, + sink_id, + source_id, + {LegacyDependencyType.PRIOR: shared_non_conc_inames}, + ) + + # TODO what if there is already a different dep from sink->source? + """ + dep_maps.add(( + sink_id, + source_id, + constraint_map, + )) + """ + dep_maps.setdefault( + (sink_id, source_id), [] + ).append(constraint_map) + + return dep_maps + + +def check_linearization_validity( + knl, + dep_maps, + linearization_items, + ): + # TODO document + + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + ) + from loopy.schedule.checker.utils import ( + prettier_map_string, + ) + from loopy.schedule.checker.schedule import ( + get_lex_order_map_for_sched_space, + ) + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + schedules = get_schedules_for_statement_pairs( + preprocessed_knl, + linearization_items, + dep_maps.keys(), + ) + + # For each dependency, create+test linearization containing pair of insns------ + linearization_is_valid = True + for (insn_id_before, insn_id_after), constraint_maps in dep_maps.items(): + + constraint_map = constraint_maps[0] # TODO handle multiple properly?
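+        # Descriptive summary of the check below: retrieve the pair of
+        # schedule maps (statement instance -> lexicographic time) for this
+        # dependency, compose them with the lexicographic order map to
+        # obtain the statement instance ordering (SIO), and then verify
+        # that the dependency's constraint map is a subset of the SIO,
+        # i.e., that every instance pair the dependency orders is also
+        # ordered by the linearization.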
+ + # Get two isl maps from {statement instance: lex point}, + # one for each linearization item involved in the dependency + isl_sched_map_before, isl_sched_map_after = schedules[ + (insn_id_before, insn_id_after)] + + # get map representing lexicographic ordering + sched_lex_order_map = get_lex_order_map_for_sched_space(isl_sched_map_before) + + # create statement instance ordering, + # maps each statement instance to all statement instances occurring later + sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + ) + + # reorder variables/params in constraint map space to match SIO so we can + # check to see whether the constraint map is a subset of the SIO + # (spaces must be aligned so that the variables in the constraint map + # correspond to the same variables in the SIO) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + aligned_constraint_map = ensure_dim_names_match_and_align( + constraint_map, sio) + + import islpy as isl + assert aligned_constraint_map.space == sio.space + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + + if not aligned_constraint_map.is_subset(sio): + + linearization_is_valid = False + + print("================ constraint check failure =================") + print("Constraint map not subset of SIO") + print("Dependencies:") + print(insn_id_before+"->"+insn_id_after) + print(prettier_map_string(constraint_map)) + print("Statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(prettier_map_string(aligned_constraint_map.gist(sio))) + print("sio.gist(constraint_map)") + print(prettier_map_string(sio.gist(aligned_constraint_map))) + print("Loop priority known:") + print(preprocessed_knl.loop_priority) + print("===========================================================") + + return linearization_is_valid diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 0000000000000000000000000000000000000000..e8855f230a8083b1c310e6a811e506770953a849 --- /dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,635 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class LegacyDependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +def create_elementwise_comparison_conjunction_set( + names0, names1, islvars, op="eq"): + """Create a set constrained by the conjunction of conditions comparing + `names0` to `names1`. + + :arg names0: A list of :class:`str` representing variable names. + + :arg names1: A list of :class:`str` representing variable names. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also included and represents a :class:`islpy.PwAff` zero constant. + + :arg op: A :class:`str` describing the operator to use when creating + the set constraints. Options: `eq` for `=`, `lt` for `<` + + :returns: A set involving `islvars` constrained by the constraints + `{names0[0] <op> names1[0] and names0[1] <op> names1[1] and ...}`.
+ + """ + + # initialize set with constraint that is always true + conj_set = islvars[0].eq_set(islvars[0]) + for n0, n1 in zip(names0, names1): + if op == "eq": + conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + elif op == "lt": + conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) + + return conj_set + + +def _convert_constraint_set_to_map(constraint_set, mv_count, src_position=None): + dim_type = isl.dim_type + constraint_map = isl.Map.from_domain(constraint_set) + if src_position: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, src_position, mv_count) + else: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, mv_count, mv_count) + + +def create_legacy_dependency_constraint( + knl, + insn_id_before, + insn_id_after, + deps, + ): + """Create a statement dependency constraint represented as a map from + each statement instance to statement instances that must occur later, + i.e., ``{[s'=0, i', j'] -> [s=1, i, j] : condition on {i', j', i, j}}`` + indicates that statement ``0`` comes before statement ``1`` when the + specified condition on inames ``i',j',i,j`` is met. ``i'`` and ``j'`` + are the values of inames ``i`` and ``j`` in the first statement instance. + + :arg knl: A :class:`loopy.kernel.LoopKernel` containing the + depender and dependee instructions. + + :arg insn_id_before: A :class:`str` specifying the :mod:`loopy` + instruction id for the dependee statement. + + :arg insn_id_after: A :class:`str` specifying the :mod:`loopy` + instruction id for the depender statement. + + :arg deps: A :class:`dict` mapping instances of :class:`LegacyDependencyType` + to the :mod:`loopy` kernel inames involved in that particular + dependency relationship. + + :returns: An :class:`islpy.Map` mapping each statement instance to all + statement instances that must occur later according to the constraints. + + """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + reorder_dims_by_name, + append_marker_to_isl_map_var_names, + sorted_union_of_names_in_isl_sets, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + # TODO we're now computing these doms multiple times + # could be more efficient... + dom_before = knl.get_inames_domain(knl.id_to_insn[insn_id_before].within_inames) + dom_after = knl.get_inames_domain(knl.id_to_insn[insn_id_after].within_inames) + dom_inames_ordered_before = sorted_union_of_names_in_isl_sets([dom_before]) + dom_inames_ordered_after = sorted_union_of_names_in_isl_sets([dom_after]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+dom_inames_ordered_before, + other_var_names=[STATEMENT_VAR_NAME]+dom_inames_ordered_after, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + # for each (dep_type, inames) pair, create 'happens before' constraint, + # all_constraints_set will be the union of all these constraints + ldt = LegacyDependencyType + for dep_type, inames in deps.items(): + # need to put inames in a list so that order of inames and inames' + # matches when calling create_elementwise_comparison_conj...
+ if not isinstance(inames, list): + inames_list = list(inames) + else: + inames_list = inames[:] + inames_prime = append_apostrophes(inames_list) # e.g., [j', k'] + + if dep_type == ldt.SAME: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="eq") + elif dep_type == ldt.PRIOR: + + priority_known = False + # if nesting info is provided: + if knl.loop_priority: + # assumes all loop_priority tuples are consistent + + # with multiple priority tuples, determine whether the combined + # info they contain can give us a single, full prioritization, + # e.g., if prios={(a, b), (b, c), (c, d, e)}, then we know + # a -> b -> c -> d -> e + + # remove irrelevant inames from priority tuples (because we're + # about to perform a costly operation on remaining tuples) + relevant_priorities = set() + for p_tuple in knl.loop_priority: + new_tuple = [iname for iname in p_tuple if iname in inames_list] + # empty tuples and single tuples don't help us define + # a nesting, so ignore them (if we're dealing with a single + # iname, priorities will be ignored later anyway) + if len(new_tuple) > 1: + relevant_priorities.add(tuple(new_tuple)) + + # create a mapping from each iname to inames that must be + # nested inside that iname + nested_inside = {} + for outside_iname in inames_list: + nested_inside_inames = set() + for p_tuple in relevant_priorities: + if outside_iname in p_tuple: + nested_inside_inames.update([ + inside_iname for inside_iname in + p_tuple[p_tuple.index(outside_iname)+1:]]) + nested_inside[outside_iname] = nested_inside_inames + + from loopy.schedule.checker.utils import ( + get_orderings_of_length_n) + # get all orderings that are explicitly allowed by priorities + orders = get_orderings_of_length_n( + nested_inside, + required_length=len(inames_list), + #return_first_found=True, + return_first_found=False, # slower; allows priorities test below + ) + + if orders: + # test for invalid priorities (includes cycles) + if len(orders) != 1: + raise ValueError( + "create_legacy_dependency_constraint encountered invalid " + "priorities %s" + % (knl.loop_priority)) + priority_known = True + priority_tuple = orders.pop() + + # if only one loop, we know the priority + if not priority_known and len(inames_list) == 1: + priority_tuple = tuple(inames_list) + priority_known = True + + if priority_known: + # PRIOR requires statement before complete previous iterations + # of loops before statement after completes current iteration + # according to loop nest order + inames_list_nest_ordered = [ + iname for iname in priority_tuple + if iname in inames_list] + inames_list_nest_ordered_prime = append_apostrophes( + inames_list_nest_ordered) + if set(inames_list_nest_ordered) != set(inames_list): + # TODO could this happen?
+ assert False + + from loopy.schedule.checker import ( + lexicographic_order_map as lom) + # TODO handle case where inames list is empty + constraint_set = lom.get_lex_order_set( + inames_list_nest_ordered_prime, + inames_list_nest_ordered, + islvars, + ) + else: # priority not known + # PRIOR requires upper left quadrant happen before: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="lt") + + # get ints representing statements in pairwise schedule + s_before_int = 0 + s_after_int = 0 if insn_id_before == insn_id_after else 1 + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+s_before_int) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+s_after_int) + + # union this constraint_set with all_constraints_set + all_constraints_set = all_constraints_set | constraint_set + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, + mv_count=len(dom_inames_ordered_after)+1, # +1 for statement var + src_position=len(dom_inames_ordered_before)+1, # +1 for statement var + ) + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + # (anything other than 0 risks being out of bounds) + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + # TODO nothing should be missing now, just reorder + assert set( + append_apostrophes([STATEMENT_VAR_NAME] + dom_inames_ordered_before) + ) == set(domain_to_intersect.get_var_names(isl.dim_type.out)) + assert set( + [STATEMENT_VAR_NAME] + dom_inames_ordered_after + ) == set(range_to_intersect.get_var_names(isl.dim_type.out)) + domain_to_intersect = reorder_dims_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + dom_inames_ordered_before)) + range_to_intersect = reorder_dims_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + dom_inames_ordered_after) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +# TODO no longer used, move elsewhere +def _create_5pt_stencil_dependency_constraint( + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + space_iname, + time_iname, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + reorder_dims_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + sorted_union_of_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = 
sorted_union_of_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + #all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + space_iname_prime = space_iname + "'" + time_iname_prime = time_iname + "'" + one = islvars[0] + 1 + two = islvars[0] + 2 + # global: + """ + constraint_set = ( + islvars[time_iname_prime].gt_set(islvars[time_iname]) & + ( + (islvars[space_iname_prime]-two).lt_set(islvars[space_iname]) & + islvars[space_iname].lt_set(islvars[space_iname_prime]+two) + ) + | + islvars[time_iname_prime].gt_set(islvars[time_iname] + one) & + islvars[space_iname].eq_set(islvars[space_iname_prime]) + ) + """ + # local dep: + constraint_set = ( + islvars[time_iname].eq_set(islvars[time_iname_prime] + one) & ( + (islvars[space_iname]-two).lt_set(islvars[space_iname_prime]) & + islvars[space_iname_prime].lt_set(islvars[space_iname]+two)) + | + (islvars[time_iname].eq_set(islvars[time_iname_prime] + two) + & islvars[space_iname_prime].eq_set(islvars[space_iname])) + ) + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + constraint_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = reorder_dims_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + all_dom_inames_ordered)) + range_to_intersect = reorder_dims_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + all_dom_inames_ordered) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +def create_arbitrary_dependency_constraint( + constraint_str, + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + # TODO test after switching primes to before vars + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + #append_apostrophes, + append_marker_to_strings, + add_dims_to_isl_set, + reorder_dims_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This 
function uses the constraint given to create the following map: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + sorted_union_of_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = sorted_union_of_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="p", + ) # TODO figure out before/after notation + #statement_var_name_prime = STATEMENT_VAR_NAME+"'" + statement_var_name_prime = STATEMENT_VAR_NAME+"p" + # TODO figure out before/after notation + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + space = all_constraints_set.space + from pymbolic import parse + from loopy.symbolic import aff_from_expr + + or_constraint_strs = constraint_str.split("or") + + def _quant(s): + return "(" + s + ")" + + def _diff(s0, s1): + return _quant(s0) + "-" + _quant(s1) + + for or_constraint_str in or_constraint_strs: + and_constraint_strs = or_constraint_str.split("and") + #conj_constraint = islvars[0].eq_set(islvars[0]) # init to true + conj_constraint = isl.BasicSet.universe(space) + for cons_str in and_constraint_strs: + if "<=" in cons_str: + lhs, rhs = cons_str.split("<=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs))))) + # TODO something more robust than this string meddling^ + elif ">=" in cons_str: + lhs, rhs = cons_str.split(">=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + elif "<" in cons_str: + lhs, rhs = cons_str.split("<") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs) + "- 1")))) + elif ">" in cons_str: + lhs, rhs = cons_str.split(">") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs) + "- 1")))) + elif "=" in cons_str: + lhs, rhs = cons_str.split("=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.equality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + else: + 1/0 + all_constraints_set = all_constraints_set | conj_constraint + + # set statement_var_name == statement # + all_constraints_set = ( + all_constraints_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + ) + all_constraints_set = ( + all_constraints_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + ) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="p") + # TODO figure out before/after notation + 
domain_to_intersect = add_dims_to_isl_set(
+        domain_constraint_set, isl.dim_type.out,
+        [statement_var_name_prime], statement_var_idx)
+
+    # insert inames missing from doms to enable intersection
+    domain_to_intersect = reorder_dims_by_name(
+        domain_to_intersect, isl.dim_type.out,
+        append_marker_to_strings(  # TODO figure out before/after notation
+            [STATEMENT_VAR_NAME] + all_dom_inames_ordered, "p"))
+    range_to_intersect = reorder_dims_by_name(
+        range_to_intersect,
+        isl.dim_type.out,
+        [STATEMENT_VAR_NAME] + all_dom_inames_ordered)
+
+    # intersect doms
+    map_with_loop_domain_constraints = all_constraints_map.intersect_domain(
+        domain_to_intersect).intersect_range(range_to_intersect)
+
+    return map_with_loop_domain_constraints
+
+
+def get_dependency_sources_and_sinks(knl, linearization_item_ids):
+    """Implicitly create a directed graph with the linearization items specified
+    by ``linearization_item_ids`` as nodes, and with edges representing a
+    'happens before' relationship specified by each legacy dependency between
+    two instructions. Return the sources and sinks within this graph.
+
+    :arg linearization_item_ids: A :class:`list` of :class:`str` representing
+        loopy instruction ids.
+
+    :returns: Two instances of :class:`set` of :class:`str` instruction ids
+        representing the sources and sinks in the dependency graph.
+
+    """
+    sources = set()
+    dependees = set()  # all dependees (within linearization_item_ids)
+    for item_id in linearization_item_ids:
+        # find the deps within linearization_item_ids
+        deps = knl.id_to_insn[item_id].depends_on & linearization_item_ids
+        if deps:
+            # add deps to dependees
+            dependees.update(deps)
+        else:  # has no deps (within linearization_item_ids), this is a source
+            sources.add(item_id)
+
+    # sinks don't point to anyone
+    sinks = linearization_item_ids - dependees
+
+    return sources, sinks
+
+
+def filter_deps_by_intersection_with_SAME(
+        knl,
+        deps,
+        non_conc_inames,
+        ):
+    """Return the subset of the dependencies in `deps` (a :class:`list` of
+    ``(insn_id_before, insn_id_after, dep_constraint_map)`` tuples) whose
+    constraint maps have a non-empty intersection with the corresponding
+    SAME dependency relation on the statements' shared non-concurrent
+    inames."""
+
+    ldt = LegacyDependencyType
+
+    # determine which dep relations have a non-empty intersection with
+    # the SAME relation
+    deps_filtered = []
+    for insn_id_before, insn_id_after, dep_constraint_map in deps:
+
+        # create isl map representing "SAME" dep for these two insns
+        shared_nc_inames = (
+            knl.id_to_insn[insn_id_before].within_inames &
+            knl.id_to_insn[insn_id_after].within_inames &
+            non_conc_inames)
+
+        same_dep_constraint_map = create_legacy_dependency_constraint(
+            knl,
+            insn_id_before,
+            insn_id_after,
+            {ldt.SAME: shared_nc_inames},
+            )
+
+        # check whether the intersection of dep map and SAME dep map is non-empty
+        intersect_dep_and_same = same_dep_constraint_map & dep_constraint_map
+        intersect_not_empty = not intersect_dep_and_same.is_empty()
+
+        if intersect_not_empty:
+            deps_filtered.append((insn_id_before, insn_id_after, dep_constraint_map))
+
+    return deps_filtered
diff --git a/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e4999cd4273ffaf5275c2c5ff4d169309de9471
--- /dev/null
+++ b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py
@@ -0,0 +1,356 @@
+__copyright__ = "Copyright (C) 2019 James Stevens"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including
without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +import numpy as np +from loopy.schedule.checker.utils import ( + create_graph_from_pairs, +) +from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, +) +from loopy import ( + preprocess_kernel, + get_one_linearized_kernel, +) + +# Choose kernel ---------------------------------------------------------- + +knl_choice = "example" +#knl_choice = "unused_inames" +#knl_choice = "matmul" +#knl_choice = "scan" +#knl_choice = "dependent_domain" +#knl_choice = "stroud_bernstein_orig" # TODO invalid sched? +#knl_choice = "ilp_kernel" +#knl_choice = "add_barrier" +#knl_choice = "nop" +#knl_choice = "nest_multi_dom" +#knl_choice = "loop_carried_deps" + +if knl_choice == "example": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +if knl_choice == "unused_inames": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + end + end + """, + name="unused_inames", + assumptions="pi,pj,pk >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +elif knl_choice == "matmul": + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + 
inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + # Must declare coeffs to have "no" shape, to keep loopy + # from trying to figure it out the shape automatically. +elif knl_choice == "ilp_kernel": + knl = lp.make_kernel( + "{[i,j,ilp_iname]: 0 <= i,j < n and 0 <= ilp_iname < 4}", + """ + for i + for j + for ilp_iname + tmp[i,j,ilp_iname] = 3.14 + end + end + end + """, + name="ilp_kernel", + assumptions="n>=1 and n mod 4 = 0", + ) + # TODO why is conditional on ilp_name? + knl = lp.tag_inames(knl, {"j": "l.0", "ilp_iname": "ilp"}) + #knl = lp.prioritize_loops(knl, "i_outer_outer,i_outer_inner,i_inner,a") +if knl_choice == "add_barrier": + np.random.seed(17) + #a = np.random.randn(16) + cnst = np.random.randn(16) + knl = lp.make_kernel( + "{[i, ii]: 0<=i, ii c_end = 2 + for c + ... nop + end + end + """, + "...", + seq_dependencies=True) + knl = lp.fix_parameters(knl, dim=3) +if knl_choice == "nest_multi_dom": + #"{[i,j,k]: 0<=i,j,kacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + #assumptions="n >= 1", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + """ + <>foo = 0 {id=insn0} + for i + <>acc = 0 {id=insn1} + for j + for k + acc = acc + j + k {id=insn2,dep=insn1} + end + end + foo = foo + acc {id=insn3,dep=insn2} + end + <>bar = foo {id=insn4,dep=insn3} + """ + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + +if knl_choice == "loop_carried_deps": + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + +unprocessed_knl = knl.copy() + +deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + +# get a linearization to check +knl = preprocess_kernel(knl) +knl = get_one_linearized_kernel(knl) +print("kernel schedueld") +linearization_items = knl.linearization +print("checking validity") + +linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items, + ) + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +print("="*80) +print("testing dep sort") +print("="*80) + +# create dependency graph + +# for which deps does the intersection with the SAME dependency relation exist? 
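+# (illustrative note: a dep between insn_a and insn_b that both nest within
+# sequential iname i is kept below iff its constraint map contains instance
+# pairs with i' = i, i.e., iff it intersects the SAME(i) relation)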
+# create a graph including these deps as edges (from after->before) + +from loopy.schedule.checker.utils import ( + partition_inames_by_concurrency, +) +_, non_conc_inames = partition_inames_by_concurrency(knl) +legacy_deps_filtered_by_same = filter_deps_by_intersection_with_SAME( + knl, + deps, + non_conc_inames, + ) + +# get dep graph edges +dep_graph_pairs = [ + (insn_id_before, insn_id_after) + for insn_id_before, insn_id_after, _ in legacy_deps_filtered_by_same] + +# create dep graph from edges +dep_graph = create_graph_from_pairs(dep_graph_pairs) + +print("dep_graph:") +for k, v in dep_graph.items(): + print("%s: %s" % (k, v)) diff --git a/loopy/schedule/checker/experimental_scripts/example_wave_equation.py b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py new file mode 100644 index 0000000000000000000000000000000000000000..8e168a5efcee6201e139a1a8f34e098c8c190088 --- /dev/null +++ b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py @@ -0,0 +1,623 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +from loopy import generate_code_v2 +from loopy import get_one_linearized_kernel +from loopy import preprocess_kernel +import numpy as np +import islpy as isl +#from loopy.kernel_stat_collector import KernelStatCollector +#from loopy.kernel_stat_collector import KernelStatOptions as kso # noqa +from loopy.schedule.checker.utils import ( + prettier_map_string, + ensure_dim_names_match_and_align, + append_marker_to_isl_map_var_names, + partition_inames_by_concurrency, +) +from loopy.schedule.checker.dependency import ( + create_arbitrary_dependency_constraint, +) +from loopy.schedule.checker.schedule import PairwiseScheduleBuilder +from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, +) + +# Make kernel ---------------------------------------------------------- + +# u[x,t+1] = 2*u[x,t] - u[x,t-1] + c*(dt/dx)**2*(u[x+1,t] - 2*u[x,t] + u[x-1,t]) +# mine, works: +# "{[x,t]: 1<=x {[ix, it]: 1<=ix {[ix, it]: 1<=ix lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="p") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + partition_inames_by_concurrency, + ) + conc_inames, non_conc_inames = partition_inames_by_concurrency(linearized_knl) + 
print("concurrent inames:", conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +# ====================================================================== +# now do this with complicated mapping + + +# create mapping: +# old (wrong) +""" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx', tt', tparity', itt', itx']: " + "16*(tx' - tt' + tparity') + itx' - itt' = ix - it and " + "16*(tx' + tt') + itt' + itx' = ix + it and " + "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +""" + +# new +# TODO remove "unused" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_stmt, ix, it] -> " + "[_lp_linchk_stmt'=_lp_linchk_stmt, tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +#m2_primes_after = isl.BasicMap( +# "[nx,nt,unused] -> {[statement, ix, it] -> " +# "[statement'=statement, tx', tt', tparity', itt', itx']: " +# "16*(tx' - tt') + itx' - itt' = ix - it and " +# "16*(tx' + tt' + tparity') + itt' + itx' = ix + it and " +# "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_stmt', ix', it'] -> " + "[_lp_linchk_stmt=_lp_linchk_stmt', tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix' - it' and " + "16*(tx + tt + tparity) + itt + itx = ix' + it' and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") + +# TODO note order must match statement_iname_premap_order + +print("maping:") +print(prettier_map_string(m2)) + +# new kernel +knl = lp.map_domain(ref_knl, m) +knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") +print("code after mapping:") +print(generate_code_v2(knl).device_code()) +#1/0 + +print("constraint_map before apply_range:") +print(prettier_map_string(constraint_map)) +#mapped_constraint_map = constraint_map.apply_range(m2_prime) +mapped_constraint_map = constraint_map.apply_range(m2) +print("constraint_map after apply_range:") +print(prettier_map_string(mapped_constraint_map)) +#mapped_constraint_map = 
mapped_constraint_map.apply_domain(m2) +mapped_constraint_map = mapped_constraint_map.apply_domain(m2_prime) +# put primes on *before* names +mapped_constraint_map = append_marker_to_isl_map_var_names( + mapped_constraint_map, isl.dim_type.in_, marker="'") + +print("constraint_map after apply_domain:") +print(prettier_map_string(mapped_constraint_map)) + +statement_inames_mapped = set(["itx", "itt", "tt", "tparity", "tx"]) +sid_before = 0 +sid_after = 0 + +preprocessed_knl = preprocess_kernel(knl) +inames_domain_before_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +inames_domain_after_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +print("(mapped) inames_domain_before:", inames_domain_before_mapped) +print("(mapped) inames_domain_after:", inames_domain_after_mapped) + +# ============================================= + +verbose = False +verbose = True + +# get a linearization to check +if preprocessed_knl.linearization is None: + linearized_knl = get_one_linearized_kernel(preprocessed_knl) +else: + linearized_knl = preprocessed_knl + +# {{{ verbose + +if verbose: + # Print kernel info ------------------------------------------------------ + print("="*80) + print("Kernel:") + print(linearized_knl) + #print(generate_code_v2(linearized_knl).device_code()) + print("="*80) + print("Iname tags: %s" % (linearized_knl.iname_to_tags)) + print("="*80) + print("Loopy linearization:") + for linearization_item in linearized_knl.linearization: + print(linearization_item) + + print("="*80) + print("inames_domain_before_mapped:", inames_domain_before_mapped) + print("inames_domain_after_mapped:", inames_domain_after_mapped) + +# }}} + + +conc_loop_inames, _ = partition_inames_by_concurrency(linearized_knl) +# Create a mapping of {statement instance: lex point} +# including only instructions involved in this dependency +sched = PairwiseScheduleBuilder( + linearized_knl.linearization, + str(sid_before), + str(sid_after), + loops_to_ignore=conc_loop_inames, + ) + +# Get an isl map representing the PairwiseScheduleBuilder; +# this requires the iname domains + +# get a mapping from lex schedule id to relevant inames domain +sid_to_dom = { + sid_before: inames_domain_before_mapped, + sid_after: inames_domain_after_mapped, + } + +isl_sched_map_before, isl_sched_map_after = sched.build_maps(linearized_knl) + +# {{{ verbose + +if verbose: + print("sid_to_dom:\n", sid_to_dom) + print("PairwiseScheduleBuilder after creating isl map:") + print(sched) + print("LexSched:") + print(prettier_map_string(isl_sched_map_before)) + print(prettier_map_string(isl_sched_map_after)) + #print("space (statement instances -> lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="'") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + 
print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + partition_inames_by_concurrency, + ) + conc_inames, non_conc_inames = partition_inames_by_concurrency(linearized_knl) + print("concurrent inames:", conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + +''' +# (U_n^{k+1}-U_n^k)/dt = C*(U_{n+1}^k-U_n^k)/dx +# U_n^{k+1} = U_n^k + dt/dx*C*(U_{n+1}^k-U_n^k) +''' + +# Get stats ---------------------------------------------------------- + +""" +sc = KernelStatCollector( + evaluate_polys=False, + count_madds=False, # TODO enable after madd counting branch is merged + ) +#nx = 2**11 +#nt = 2**11 +nx = 2**5 +nt = 2**5 +param_dict = {"nx": nx, "nt": nt, "c": 1, "dt": 0.1, "dx": 0.1} +stat_list = [kso.WALL_TIME, kso.OP_MAP, kso.FLOP_RATE] +stats = sc.collect_stats(knl, stat_list, param_dict=param_dict) + +# Measured time + flop rate +time_measured = stats[kso.WALL_TIME] +#flop_rate_measured = stats[kso.FLOP_RATE] + +print("time:", time_measured) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? 
constraint map subset of SIO?") +print(linearization_is_valid) +""" diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 0000000000000000000000000000000000000000..d9066030fbe499508d568ab561739fa8c31e07e5 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,198 @@ +# coding: utf-8 +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_before, sched_after, lex_map, before_marker="'"): + """Return a statement ordering represented as a map from each statement + instance to all statement instances occurring later. + + :arg sched_before: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the dependee in a dependency relationship. + + :arg sched_after: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the depender in a dependency relationship. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :arg before_marker: A :class:`str` to be appended to the names of the + map dimensions representing the 'before' statement in the + 'happens before' relationship. + + :returns: An :class:`islpy.Map` representing the statement odering as + a mapping from each statement instance to all statement instances + occurring later. I.e., we compose relations B, L, and A as + B ∘ L ∘ A^-1, where B is `sched_before`, A is `sched_after`, + and L is `lex_map`. 
+
+    """
+
+    # Perform the composition of relations
+    sio = sched_before.apply_range(
+        lex_map).apply_range(sched_after.reverse())
+
+    # Append marker to in_ dims
+    from loopy.schedule.checker.utils import (
+        append_marker_to_isl_map_var_names,
+    )
+    return append_marker_to_isl_map_var_names(
+        sio, isl.dim_type.in_, before_marker)
+
+
+def get_lex_order_set(before_names, after_names, islvars=None):
+    """Return an :class:`islpy.Set` representing a lexicographic ordering
+    with the number of dimensions provided in `before_names`
+    (equal to the number of dimensions in `after_names`).
+
+    :arg before_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a
+        lexicographic ordering that occurs before another point, which will
+        be represented using `after_names`. (see example below)
+
+    :arg after_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a
+        lexicographic ordering that occurs after another point, which will
+        be represented using `before_names`. (see example below)
+
+    :arg islvars: A dictionary mapping variable names in `before_names` and
+        `after_names` to :class:`islpy.PwAff` instances that represent each
+        of the variables (islvars may be produced by
+        `islpy.make_zero_and_vars`). The key '0' is also included and
+        represents a :class:`islpy.PwAff` zero constant. This dictionary
+        defines the space to be used for the set. If no value is passed,
+        the dictionary will be made using `before_names` and `after_names`.
+
+    :returns: An :class:`islpy.Set` representing a big-endian lexicographic
+        ordering with the number of dimensions provided in `before_names`.
+        The set has one dimension for each name in *both* `before_names`
+        and `after_names`, and contains all points which meet a
+        'happens before' constraint defining the lexicographic ordering.
+        E.g., if `before_names = [i0', i1', i2']` and
+        `after_names = [i0, i1, i2]`, return the set containing all points
+        in a 3-dimensional, big-endian lexicographic ordering such that
+        point `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e.,
+        return::
+
+            {[i0', i1', i2', i0, i1, i2] :
+                i0' < i0 or (i0' = i0 and i1' < i1)
+                or (i0' = i0 and i1' = i1 and i2' < i2)}
+
+    """
+
+    # If no islvars passed, make them using the names provided
+    if islvars is None:
+        islvars = isl.make_zero_and_vars(before_names+after_names, [])
+
+    # Initialize set with constraint i0' < i0
+    lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]])
+
+    # For each dim d, starting with d=1, equality_conj_set will be constrained
+    # by d equalities, e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)).
+    equality_conj_set = islvars[0].eq_set(islvars[0])  # initialize to 'true'
+
+    for i in range(1, len(before_names)):
+
+        # Add the next equality constraint to equality_conj_set
+        equality_conj_set = equality_conj_set & \
+            islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]])
+
+        # Create a set constrained by adding a less-than constraint for this
+        # dim, e.g., (i1' < i1), to the current equality conjunction set.
+        # For each dim d, starting with d=1, this full conjunction will have
+        # d equalities and one inequality, e.g.,
+        # (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id)
+        full_conj_set = islvars[before_names[i]].lt_set(
+            islvars[after_names[i]]) & equality_conj_set
+
+        # Union this new constraint with the current lex_order_set
+        lex_order_set = lex_order_set | full_conj_set
+
+    return lex_order_set
+
+
+def create_lex_order_map(
+        n_dims=None,
+        before_names=None,
+        after_names=None,
+        ):
+    """Return a map from each point in a lexicographic ordering to every
+    point that occurs later in the lexicographic ordering.
+
+    :arg n_dims: An :class:`int` representing the number of dimensions
+        in the lexicographic ordering. If not provided, `n_dims` will be
+        set to the length of `after_names`.
+
+    :arg before_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a
+        lexicographic ordering that occurs before another point, which will
+        be represented using `after_names`. (see example below)
+
+    :arg after_names: A list of :class:`str` variable names to be used
+        to describe lexicographic space dimensions for a point in a
+        lexicographic ordering that occurs after another point, which will
+        be represented using `before_names`. (see example below)
+
+    :returns: An :class:`islpy.Map` representing a lexicographic
+        ordering as a mapping from each point in lexicographic time
+        to every point that occurs later in lexicographic time.
+        E.g., if `before_names = [i0', i1', i2']` and
+        `after_names = [i0, i1, i2]`, return the map::
+
+            {[i0', i1', i2'] -> [i0, i1, i2] :
+                i0' < i0 or (i0' = i0 and i1' < i1)
+                or (i0' = i0 and i1' = i1 and i2' < i2)}
+
+    """
+
+    if after_names is None:
+        after_names = ["i%s" % (i) for i in range(n_dims)]
+    if before_names is None:
+        from loopy.schedule.checker.utils import (
+            append_marker_to_strings,
+        )
+        before_names = append_marker_to_strings(after_names, marker="'")
+    if n_dims is None:
+        n_dims = len(after_names)
+
+    assert len(before_names) == len(after_names) == n_dims
+    dim_type = isl.dim_type
+
+    # First, get a set representing the lexicographic ordering.
+    lex_order_set = get_lex_order_set(before_names, after_names)
+
+    # Now convert that set to a map.
+    lex_map = isl.Map.from_domain(lex_order_set)
+    return lex_map.move_dims(
+        dim_type.out, 0, dim_type.in_,
+        len(before_names), len(after_names))
diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py
index bc71df5d8f9658c788141e17a6eaf948cf5aa635..a947da3ac029b8b49868d65472d8eb893d79a946 100644
--- a/loopy/schedule/checker/schedule.py
+++ b/loopy/schedule/checker/schedule.py
@@ -313,3 +313,27 @@ def generate_pairwise_schedules(
         pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps)
 
     return pairwise_schedules
+
+
+def get_lex_order_map_for_sched_space(schedule):
+    """Return an :class:`islpy.Map` that maps each point in a
+    lexicographic ordering to every point that occurs later.
+
+    :arg schedule: An :class:`islpy.Map` representing the ordering of
+        statement instances as a mapping from statement instances to
+        lexicographic time.
+
+    :returns: An :class:`islpy.Map` representing a lexicographic
+        ordering as a mapping from each point in lexicographic time
+        to every point that occurs later in lexicographic time, with
+        the dimension count and names matching the output dimension
+        of `schedule`.
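+
+    For example (illustrative), for a `schedule` whose output (lex) dims
+    are named ``i0, i1``, the returned map would be::
+
+        {[i0', i1'] -> [i0, i1] : i0' < i0 or (i0' = i0 and i1' < i1)}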
+
+    """
+
+    from loopy.schedule.checker.lexicographic_order_map import (
+        create_lex_order_map,
+    )
+
+    lex_dim_names = schedule.space.get_var_names(isl.dim_type.out)
+    return create_lex_order_map(after_names=lex_dim_names)
diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py
index 8e2a82a016202f054bb808887caa27c2f363842b..805fddd1132ac86535cfdc2b60ffbdc62dd40fc1 100644
--- a/loopy/schedule/checker/utils.py
+++ b/loopy/schedule/checker/utils.py
@@ -87,6 +87,73 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map):
     return isl.align_spaces(obj_map, tgt_map)
+
+
+def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"):
+    """Return an :class:`islpy.Map` with a marker appended to the specified
+    dimension names.
+
+    :arg old_isl_map: An :class:`islpy.Map`.
+
+    :arg dim_type: An :class:`islpy.dim_type`, i.e., an :class:`int`,
+        specifying the dimension to be marked.
+
+    :arg marker: A :class:`str` to be appended to the specified dimension
+        names. If not provided, `marker` defaults to an apostrophe.
+
+    :returns: An :class:`islpy.Map` matching `old_isl_map` with
+        `marker` appended to the `dim_type` dimension names.
+
+    """
+
+    new_map = old_isl_map.copy()
+    for i in range(len(old_isl_map.get_var_names(dim_type))):
+        new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name(
+            dim_type, i)+marker)
+    return new_map
+
+
+def make_islvars_with_marker(
+        var_names_needing_marker, other_var_names, param_names=[], marker="'"):
+    """Return a dictionary from variable and parameter names
+    to :class:`islpy.PwAff` instances that represent each of
+    the variables and parameters, appending marker to
+    var_names_needing_marker.
+
+    :arg var_names_needing_marker: A :class:`list` of :class:`str`
+        elements representing variable names to have markers appended.
+
+    :arg other_var_names: A :class:`list` of :class:`str`
+        elements representing variable names to be included as-is.
+
+    :arg param_names: A :class:`list` of :class:`str` elements
+        representing parameter names.
+
+    :returns: A dictionary from variable names to :class:`islpy.PwAff`
+        instances that represent each of the variables
+        (islvars may be produced by `islpy.make_zero_and_vars`). The key
+        '0' is also included and represents a :class:`islpy.PwAff` zero
+        constant.
+
+    """
+
+    def append_marker(items, mark):
+        new_items = []
+        for item in items:
+            new_items.append(item+mark)
+        return new_items
+
+    return isl.make_zero_and_vars(
+        append_marker(var_names_needing_marker, marker)
+        + other_var_names, param_names)
+
+
+def append_marker_to_strings(strings, marker="'"):
+    assert isinstance(strings, list)
+    return [s+marker for s in strings]
+
+
+def append_apostrophes(strings):
+    return append_marker_to_strings(strings, marker="'")
+
+
 def sorted_union_of_names_in_isl_sets(
         isl_sets,
         set_dim=isl.dim_type.set):
@@ -126,6 +193,7 @@ def create_symbolic_map_from_tuples(
         on these values.
 
     """
+    # TODO clarify this with more comments
     # TODO allow None for domains
 
     dim_type = isl.dim_type
@@ -213,6 +281,166 @@ def get_insn_id_from_linearization_item(linearization_item):
         return linearization_item.insn_id
+
+
+# TODO for better performance, could combine these funcs so we don't
+# loop over linearization more than once
+def get_all_nonconcurrent_insn_iname_subsets(
+        knl, exclude_empty=False, non_conc_inames=None):
+    """Return a :class:`set` of every unique subset of non-concurrent
+    inames used in an instruction in a :class:`loopy.LoopKernel`.
+
+    :arg knl: A :class:`loopy.LoopKernel`.
+
+    :arg exclude_empty: A :class:`bool` specifying whether to
+        exclude the empty set.
+
+    :arg non_conc_inames: A :class:`set` of non-concurrent inames
+        which may be provided if already known.
+
+    :returns: A :class:`set` of every unique subset of non-concurrent
+        inames used in any instruction in a :class:`loopy.LoopKernel`.
+
+    """
+
+    if non_conc_inames is None:
+        _, non_conc_inames = partition_inames_by_concurrency(knl)
+
+    iname_subsets = set()
+    for insn in knl.instructions:
+        iname_subsets.add(insn.within_inames & non_conc_inames)
+
+    if exclude_empty:
+        iname_subsets.discard(frozenset())
+
+    return iname_subsets
+
+
+def get_linearization_item_ids_within_inames(knl, inames):
+    linearization_item_ids = set()
+    for insn in knl.instructions:
+        if inames.issubset(insn.within_inames):
+            linearization_item_ids.add(insn.id)
+    return linearization_item_ids
+
+
+# TODO use yield to clean this up
+# TODO use topological sort from loopy, then find longest path in dag
+def _generate_orderings_starting_w_prefix(
+        allowed_after_dict, orderings, required_length=None,
+        start_prefix=(), return_first_found=False):
+    # allowed_after_dict = {str: set(str)}
+    # start_prefix = tuple(str)
+    # orderings = set
+    if start_prefix:
+        next_items = allowed_after_dict[start_prefix[-1]]-set(start_prefix)
+    else:
+        next_items = allowed_after_dict.keys()
+
+    if required_length:
+        if len(start_prefix) == required_length:
+            orderings.add(start_prefix)
+            if return_first_found:
+                return
+    else:
+        orderings.add(start_prefix)
+        if return_first_found:
+            return
+
+    # return if no more items left
+    if not next_items:
+        return
+
+    for next_item in next_items:
+        new_prefix = start_prefix + (next_item,)
+        _generate_orderings_starting_w_prefix(
+            allowed_after_dict,
+            orderings,
+            required_length=required_length,
+            start_prefix=new_prefix,
+            return_first_found=return_first_found,
+            )
+        if return_first_found and orderings:
+            return
+    return
+
+
+def get_orderings_of_length_n(
+        allowed_after_dict, required_length, return_first_found=False):
+    """Return all orderings found in the tree represented by
+    `allowed_after_dict`.
+
+    :arg allowed_after_dict: A :class:`dict` mapping each :class:`str`
+        name to a :class:`set` of names that are allowed to come after
+        that name.
+
+    :arg required_length: An :class:`int` representing the length required
+        for all orderings. Orderings not matching the required length will
+        not be returned.
+
+    :arg return_first_found: A :class:`bool` specifying whether to return
+        the first valid ordering found.
+
+    :returns: A :class:`set` of all orderings that are *explicitly* allowed
+        by the tree represented by `allowed_after_dict`. I.e., if we know
+        a->b and c->b, we don't know enough to return a->c->b. Note that
+        if the set for a dict key is empty, nothing is allowed to come after.
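+
+        Example (illustrative): with
+        ``allowed_after_dict = {"a": {"b", "c"}, "b": {"c"}, "c": set()}``
+        and ``required_length=3``, the only ordering returned is
+        ``("a", "b", "c")``; the branch beginning with ``("a", "c")``
+        dead-ends before reaching the required length.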
+ + """ + + orderings = set() + _generate_orderings_starting_w_prefix( + allowed_after_dict, + orderings, + required_length=required_length, + start_prefix=(), + return_first_found=return_first_found, + ) + return orderings + + +def create_graph_from_pairs(before_after_pairs): + # create key for every before + graph = dict([(before, set()) for before, _ in before_after_pairs]) + for before, after in before_after_pairs: + graph[before] = graph[before] | set([after, ]) + return graph + + +# only used for example purposes: +# TODO: make sure we don't need this, then remove + +def create_explicit_map_from_tuples(tuple_pairs, space): + """Return a :class:`islpy.Map` in :class:`islpy.Space` space + mapping tup_in->tup_out for each `(tup_in, tup_out)` pair + in `tuple_pairs`, where `tup_in` and `tup_out` are + tuples of :class:`int` values to be assigned to the + corresponding dimension variables in `space`. + + """ + + dim_type = isl.dim_type + individual_maps = [] + + for tup_in, tup_out in tuple_pairs: + constraints = [] + for i, val_in in enumerate(tup_in): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.in_, i, 1) + .set_constant_val(-1*val_in)) + for i, val_out in enumerate(tup_out): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.out, i, 1) + .set_constant_val(-1*val_out)) + individual_maps.append( + isl.Map.universe(space).add_constraints(constraints)) + + union_map = individual_maps[0] + for m in individual_maps[1:]: + union_map = union_map.union(m) + + return union_map + + def get_EnterLoop_inames(linearization_items): from loopy.schedule import EnterLoop diff --git a/loopy/statistics.py b/loopy/statistics.py index eda750120bc8456e9090304cbd2905a02ff2358e..a0a0f9c7ed7c62e4ec8f6ca517809696abfd2a8d 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1239,7 +1239,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import LocalIndexTag, GroupIndexTag - for iname in knl.insn_inames(insn): + for iname in insn.within_inames: tags = knl.iname_tags_of_type(iname, (LocalIndexTag, GroupIndexTag), max_num=1) if tags: @@ -1273,7 +1273,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False): - insn_inames = knl.insn_inames(insn) + insn_inames = insn.within_inames if disregard_local_axes: from loopy.kernel.data import LocalIndexTag diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 7e5de3164de761c01f41981a850bb14f6895c95d..77f8228b66a9af0e2cb500bb7d012887e9c94fcc 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1002,20 +1002,77 @@ class RuleAwareIdentityMapper(IdentityMapper): lambda expr: self(expr, kernel, insn))) for insn in kernel.instructions] - return kernel.copy(instructions=new_insns) + from functools import partial + + non_insn_self = partial(self, kernel=kernel, insn=None) + + from loopy.kernel.array import ArrayBase + + # {{{ args + + new_args = [ + arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg + for arg in kernel.args] + + # }}} + + # {{{ tvs + + new_tvs = { + tv_name: tv.map_exprs(non_insn_self) + for tv_name, tv in kernel.temporary_variables.items()} + + # }}} + + # domains, var names: not exprs => do not map + + return kernel.copy(instructions=new_insns, + args=new_args, + temporary_variables=new_tvs) class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): + """ + Mapper to 
substitute expressions and record any divergence of substitution + rule expressions of :class:`loopy.LoopKernel`. + + .. attribute:: rule_mapping_context + + An instance of :class:`SubstitutionRuleMappingContext` to record + divergence of substitution rules. + + .. attribute:: within + + An instance of :class:`loopy.match.StackMatchComponent`. + :class:`RuleAwareSubstitutionMapper` would perform + substitutions in the expression if the stack match is ``True`` or + if the expression does not arise from an :class:`~loopy.InstructionBase`. + + .. note:: + + The mapped kernel should be passed through + :meth:`SubstitutionRuleMappingContext.finish_kernel` to perform any + renaming mandated by the rule expression divergences. + """ def __init__(self, rule_mapping_context, subst_func, within): super().__init__(rule_mapping_context) self.subst_func = subst_func - self.within = within + self._within = within + + def within(self, kernel, instruction, stack): + if instruction is None: + # always perform substitutions on expressions not coming from + # instructions. + return True + else: + return self._within(kernel, instruction, stack) def map_variable(self, expr, expn_state): if (expr.name in expn_state.arg_context or not self.within( expn_state.kernel, expn_state.instruction, expn_state.stack)): + # expr not in within => do nothing (call IdentityMapper) return super().map_variable( expr, expn_state) @@ -1525,7 +1582,13 @@ def qpolynomial_from_expr(space, expr): def simplify_using_aff(kernel, expr): inames = get_dependencies(expr) & kernel.all_inames() - domain = kernel.get_inames_domain(inames) + # FIXME: Ideally, we should find out what inames are usable and allow + # the simplification to use all of those. For now, fall back to making + # sure that the simplification only uses inames that were already there. + domain = ( + kernel + .get_inames_domain(inames) + .project_out_except(inames, [dim_type.set])) try: aff = guarded_aff_from_expr(domain.space, expr) @@ -1679,6 +1742,25 @@ def isl_set_from_expr(space, expr): return set_ + +def condition_to_set(space, expr): + """ + Returns an instance of :class:`islpy.Set` if *expr* can be expressed as an + ISL-set on *space*, if not then returns *None*. + """ + from loopy.symbolic import get_dependencies + if get_dependencies(expr) <= frozenset( + space.get_var_dict()): + try: + from loopy.symbolic import isl_set_from_expr + return isl_set_from_expr(space, expr) + except ExpressionToAffineConversionError: + # non-affine condition: can't do much + return None + else: + # data-dependent condition: can't do much + return None + # }}} @@ -2036,7 +2118,7 @@ class AccessRangeOverlapChecker: arm = BatchedAccessRangeMapper(self.kernel, self.vars, overestimate=True) for expr in exprs: - arm(expr, self.kernel.insn_inames(insn)) + arm(expr, insn.within_inames) for name, arange in arm.access_ranges.items(): if arm.bad_subscripts[name]: diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 6bad214ec4e10a91e36b3566f454eabab00dde26..8af47c41222416fbd2dbe3dc5a88d4090a4a06f0 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -39,6 +39,14 @@ __doc__ = """ .. autoclass:: NumbaTarget .. autoclass:: NumbaCudaTarget +References to Canonical Names +----------------------------- + +.. currentmodule:: loopy.target + +.. class:: TargetBase + + See :class:`loopy.TargetBase`. 
""" diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 3234da45d469e2cc71de4733f3186aea8a93b065..d1e474c2054a15688f00b2bd5f5c6d9e6e9975df 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -34,6 +34,9 @@ from loopy.symbolic import IdentityMapper from loopy.types import NumpyType import pymbolic.primitives as p +from loopy.tools import remove_common_indentation +import re + from pytools import memoize_method __doc__ = """ @@ -172,6 +175,46 @@ def _preamble_generator(preamble_info): yield ("04_%s" % func_name, func_body) yield undef_integer_types_macro + for func in preamble_info.seen_functions: + if func.name == "int_pow": + base_ctype = preamble_info.kernel.target.dtype_to_typename( + func.arg_dtypes[0]) + exp_ctype = preamble_info.kernel.target.dtype_to_typename( + func.arg_dtypes[1]) + res_ctype = preamble_info.kernel.target.dtype_to_typename( + func.result_dtypes[0]) + + if func.arg_dtypes[1].numpy_dtype.kind == "u": + signed_exponent_preamble = "" + else: + signed_exponent_preamble = "\n" + remove_common_indentation( + """ + if (n < 0) { + x = 1.0/x; + n = -n; + }""") + + yield(f"07_{func.c_name}", f""" + inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{ + if (n == 0) + return 1; + {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)} + + {res_ctype} y = 1; + + while (n > 1) {{ + if (n % 2) {{ + y = x * y; + x = x * x; + }} + else + x = x * x; + n = n / 2; + }} + + return x*y; + }}""") + # }}} @@ -447,14 +490,14 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): arg_dtypes=arg_dtypes) # binary functions - if (name in ["fmax", "fmin", "copysign"] + if (name in ["fmax", "fmin", "copysign", "pow"] and len(arg_dtypes) == 2): dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) if dtype.kind == "c": - raise LoopyTypeError("%s does not support complex numbers") + raise LoopyTypeError(f"{name} does not support complex numbers") elif dtype.kind == "f": if modify_name: @@ -942,7 +985,8 @@ class CFamilyASTBuilder(ASTBuilderBase): codegen_state.seen_functions.add( SeenFunction(func_id, mangle_result.target_name, - mangle_result.arg_dtypes)) + mangle_result.arg_dtypes, + mangle_result.result_dtypes)) from pymbolic import var for i, (a, tgt_dtype) in enumerate( diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 74f1ead8bcb3240f2cf3775048f7cc809e367a1f..9ec99c784f5955232038644de6ee06dd6466237a 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -325,7 +325,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.codegen_state.seen_functions.add( SeenFunction( name, f"{name}_{suffix}", - (result_dtype, result_dtype))) + (result_dtype, result_dtype), + (result_dtype,))) if den_nonneg: if num_nonneg: @@ -538,7 +539,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.codegen_state.seen_functions.add( SeenFunction(identifier, mangle_result.target_name, - mangle_result.arg_dtypes or par_dtypes)) + mangle_result.arg_dtypes or par_dtypes, + mangle_result.result_dtypes)) return var(mangle_result.target_name)(*processed_parameters) @@ -701,6 +703,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.rec(expr.denominator, type_context, tgt_dtype)) def map_power(self, expr, type_context): + tgt_dtype = self.infer_type(expr) + base_dtype = self.infer_type(expr.base) + exponent_dtype = self.infer_type(expr.exponent) + def base_impl(expr, type_context): from pymbolic.primitives import 
is_constant, is_zero if is_constant(expr.exponent): @@ -711,14 +717,24 @@ class ExpressionToCExpressionMapper(IdentityMapper): elif is_zero(expr.exponent - 2): return self.rec(expr.base*expr.base, type_context) - return type(expr)( - self.rec(expr.base, type_context), - self.rec(expr.exponent, type_context)) + if exponent_dtype.is_integral(): + from loopy.codegen import SeenFunction + func_name = ("loopy_pow_" + f"{tgt_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}") + + self.codegen_state.seen_functions.add( + SeenFunction( + "int_pow", func_name, + (tgt_dtype, exponent_dtype), + (tgt_dtype, ))) + return var(func_name)(self.rec(expr.base, type_context), + self.rec(expr.exponent, type_context)) + else: + return self.rec(var("pow")(expr.base, expr.exponent), type_context) if not self.allow_complex: return base_impl(expr, type_context) - tgt_dtype = self.infer_type(expr) if tgt_dtype.is_complex(): if expr.exponent in [2, 3, 4]: value = expr.base @@ -726,8 +742,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): value = value * expr.base return self.rec(value, type_context) else: - b_complex = self.infer_type(expr.base).is_complex() - e_complex = self.infer_type(expr.exponent).is_complex() + b_complex = base_dtype.is_complex() + e_complex = exponent_dtype.is_complex() if b_complex and not e_complex: return var("%s_powr" % self.complex_type_name(tgt_dtype))( @@ -754,6 +770,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): # {{{ C expression to code mapper class CExpressionToCodeMapper(RecursiveMapper): + # {{{ helpers def parenthesize_if_needed(self, s, enclosing_prec, my_prec): @@ -954,9 +971,8 @@ class CExpressionToCodeMapper(RecursiveMapper): return self._map_division_operator("%", expr, enclosing_prec) def map_power(self, expr, enclosing_prec): - return "pow({}, {})".format( - self.rec(expr.base, PREC_NONE), - self.rec(expr.exponent, PREC_NONE)) + raise RuntimeError(f"'{expr}' should have been transformed to 'Call'" + " expression node.") def map_array_literal(self, expr, enclosing_prec): return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2023077bf8f286d9c28cdee2e37f194276dc211a..67dc1fe249af91d9b73a7162867dcd98c7ef6bc7 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -127,6 +127,18 @@ def cuda_function_mangler(kernel, name, arg_dtypes): return dtype, name + if name in ["pow"] and len(arg_dtypes) == 2: + dtype = np.find_common_type([], arg_dtypes) + + if dtype == np.float64: + pass # pow + elif dtype == np.float32: + name = name + "f" # powf + else: + raise RuntimeError(f"{name} does not support type {dtype}") + + return dtype, name + if name in "atan2" and len(arg_dtypes) == 2: return arg_dtypes[0], name diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 74819b93932e0852a59c3ebacb99f9eaafab0a05..74887155b920e6d514df673c1ed8897486a4f81f 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -281,20 +281,20 @@ class ExecutionWrapperGeneratorBase: 'passed array")' % (arg.name, impl_array_name)) - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" - % (arg.name, impl_array_name, stride_impl_axis, - base_arg.dtype.dtype.itemsize)) + base_arg = kernel.impl_arg_to_arg[impl_array_name] - gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' " - ' is not divisible by its dtype itemsize"' - % (stride_impl_axis, impl_array_name)) - gen("del _lpy_remdr") - else: - 
gen("%s = _lpy_offset // %d" - % (arg.name, base_arg.dtype.itemsize)) + if not options.skip_arg_checks: + gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" + % (arg.name, impl_array_name, stride_impl_axis, + base_arg.dtype.dtype.itemsize)) + + gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' " + ' is not divisible by its dtype itemsize"' + % (stride_impl_axis, impl_array_name)) + gen("del _lpy_remdr") + else: + gen("%s = _lpy_offset // %d" + % (arg.name, base_arg.dtype.itemsize)) gen("# }}}") gen("") @@ -639,8 +639,6 @@ class ExecutionWrapperGeneratorBase: if issubclass(idi.arg_class, KernelArgument) ]) - gen.add_to_preamble("from __future__ import division") - gen.add_to_preamble("") self.target_specific_preamble(gen) gen.add_to_preamble("") self.generate_host_code(gen, codegen_result) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 2ff9ede55e8c3ab5b5e1237b2a66c72635e1454b..c409df380c5a6b1e47cfcc9773aee2bee16ba1a8 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -28,7 +28,7 @@ import numpy as np from loopy.target.c import CFamilyTarget, CFamilyASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, LoopyTypeError from loopy.types import NumpyType from loopy.target.c import DTypeRegistryWrapper, c_math_mangler from loopy.kernel.data import AddressSpace, CallMangleInfo @@ -181,6 +181,22 @@ def opencl_function_mangler(kernel, name, arg_dtypes): result_dtypes=(result_dtype,), arg_dtypes=2*(result_dtype,)) + if name == "pow" and len(arg_dtypes) == 2: + dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) + if dtype == np.float64: + name = "powf64" + elif dtype == np.float32: + name = "powf32" + else: + raise LoopyTypeError(f"'pow' does not support type {dtype}.") + + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) + if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] return CallMangleInfo( @@ -286,6 +302,19 @@ def opencl_preamble_generator(preamble_info): """ % dict(idx_ctype=kernel.target.dtype_to_typename( kernel.index_dtype)))) + for func in preamble_info.seen_functions: + if func.name == "pow" and func.c_name == "powf32": + yield("08_clpowf32", """ + inline float powf32(float x, float y) { + return pow(x, y); + }""") + + if func.name == "pow" and func.c_name == "powf64": + yield("08_clpowf64", """ + inline double powf64(double x, double y) { + return pow(x, y); + }""") + # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index a17416c47bb290285972390ae161771bac8f77e9..8d0c309b08b8df4cda7e13c097441ef272449a02 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -509,14 +509,6 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): Raise('RuntimeError("input argument \'{name}\' ' 'must be supplied")'.format(name=idi.name)))) - if idi.dtype.is_integral(): - gen(Comment("cast to Python int to avoid trouble " - "with struct packing or Boost.Python")) - py_type = "int" - - gen(Assign(idi.name, f"{py_type}({idi.name})")) - gen(Line()) - if idi.dtype.is_composite(): gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name))) cl_arg_idx += 1 @@ -578,7 +570,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): fp_arg_count += 1 gen(S( - "_lpy_knl.set_arg(%d, _lpy_pack('%s', 
%s))" + "_lpy_knl._set_arg_buf(%d, _lpy_pack('%s', %s))" % (cl_arg_idx, idi.dtype.dtype.char, idi.name))) cl_arg_idx += 1 @@ -632,25 +624,22 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): if not issubclass(idi.arg_class, TemporaryVariable)] + ["wait_for=None", "allocator=None"]) - from genpy import (For, Function, Suite, Import, ImportAs, Return, - FromImport, Line, Statement as S) + from genpy import (For, Function, Suite, Return, Line, Statement as S) return Function( codegen_result.current_program(codegen_state).name, args, Suite([ - FromImport("struct", ["pack as _lpy_pack"]), - ImportAs("pyopencl", "_lpy_cl"), - Import("pyopencl.tools"), Line(), ] + [ Line(), function_body, Line(), - ] + [ - For("_tv", "_global_temporaries", - # free global temporaries - S("_tv.release()")) - ] + [ + ] + ([ + For("_tv", "_global_temporaries", + # free global temporaries + S("_tv.release()")) + ] if self._get_global_temporaries(codegen_state) else [] + ) + [ Line(), Return("_lpy_evt"), ])) @@ -660,6 +649,14 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): # no such thing in Python return None + def _get_global_temporaries(self, codegen_state): + from loopy.kernel.data import AddressSpace + + return sorted( + (tv for tv in codegen_state.kernel.temporary_variables.values() + if tv.address_space == AddressSpace.GLOBAL), + key=lambda tv: tv.name) + def get_temporary_decls(self, codegen_state, schedule_state): from genpy import Assign, Comment, Line @@ -668,18 +665,12 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): from operator import mul return tv.dtype.numpy_dtype.itemsize * reduce(mul, tv.shape, 1) - from loopy.kernel.data import AddressSpace - - global_temporaries = sorted( - (tv for tv in codegen_state.kernel.temporary_variables.values() - if tv.address_space == AddressSpace.GLOBAL), - key=lambda tv: tv.name) - from pymbolic.mapper.stringifier import PREC_NONE ecm = self.get_expression_to_code_mapper(codegen_state) + global_temporaries = self._get_global_temporaries(codegen_state) if not global_temporaries: - return [Assign("_global_temporaries", "[]"), Line()] + return [] return [ Comment("{{{ allocate global temporaries"), @@ -734,8 +725,13 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): arry_arg_code, Assign("_lpy_evt", "%(pyopencl_module_name)s.enqueue_nd_range_kernel(" "queue, _lpy_knl, " - "%(gsize)s, %(lsize)s, wait_for=wait_for, " - "g_times_l=True, allow_empty_ndrange=True)" + "%(gsize)s, %(lsize)s, " + # using positional args because pybind is slow with kwargs + "None, " # offset + "wait_for, " + "True, " # g_times_l + "True, " # allow_empty_ndrange + ")" % dict( pyopencl_module_name=self.target.pyopencl_module_name, gsize=ecm(gsize, prec=PREC_NONE, type_context="i"), diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 7fc20f19167af62f86e9fb18690b2f03f932e63b..cdee5600bb5dd0dce3a3971583604f737c6913d9 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -142,6 +142,7 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen.add_to_preamble("import pyopencl as _lpy_cl") gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools") + gen.add_to_preamble("from struct import pack as _lpy_pack") def initialize_system_args(self, gen): """ diff --git a/loopy/target/python.py b/loopy/target/python.py index e54aa622f0b56360cb1b3f04be118c1319db7d3b..a1557e47bdf8990e7aa89472b59f3c9fc3666a05 100644 --- 
a/loopy/target/python.py +++ b/loopy/target/python.py @@ -118,7 +118,8 @@ class ExpressionToPythonMapper(StringifyMapper): self.codegen_state.seen_functions.add( SeenFunction(identifier, mangle_result.target_name, - mangle_result.arg_dtypes or par_dtypes)) + mangle_result.arg_dtypes or par_dtypes, + mangle_result.result_dtypes)) return "{}({})".format(mangle_result.target_name, ", ".join(str_parameters)) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a50725d20d579109f6e061fba0a1f408a6e23e93..e946a67c0cf067b4701a5ab4bcd86594d42c5b4c 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -631,6 +631,8 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): raise LoopyError("argument name '%s' conflicts with an existing identifier" "--cannot rename" % new_name) + # {{{ instructions + from pymbolic import var subst_dict = {old_name: var(new_name)} @@ -644,7 +646,11 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): make_subst_func(subst_dict), within=lambda kernel, insn, stack: True) - kernel = smap.map_kernel(kernel) + kernel = rule_mapping_context.finish_kernel(smap.map_kernel(kernel)) + + # }}} + + # {{{ args new_args = [] for arg in kernel.args: @@ -653,7 +659,22 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): new_args.append(arg) - return kernel.copy(args=new_args) + # }}} + + # {{{ domain + + new_domains = [] + for dom in kernel.domains: + dom_var_dict = dom.get_var_dict() + if old_name in dom_var_dict: + dt, pos = dom_var_dict[old_name] + dom = dom.set_dim_name(dt, pos, new_name) + + new_domains.append(dom) + + # }}} + + return kernel.copy(domains=new_domains, args=new_args) # }}} diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 241c1492d4c41124c21befb2739fae349538c908..fb5e8d781ebc3f8c806dfa7b531560f0855c98d5 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -118,25 +118,25 @@ def prioritize_loops(kernel, loop_priority): class _InameSplitter(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, within, - split_iname, outer_iname, inner_iname, replacement_index): + iname_to_split, outer_iname, inner_iname, replacement_index): super().__init__(rule_mapping_context) self.within = within - self.split_iname = split_iname + self.iname_to_split = iname_to_split self.outer_iname = outer_iname self.inner_iname = inner_iname self.replacement_index = replacement_index def map_reduction(self, expr, expn_state): - if (self.split_iname in expr.inames - and self.split_iname not in expn_state.arg_context + if (self.iname_to_split in expr.inames + and self.iname_to_split not in expn_state.arg_context and self.within( expn_state.kernel, expn_state.instruction)): new_inames = list(expr.inames) - new_inames.remove(self.split_iname) + new_inames.remove(self.iname_to_split) new_inames.extend([self.outer_iname, self.inner_iname]) from loopy.symbolic import Reduction @@ -147,8 +147,8 @@ class _InameSplitter(RuleAwareIdentityMapper): return super().map_reduction(expr, expn_state) def map_variable(self, expr, expn_state): - if (expr.name == self.split_iname - and self.split_iname not in expn_state.arg_context + if (expr.name == self.iname_to_split + and self.iname_to_split not in expn_state.arg_context and self.within( expn_state.kernel, expn_state.instruction)): @@ -157,7 +157,58 @@ class _InameSplitter(RuleAwareIdentityMapper): return super().map_variable(expr, expn_state) -def _split_iname_backend(kernel, split_iname, +def _split_iname_in_set(s, 
iname_to_split, inner_iname, outer_iname, fixed_length, + fixed_length_is_inner): + var_dict = s.get_var_dict() + + if iname_to_split not in var_dict: + return s + + orig_dim_type, _ = var_dict[iname_to_split] + # orig_dim_type may be set or param (the latter if the iname is + # used as a parameter in a subdomain). + + # NB: dup_iname_to_split is not a globally valid identifier: only unique + # wrt the set s. + from pytools import generate_unique_names + for dup_iname_to_split in generate_unique_names(f"dup_{iname_to_split}"): + if dup_iname_to_split not in var_dict: + break + + from loopy.isl_helpers import duplicate_axes + s = duplicate_axes(s, (iname_to_split,), (dup_iname_to_split,)) + + outer_var_nr = s.dim(orig_dim_type) + inner_var_nr = s.dim(orig_dim_type)+1 + + s = s.add_dims(orig_dim_type, 2) + s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) + s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) + + from loopy.isl_helpers import make_slab + + if fixed_length_is_inner: + fixed_iname, var_length_iname = inner_iname, outer_iname + else: + fixed_iname, var_length_iname = outer_iname, inner_iname + + space = s.get_space() + s = s & ( + make_slab(space, fixed_iname, 0, fixed_length) + # name = fixed_iname + fixed_length*var_length_iname + .add_constraint(isl.Constraint.eq_from_names( + space, { + dup_iname_to_split: 1, + fixed_iname: -1, + var_length_iname: -fixed_length}))) + + dup_iname_dim_type, dup_name_idx = space.get_var_dict()[dup_iname_to_split] + s = s.project_out(dup_iname_dim_type, dup_name_idx, 1) + + return s + + +def _split_iname_backend(kernel, iname_to_split, fixed_length, fixed_length_is_inner, make_new_loop_index, outer_iname=None, inner_iname=None, @@ -186,88 +237,47 @@ def _split_iname_backend(kernel, split_iname, # }}} - existing_tags = kernel.iname_tags(split_iname) + existing_tags = kernel.iname_tags(iname_to_split) from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type if (do_tagged_check and existing_tags and not filter_iname_tags_by_type(existing_tags, ForceSequentialTag)): - raise LoopyError("cannot split already tagged iname '%s'" % split_iname) + raise LoopyError(f"cannot split already tagged iname '{iname_to_split}'") - if split_iname not in kernel.all_inames(): - raise ValueError("cannot split loop for unknown variable '%s'" % split_iname) + if iname_to_split not in kernel.all_inames(): + raise ValueError( + f"cannot split loop for unknown variable '{iname_to_split}'") applied_iname_rewrites = kernel.applied_iname_rewrites[:] vng = kernel.get_var_name_generator() if outer_iname is None: - outer_iname = vng(split_iname+"_outer") + outer_iname = vng(iname_to_split+"_outer") if inner_iname is None: - inner_iname = vng(split_iname+"_inner") - - def process_set(s): - var_dict = s.get_var_dict() - - if split_iname not in var_dict: - return s - - orig_dim_type, _ = var_dict[split_iname] + inner_iname = vng(iname_to_split+"_inner") - outer_var_nr = s.dim(orig_dim_type) - inner_var_nr = s.dim(orig_dim_type)+1 - - s = s.add_dims(orig_dim_type, 2) - s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) - s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) - - from loopy.isl_helpers import make_slab - - if fixed_length_is_inner: - fixed_iname, var_length_iname = inner_iname, outer_iname - else: - fixed_iname, var_length_iname = outer_iname, inner_iname - - space = s.get_space() - fixed_constraint_set = ( - make_slab(space, fixed_iname, 0, fixed_length) - # name = fixed_iname + fixed_length*var_length_iname - 
.add_constraint(isl.Constraint.eq_from_names( - space, { - split_iname: 1, - fixed_iname: -1, - var_length_iname: -fixed_length}))) - - name_dim_type, name_idx = space.get_var_dict()[split_iname] - s = s.intersect(fixed_constraint_set) - - def _project_out_only_if_all_instructions_in_within(): - for insn in kernel.instructions: - if split_iname in insn.within_inames and ( - not within(kernel, insn)): - return s - - return s.project_out(name_dim_type, name_idx, 1) - - return _project_out_only_if_all_instructions_in_within() - - new_domains = [process_set(dom) for dom in kernel.domains] + new_domains = [ + _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname, + fixed_length, fixed_length_is_inner) + for dom in kernel.domains] from pymbolic import var inner = var(inner_iname) outer = var(outer_iname) new_loop_index = make_new_loop_index(inner, outer) - subst_map = {var(split_iname): new_loop_index} + subst_map = {var(iname_to_split): new_loop_index} applied_iname_rewrites.append(subst_map) # {{{ update within_inames new_insns = [] for insn in kernel.instructions: - if split_iname in insn.within_inames and ( + if iname_to_split in insn.within_inames and ( within(kernel, insn)): new_within_inames = ( (insn.within_inames.copy() - - frozenset([split_iname])) + - frozenset([iname_to_split])) | frozenset([outer_iname, inner_iname])) else: new_within_inames = insn.within_inames @@ -286,7 +296,7 @@ def _split_iname_backend(kernel, split_iname, for prio in kernel.loop_priority: new_prio = () for prio_iname in prio: - if prio_iname == split_iname: + if prio_iname == iname_to_split: new_prio = new_prio + (outer_iname, inner_iname) else: new_prio = new_prio + (prio_iname,) @@ -302,7 +312,7 @@ def _split_iname_backend(kernel, split_iname, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) ins = _InameSplitter(rule_mapping_context, within, - split_iname, outer_iname, inner_iname, new_loop_index) + iname_to_split, outer_iname, inner_iname, new_loop_index) kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) @@ -311,7 +321,10 @@ def _split_iname_backend(kernel, split_iname, kernel = tag_inames(kernel, {outer_iname: existing_tag, inner_iname: existing_tag}) - return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) + kernel = tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) + kernel = remove_unused_inames(kernel, [iname_to_split]) + + return kernel # }}} @@ -319,6 +332,7 @@ def _split_iname_backend(kernel, split_iname, # {{{ split iname def split_iname(kernel, split_iname, inner_length, + *, outer_iname=None, inner_iname=None, outer_tag=None, inner_tag=None, slabs=(0, 0), do_tagged_check=True, @@ -1197,16 +1211,22 @@ def remove_unused_inames(kernel, inames=None): # {{{ remove them - from loopy.kernel.tools import DomainChanger - + domains = kernel.domains for iname in unused_inames: - domch = DomainChanger(kernel, (iname,)) + new_domains = [] + + for dom in domains: + try: + dt, idx = dom.get_var_dict()[iname] + except KeyError: + pass + else: + dom = dom.project_out(dt, idx, 1) + new_domains.append(dom) - dom = domch.domain - dt, idx = dom.get_var_dict()[iname] - dom = dom.project_out(dt, idx, 1) + domains = new_domains - kernel = kernel.copy(domains=domch.get_domains_with(dom)) + kernel = kernel.copy(domains=domains) # }}} @@ -1589,7 +1609,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): insns = [insn for insn in kernel.instructions if match(kernel, 
insn)] for insn in insns: - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: if kernel.iname_tags_of_type(iname, kind): used_axes.add(kind.axis) diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 8527023bc789c9b3c9e18fe7ad6827c82a6e7a55..ce2d7942b70c68a79fd5c6ddc36b24fd6896cc04 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -124,7 +124,7 @@ def privatize_temporaries_with_inames( for writer_insn_id in wmap.get(tv.name, []): writer_insn = kernel.id_to_insn[writer_insn_id] - priv_axis_inames = kernel.insn_inames(writer_insn) & privatizing_inames + priv_axis_inames = writer_insn.within_inames & privatizing_inames referenced_priv_axis_inames = (priv_axis_inames & writer_insn.write_dependency_names()) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 64337864f48e42f096ab851dd5b71afd607f067e..787966efc7fd00ad282e60990846ce07004e7906 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -216,8 +216,12 @@ class TypeInferenceMapper(CombineMapper): # Numpy types are sized return [NumpyType(np.dtype(type(expr)))] elif dt.kind == "f": - # deduce the smaller type by default - return [NumpyType(np.dtype(np.float32))] + if np.float32(expr) == np.float64(expr): + # No precision is lost by 'guessing' single precision, use that. + # This at least covers simple cases like '1j'. + return [NumpyType(np.dtype(np.float32))] + + return [NumpyType(np.dtype(np.float64))] elif dt.kind == "c": if np.complex64(expr) == np.complex128(expr): # (COMPLEX_GUESS_LOGIC) diff --git a/loopy/version.py b/loopy/version.py index fddd44479adcae87ec96f470a690274b154fde54..6f66c5347c55042ebf7b220a658bb4ebf3fef04d 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -42,7 +42,7 @@ else: # }}} -VERSION = (2020, 2, 1) +VERSION = (2020, 2, 2) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff --git a/requirements.txt b/requirements.txt index 2105aede063c65752ef4a9262eb960f749778a8a..8016ee7a86fbb4646d534bd66182f563b2cc9a44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/inducer/pytools.git#egg=pytools +git+https://github.com/inducer/pytools.git#egg=pytools >= 2021.1 git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/cgen.git#egg=cgen git+https://github.com/inducer/pyopencl.git#egg=pyopencl @@ -6,7 +6,7 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/genpy.git#egg=genpy git+https://github.com/inducer/codepy.git#egg=codepy -git+https://github.com/inducer/f2py +git+https://github.com/inducer/f2py#egg=f2py # Optional, needed for using the C preprocessor on Fortran ply>=3.6 diff --git a/setup.py b/setup.py index ddc47fefca853321d383bad4aeaa6f24f6d5c901..fcf284bc8574dc118e4b319c1b9ff38b0b24685d 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ setup(name="loopy", python_requires="~=3.6", install_requires=[ - "pytools>=2020.4", + "pytools>=2021.1", "pymbolic>=2019.2", "genpy>=2016.1.2", "cgen>=2016.1", diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3c927a9cea09df50c4d0fe70dee7435b6ce3c129..54e6c98a3f10fde910b625fbb0f22e4b1f3ebdd9 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -31,6 +31,7 @@ from pyopencl.tools import ( # noqa as pytest_generate_tests) from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa import logging +from loopy.kernel import KernelState from 
loopy import ( preprocess_kernel, get_one_linearized_kernel, @@ -43,7 +44,9 @@ from loopy.schedule.checker.schedule import ( logger = logging.getLogger(__name__) -def test_lexschedule_creation(): +# {{{ test pairwise schedule creation + +def test_pairwise_schedule_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, @@ -296,6 +299,738 @@ def test_lexschedule_creation(): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected +# }}} + + +# {{{ test lex order map creation + +def test_lex_order_map_creation(): + import islpy as isl + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + + def _check_lex_map(expected_lex_order_map, n_dims): + # Isl ignores the apostrophes, so explicitly add them + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + + lex_order_map = create_lex_order_map( + n_dims=n_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + ) + + assert lex_order_map == expected_lex_order_map + assert ( + lex_order_map.get_var_names(isl.dim_type.in_) == + expected_lex_order_map.get_var_names(isl.dim_type.in_)) + assert ( + lex_order_map.get_var_names(isl.dim_type.out) == + expected_lex_order_map.get_var_names(isl.dim_type.out)) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" + "(" + "{0}0' < {0}0 " + ") or (" + "{0}0'={0}0 and {0}1' < {0}1 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4" + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 5) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0'] -> [{0}0] :" + "(" + "{0}0' < {0}0 " + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 1) + +# }}} + + +# {{{ test statement instance ordering creation + +def test_statement_instance_ordering_creation(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.schedule import ( + get_lex_order_map_for_sched_space, + ) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + create_lex_order_map, + ) + + # example kernel (add deps to fix loop order) + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c,dep=insn_b} + end + end + for t + e[t] = f[t] {id=insn_d, dep=insn_c} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + # get a linearization + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + # Get pairwise schedules + insn_id_pairs = [ + ("insn_a", "insn_b"), + ("insn_a", "insn_c"), + ("insn_a", "insn_d"), + 
("insn_b", "insn_c"), + ("insn_b", "insn_d"), + ("insn_c", "insn_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + knl, + linearization_items, + insn_id_pairs, + ) + + def check_sio_for_insn_pair( + insn_id_before, + insn_id_after, + expected_lex_dims, + expected_sio, + ): + + # Get pairwise schedule + sched_map_before, sched_map_after = sched_maps[ + (insn_id_before, insn_id_after)] + + # Get map representing lexicographic ordering + sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before) + + # Get expected lex order map + expected_lex_order_map = create_lex_order_map( + n_dims=expected_lex_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + ) + + assert sched_lex_order_map == expected_lex_order_map + + # create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + sched_map_before, + sched_map_after, + sched_lex_order_map, + ) + + sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) + + assert sio_aligned == expected_sio + + # Relationship between insn_a and insn_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_b", 2, expected_sio) + + # Relationship between insn_a and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_c", 2, expected_sio) + + # Relationship between insn_a and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_d", 1, expected_sio) + + # Relationship between insn_b and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " + "[{0}'=0, i', j'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " + "[{0}'=0, i', j'] -> [{0}=1, i=i', j=j'] : " + "0 <= i' < pi and 0 <= j' < pj " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_b", 
"insn_c", 3, expected_sio) + + # Relationship between insn_b and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_b", "insn_d", 1, expected_sio) + + # Relationship between insn_c and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio) + +# }}} + + +def test_linearization_checker_with_loop_prioritization(): + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_matmul(): + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + 
linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_nop(): + knl = lp.make_kernel( + [ + "{[b]: b_start<=b c_end = 2 + for c + ... nop + end + end + """, + "...", + seq_dependencies=True) + knl = lp.fix_parameters(knl, dim=3) + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_multi_domain(): + knl = lp.make_kernel( + [ + "{[i]: 0<=iacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_loop_carried_deps(): + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_and_invalid_prioritiy_detection(): + ref_knl = lp.make_kernel( + [ + "{[h]: 0<=h acc = 0 + for h,i,j,k + acc = acc + h + i + j + k + end + """, + name="priorities", + assumptions="ni,nj,nk,nh >= 1", + lang_version=(2018, 2) + ) + + # no error: + knl0 = lp.prioritize_loops(ref_knl, "h,i") + knl0 = lp.prioritize_loops(ref_knl, "i,j") + knl0 = lp.prioritize_loops(knl0, "j,k") + + unprocessed_knl = knl0.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + 
knl0 = lp.add_dependencies_v2( # pylint:disable=no-member + knl0, deps) + + # get a linearization to check + if knl0.state < KernelState.PREPROCESSED: + knl0 = preprocess_kernel(knl0) + knl0 = get_one_linearized_kernel(knl0) + linearization_items = knl0.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # no error: + knl1 = lp.prioritize_loops(ref_knl, "h,i,k") + knl1 = lp.prioritize_loops(knl1, "h,j,k") + + unprocessed_knl = knl1.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl1 = lp.add_dependencies_v2( # pylint:disable=no-member + knl1, deps) + + # get a linearization to check + if knl1.state < KernelState.PREPROCESSED: + knl1 = preprocess_kernel(knl1) + knl1 = get_one_linearized_kernel(knl1) + linearization_items = knl1.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # error (cycle): + knl2 = lp.prioritize_loops(ref_knl, "h,i,j") + knl2 = lp.prioritize_loops(knl2, "j,k") + # TODO think about when legacy deps should be updated based on prio changes + + try: + if hasattr(lp, "constrain_loop_nesting"): + knl2 = lp.constrain_loop_nesting(knl2, "k,i") # pylint:disable=no-member + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + else: + knl2 = lp.prioritize_loops(knl2, "k,i") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + + unprocessed_knl = knl2.copy() + + # get a linearization to check + if knl2.state < KernelState.PREPROCESSED: + knl2 = preprocess_kernel(knl2) + knl2 = get_one_linearized_kernel(knl2) + linearization_items = knl2.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + # should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + + # error (inconsistent priorities): + knl3 = lp.prioritize_loops(ref_knl, "h,i,j,k") + # TODO think about when legacy deps should be updated based on prio changes + try: + if hasattr(lp, "constrain_loop_nesting"): + knl3 = lp.constrain_loop_nesting( # pylint:disable=no-member + knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + else: + knl3 = lp.prioritize_loops(knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + + unprocessed_knl = knl3.copy() + + # get a linearization to check + 
if knl3.state < KernelState.PREPROCESSED: + knl3 = preprocess_kernel(knl3) + knl3 = get_one_linearized_kernel(knl3) + linearization_items = knl3.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + # should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + +# TODO create more kernels with invalid linearizations to test linearization checker + if __name__ == "__main__": if len(sys.argv) > 1: diff --git a/test/test_loopy.py b/test/test_loopy.py index 41b5315e890bbd8199a2a3b67fe4cf8b0ae48f8d..be595aaa5d837abcf9ff189c415e73f7393b78df 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2920,6 +2920,123 @@ def test_access_check_with_conditionals(): lp.generate_code_v2(legal_but_nonaffine_condition_knl) +def test_access_check_with_insn_predicates(): + knl = lp.make_kernel( + "{[i]: 0 1: exec(sys.argv[1]) diff --git a/test/test_statistics.py b/test/test_statistics.py index 757f59e865b350c8f452977d36f2639393923fad..bcdc542cb8c4eac50143b07ef09f1460f5abd9c5 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -1070,7 +1070,7 @@ def test_floor_div_coefficient_collector(): [ "for i_outer", "for j_outer", - "<> loc[i_inner,j_inner] = 3.14 {id=loc_init}", + "<> loc[i_inner,j_inner] = 3.14f {id=loc_init}", "loc[i_inner,(j_inner+r+4) %% %d] = loc[i_inner,(j_inner+r) %% %d]" " {id=add,dep=loc_init}" % (bsize, bsize), "out0[i_outer*16+i_inner,j_outer*16+j_inner] = loc[i_inner,j_inner]" diff --git a/test/test_transform.py b/test/test_transform.py index 546f86838929a70f42044c3894ad972ff9f354b9..daa659808d1e7aa12f51d7b4b897672aa3344874 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -670,6 +670,49 @@ def test_add_inames_for_unused_hw_axes(ctx_factory): parameters={"n": n}) +def test_rename_argument_of_domain_params(ctx_factory): + knl = lp.make_kernel( + "{[i, j]: 0<=i 1: exec(sys.argv[1])
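# ---------------------------------------------------------------------------
# Note on the int_pow preamble added in loopy/target/c/__init__.py: for
# integer exponents it emits an exponentiation-by-squaring helper instead of
# calling pow(). The Python sketch below mirrors the generated C loop; the
# function name and the test values are illustrative only, not part of this
# patch.

def int_pow_sketch(x, n):
    # O(log n) multiplications, same structure as the generated helper
    if n == 0:
        return 1
    if n < 0:
        # corresponds to the signed-exponent preamble in the C version
        x = 1.0 / x
        n = -n
    y = 1
    while n > 1:
        if n % 2:
            # odd exponent: fold one factor of the base into the accumulator
            y = x * y
        x = x * x
        n = n // 2
    return x * y


assert int_pow_sketch(3, 5) == 243
assert int_pow_sketch(2.0, -3) == 0.125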