diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d8101763de864e20bd92c6be0d1fef0e31d1b31..05b2e323793ee19e202f6e89425e26f5f9fb2582 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,8 +4,6 @@ on: branches: - master pull_request: - paths-ignore: - - 'doc/*.rst' schedule: - cron: '17 3 * * 0' diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d69f0b8c489c07d3aa1512f6f1cbb8ced0f6a2e9..f0e9aa0e593784742a9c2587c6e037f0b111d127 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -89,6 +89,8 @@ Python 3 POCL Examples: - python3 - pocl - large-node + # For examples/python/ispc-stream-harness.py + - avx2 except: - tags diff --git a/doc/conf.py b/doc/conf.py index 942afcd3ce11056c65c6a7500bb5ed312dc40187..9b8cf81e11dbbaee53110c36b1e601a80ae0104b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,52 +1,35 @@ -# -# loopy documentation build configuration file, created by -# sphinx-quickstart on Tue Aug 9 13:40:49 2011. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -#import sys import os -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) - # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +#needs_sphinx = "1.0" # Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +# coming with Sphinx (named "sphinx.ext.*") or your custom ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - #'sphinx.ext.viewcode', - 'sphinx.ext.doctest', + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + #"sphinx.ext.viewcode", + "sphinx.ext.doctest", + "sphinx_copybutton", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +#source_encoding = "utf-8-sig" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'loopy' -copyright = '2016, Andreas Klöckner' +project = "loopy" +copyright = "2016, Andreas Klöckner" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -59,7 +42,7 @@ with open(_version_source) as vpy_file: version_py = vpy_file.read() os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] = "1" -exec(compile(version_py, _version_source, 'exec'), ver_dic) +exec(compile(version_py, _version_source, "exec"), ver_dic) version = ".".join(str(x) for x in ver_dic["VERSION"]) # The full version, including alpha/beta/rc tags. release = ver_dic["VERSION_TEXT"] @@ -77,7 +60,7 @@ del os.environ["AKPYTHON_EXEC_IMPORT_UNAVAILABLE"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None @@ -94,7 +77,7 @@ exclude_patterns = ['_build'] #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] @@ -102,135 +85,16 @@ pygments_style = 'sphinx' # -- Options for HTML output --------------------------------------------------- -html_theme = "alabaster" +html_theme = "furo" html_theme_options = { - "extra_nav_links": { - "🚀 Github": "https://github.com/inducer/loopy", - "💾 Download Releases": "https://pypi.org/project/loopy", - } } html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', - 'searchbox.html', - ] -} - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. 
-#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False + } # If true, links to the reST sources are added to the pages. -html_show_sourcelink = False - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'loopydoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'loopy.tex', 'loopy Documentation', - 'Andreas Kloeckner', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. 
-#latex_show_urls = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True +html_show_sourcelink = True # -- Options for manual page output -------------------------------------------- @@ -238,20 +102,21 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'loopy', 'loopy Documentation', - ['Andreas Kloeckner'], 1) + ("index", "loopy", "loopy Documentation", + ["Andreas Kloeckner"], 1) ] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'https://docs.python.org/3': None, - 'https://documen.tician.de/islpy': None, - 'https://documen.tician.de/pyopencl': None, - 'https://documen.tician.de/cgen': None, - 'https://docs.scipy.org/doc/numpy/': None, - 'https://documen.tician.de/pymbolic': None, - 'https://documen.tician.de/pytools': None, + "https://docs.python.org/3": None, + "https://numpy.org/doc/stable/": None, + "https://documen.tician.de/islpy": None, + "https://documen.tician.de/pyopencl": None, + "https://documen.tician.de/cgen": None, + "https://documen.tician.de/pymbolic": None, + "https://documen.tician.de/pytools": None, } autoclass_content = "class" +autodoc_typehints = "description" diff --git a/doc/index.rst b/doc/index.rst index 8eb996f6b48b4b2526b2114c10fbe94669f87b44..7baff3249a25e69019c06802901538500c1af971 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -46,6 +46,8 @@ Please check :ref:`installation` to get started. 
ref_other misc ref_internals + 🚀 Github <https://github.com/inducer/loopy> + 💾 Download Releases <https://pypi.org/project/loopy> Indices and tables ================== diff --git a/doc/misc.rst b/doc/misc.rst index 4c8c9867f3ceee2447f9249097c7c30f4d6f501d..e8bcefc65ea5519eafb3ad8b1ec34774b64314ed 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -49,21 +49,18 @@ MacOS support computers: Everywhere else, just making sure you have the ``g++`` package should be enough. -#. Install `miniconda `_. - (Both Python 2 and 3 should work. In the absence of other constraints, prefer Python 3.) +#. Install `miniforge <https://github.com/conda-forge/miniforge>`_. -#. ``export CONDA=/WHERE/YOU/INSTALLED/miniconda3`` +#. ``export CONDA=/WHERE/YOU/INSTALLED/miniforge3`` If you accepted the default location, this should work: - ``export CONDA=$HOME/miniconda3`` + ``export CONDA=$HOME/miniforge3`` #. ``$CONDA/bin/conda create -n dev`` #. ``source $CONDA/bin/activate dev`` -#. ``conda config --add channels conda-forge`` - #. ``conda install git pip pocl islpy pyopencl`` (Linux) or @@ -76,7 +73,7 @@ MacOS support computers: Next time you want to use :mod:`loopy`, just run the following command:: - source /WHERE/YOU/INSTALLED/miniconda3/bin/activate dev + source /WHERE/YOU/INSTALLED/miniforge3/bin/activate dev You may also like to add this to a startup file (like :file:`$HOME/.bashrc`) or create an alias for it. diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index efe147493df36df2f7afa4bac4241b88bb5ce598..2b496c77deeaa58be05ce13021a42dd78d2f9ded 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -3,6 +3,72 @@ Reference: Loopy's Model of a Kernel ==================================== +What Types of Computation can a Loopy Program Express? +------------------------------------------------------ + +Loopy programs consist of an a-priori unordered set of statements, operating +on :math:`n`-dimensional array variables. + +Arrays consist of "plain old data" and structures thereof, as describable +by a :class:`numpy.dtype`. 
The n-dimensional shape of these arrays is +given by a tuple of expressions at most affine in parameters that are +fixed for the duration of program execution. +Each array variable in the program is either an argument or a temporary +variable. A temporary variable is only live within the program, while +argument variables are accessible outside the program and constitute the +program's inputs and outputs. + +A statement (still called 'instruction' in some places, cf. +:class:`loopy.InstructionBase`) encodes an assignment to an entry of an array. +The right-hand side of an assignment consists of an expression that may +consist of arithmetic operations and calls to functions. +If the outermost operation of the RHS expression is a function call, +the RHS value may be a tuple, and multiple (still scalar) arrays appear +as LHS values. (This is the only sense in which tuple types are supported.) +Each statement is parametrized by zero or more loop variables ("inames"). +A statement is executed once for each integer point defined by the domain +forest for the iname tuple given for that statement +(:attr:`loopy.InstructionBase.within_inames`). Each execution of a +statement (with specific values of the inames) is called a *statement +instance*. Dependencies between these instances as well as instances of +other statements are encoded in the program representation and specify permissible +execution orderings. (The semantics of the dependencies are `being +sharpened `__.) Assignments +(comprising the evaluation of the RHS and the assignment to the LHS) may +be specified to be atomic. + +The basic building blocks of the domain forest are sets given as +conjunctions of equalities and inequalities of quasi-affine expressions on +integer tuples, called domains, and represented as instances of +:class:`islpy.BasicSet`. The entries of each integer tuple are +either *parameters* or *inames*. Each domain may optionally have a *parent +domain*. 
Parameters of parent-less domains are given by value arguments +supplied to the program that will remain unchanged during program +execution. Parameters of domains with parents may be + +- run-time-constant value arguments to the program, or +- inames from parent domains, or +- scalar, integer temporary variables that are written by statements + with iteration domains controlled by a parent domain. + +For each tuple of concrete parameter values, the set of iname tuples must be +finite. Each iname is defined by exactly one domain. + +For a tuple of inames, the domain forest defines an iteration domain +by finding all the domains defining the inames involved, along with their +parent domains. The resulting tree of domains may contain multiple roots, +but no branches. The iteration domain is then constructed by intersecting +these domains and constructing the projection of that set onto the space +given by the required iname tuple. Observe that, via the parent-child +domain mechanism, imperfectly-nested and data-dependent loops become +expressible. + +The set of functions callable from the language is predefined by the system. +Additional functions may be defined by the user by registering them. It is +not currently possible to define functions from within Loopy; however, work +is progressing on permitting this. Even once this is allowed, recursion +will not be permitted. + .. _domain-tree: Loop Domain Forest diff --git a/loopy/check.py b/loopy/check.py index e66af04d2fe4dfc2e1f5a99281783feecec2bee7..0bf02f7cf7425f0a277a200a1bdc51c60347fd57 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -24,8 +24,7 @@ THE SOFTWARE. 
from islpy import dim_type import islpy as isl from loopy.symbolic import WalkMapper -from loopy.diagnostic import (LoopyError, WriteRaceConditionWarning, - warn_with_kernel, ExpressionToAffineConversionError) +from loopy.diagnostic import LoopyError, WriteRaceConditionWarning, warn_with_kernel from loopy.type_inference import TypeInferenceMapper from loopy.kernel.instruction import (MultiAssignmentBase, CallInstruction, CInstruction, _DataObliviousInstruction) @@ -216,7 +215,7 @@ def check_for_double_use_of_hw_axes(kernel): for insn in kernel.instructions: insn_tag_keys = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: for tag in kernel.iname_tags_of_type(iname, UniqueTag): key = tag.key if key in insn_tag_keys: @@ -233,12 +232,12 @@ def check_for_inactive_iname_access(kernel): for insn in kernel.instructions: expression_inames = insn.read_dependency_names() & kernel.all_inames() - if not expression_inames <= kernel.insn_inames(insn): + if not expression_inames <= insn.within_inames: raise LoopyError( "instruction '%s' references " "inames '%s' that the instruction does not depend on" % (insn.id, - ", ".join(expression_inames - kernel.insn_inames(insn)))) + ", ".join(expression_inames - insn.within_inames))) def check_for_unused_inames(kernel): @@ -294,7 +293,7 @@ def check_for_write_races(kernel): insn.assignee_var_names(), insn.assignee_subscript_deps()): assignee_inames = assignee_indices & kernel.all_inames() - if not assignee_inames <= kernel.insn_inames(insn): + if not assignee_inames <= insn.within_inames: raise LoopyError( "assignee of instructions '%s' references " "iname that the instruction does not depend on" @@ -305,13 +304,13 @@ def check_for_write_races(kernel): # will cause write races. 
raceable_parallel_insn_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, ConcurrentTag)} elif assignee_name in kernel.temporary_variables: temp_var = kernel.temporary_variables[assignee_name] raceable_parallel_insn_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if any(_is_racing_iname_tag(temp_var, tag) for tag in kernel.iname_tags(iname))} @@ -445,19 +444,14 @@ class _AccessCheckMapper(WalkMapper): % (expr, self.insn_id, access_range, shape_domain)) def map_if(self, expr, domain): - from loopy.symbolic import get_dependencies - if get_dependencies(expr.condition) <= frozenset( - domain.space.get_var_dict()): - try: - from loopy.symbolic import isl_set_from_expr - then_set = isl_set_from_expr(domain.space, expr.condition) - else_set = then_set.complement() - except ExpressionToAffineConversionError: - # non-affine condition: can't do much - then_set = else_set = isl.BasicSet.universe(domain.space) - else: - # data-dependent condition: can't do much + from loopy.symbolic import condition_to_set + then_set = condition_to_set(domain.space, expr.condition) + if then_set is None: + # condition cannot be inferred as ISL expression => ignore + # for domain contributions enforced by it then_set = else_set = isl.BasicSet.universe(domain.space) + else: + else_set = then_set.complement() self.rec(expr.then, domain & then_set) self.rec(expr.else_, domain & else_set) @@ -467,9 +461,10 @@ def check_bounds(kernel): """ Performs out-of-bound check for every array access. """ + from loopy.kernel.instruction import get_insn_domain temp_var_names = set(kernel.temporary_variables) for insn in kernel.instructions: - domain = kernel.get_inames_domain(kernel.insn_inames(insn)) + domain = get_insn_domain(insn, kernel) # data-dependent bounds? 
can't do much if set(domain.get_var_names(dim_type.param)) & temp_var_names: @@ -496,7 +491,7 @@ def check_write_destinations(kernel): if wvar in kernel.all_inames(): raise LoopyError("iname '%s' may not be written" % wvar) - insn_domain = kernel.get_inames_domain(kernel.insn_inames(insn)) + insn_domain = kernel.get_inames_domain(insn.within_inames) insn_params = set(insn_domain.get_var_names(dim_type.param)) if wvar in kernel.all_params(): @@ -941,7 +936,7 @@ def _check_for_unused_hw_axes_in_kernel_chunk(kernel, sched_index=None): group_axes_used = set() local_axes_used = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: ltags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) gtags = kernel.iname_tags_of_type(iname, GroupIndexTag, max_num=1) altags = kernel.iname_tags_of_type( @@ -1197,7 +1192,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None): assert idomains - insn_inames = kernel.insn_inames(insn) + insn_inames = insn.within_inames # {{{ if we've checked the same thing before, no need to check it again @@ -1274,7 +1269,7 @@ def check_implemented_domains(kernel, implemented_domains, code=None): iname_to_dim = pt.get_space().get_var_dict() point_axes = [] - for iname in kernel.insn_inames(insn) | parameter_inames: + for iname in insn_inames | parameter_inames: tp, dim = iname_to_dim[iname] point_axes.append("%s=%d" % ( iname, pt.get_coordinate_val(tp, dim).to_python())) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index cbae4eac5ed796090c52c40a7fde4b6ebeed36a0..0f5d824cc752a372023cc177c780b2606593a0f7 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -146,13 +146,18 @@ class SeenFunction(ImmutableRecord): .. attribute:: arg_dtypes a tuple of arg dtypes + + .. 
attribute:: result_dtypes + + a tuple of result dtypes """ - def __init__(self, name, c_name, arg_dtypes): + def __init__(self, name, c_name, arg_dtypes, result_dtypes): ImmutableRecord.__init__(self, name=name, c_name=c_name, - arg_dtypes=arg_dtypes) + arg_dtypes=arg_dtypes, + result_dtypes=result_dtypes) class CodeGenerationState: diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 71133ef7cf2a29be1a8673e99a81f21544f5404a..14efb64f4618c025a319564ebef3e0232800aecc 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -89,7 +89,7 @@ def generate_instruction_code(codegen_state, insn): else: raise RuntimeError("unexpected instruction type") - insn_inames = kernel.insn_inames(insn) + insn_inames = insn.within_inames return to_codegen_result( codegen_state, diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9088f3bfe5f56c42884ab196c4cfc04d8341e3ef..b24cde2c419cc3fb549473cb620e040520a29a07 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -824,7 +824,7 @@ class LoopKernel(ImmutableRecordWithoutPickling): result = { iname: set() for iname in self.all_inames()} for insn in self.instructions: - for iname in self.insn_inames(insn): + for iname in insn.within_inames: result[iname].add(insn.id) return result @@ -1561,10 +1561,11 @@ class LoopKernel(ImmutableRecordWithoutPickling): for field_name in self.hash_fields: key_builder.rec(key_hash, getattr(self, field_name)) + @memoize_method def __hash__(self): from loopy.tools import LoopyKeyBuilder - from pytools.persistent_dict import new_hash - key_hash = new_hash() + import hashlib + key_hash = hashlib.sha256() self.update_persistent_hash(key_hash, LoopyKeyBuilder()) return hash(key_hash.digest()) diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 6b0248f4f9c18001ef23b0c1551316d9cb6ad065..9fd166ab8f15bdc97006c94c7d03977b64c08292 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -26,6 +26,7 @@ THE 
SOFTWARE. import re from pytools import ImmutableRecord, memoize_method +from pytools.tag import Taggable import numpy as np # noqa @@ -136,6 +137,12 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase): return self.stringify(True) def map_expr(self, mapper): + from loopy.kernel.data import auto + + if self.stride is auto: + # lp.auto not an expr => do not map + return self + return self.copy(stride=mapper(self.stride)) @@ -557,7 +564,7 @@ def _parse_shape_or_strides(x): return tuple(_pymbolic_parse_if_necessary(xi) for xi in x) -class ArrayBase(ImmutableRecord): +class ArrayBase(ImmutableRecord, Taggable): """ .. attribute :: name @@ -600,7 +607,8 @@ class ArrayBase(ImmutableRecord): .. attribute:: offset Offset from the beginning of the buffer to the point from - which the strides are counted. May be one of + which the strides are counted, in units of the :attr:`dtype`. + May be one of * 0 or None * a string (that is interpreted as an argument name). @@ -636,6 +644,14 @@ class ArrayBase(ImmutableRecord): .. versionadded:: 2018.1 + .. attribute:: tags + + A (possibly empty) frozenset of instances of + :class:`pytools.tag.Tag` intended for + consumption by an application. + + .. versionadded:: 2020.2.2 + .. automethod:: __init__ .. automethod:: __eq__ .. automethod:: num_user_axes @@ -652,8 +668,7 @@ class ArrayBase(ImmutableRecord): def __init__(self, name, dtype=None, shape=None, dim_tags=None, offset=0, dim_names=None, strides=None, order=None, for_atomic=False, - target=None, alignment=None, - **kwargs): + target=None, alignment=None, tags=None, **kwargs): """ All of the following (except *name*) are optional. Specify either strides or shape. @@ -691,7 +706,8 @@ class ArrayBase(ImmutableRecord): using atomic-capable data types. :arg offset: (See :attr:`offset`) :arg alignment: memory alignment in bytes - + :arg tags: An instance of or an Iterable of instances of + :class:`pytools.tag.Tag`. 
""" for kwarg_name in kwargs: @@ -848,6 +864,7 @@ class ArrayBase(ImmutableRecord): order=order, alignment=alignment, for_atomic=for_atomic, + tags=tags, **kwargs) def __eq__(self, other): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a22fef9e8021d55759a9f0a2c0f4f23bfe35df80..94534382f19790936152661b48d4d515e9e0e129 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1523,7 +1523,7 @@ def determine_shapes_of_temporaries(knl): def feed_all_expressions(receiver): for insn in knl.instructions: insn.with_transformed_expressions( - lambda expr: receiver(expr, knl.insn_inames(insn))) + lambda expr: receiver(expr, insn.within_inames)) var_to_base_indices, var_to_shape, var_to_error = ( find_shapes_of_vars( @@ -1543,7 +1543,7 @@ def determine_shapes_of_temporaries(knl): def feed_assignee_of_instruction(receiver): for insn in knl.instructions: for assignee in insn.assignees: - receiver(assignee, knl.insn_inames(insn)) + receiver(assignee, insn.within_inames) var_to_base_indices_fallback, var_to_shape_fallback, var_to_error = ( find_shapes_of_vars( diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 43770ffb6d0f2ae08d8967baa03fedea669343ed..6e454d925167fd6344a7d4cd30c83f28f6ac2e23 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -27,6 +27,7 @@ THE SOFTWARE. 
from sys import intern import numpy as np # noqa from pytools import ImmutableRecord +from pytools.tag import Taggable from loopy.kernel.array import ArrayBase from loopy.diagnostic import LoopyError from loopy.kernel.instruction import ( # noqa @@ -357,7 +358,6 @@ class KernelArgument(ImmutableRecord): DeprecationWarning, stacklevel=2) dtype = None - kwargs["dtype"] = dtype ImmutableRecord.__init__(self, **kwargs) @@ -379,13 +379,13 @@ class ArrayArg(ArrayBase, KernelArgument): allowed_extra_kwargs = [ "address_space", - "is_output_only"] + "is_output_only", + "tags"] def __init__(self, *args, **kwargs): if "address_space" not in kwargs: raise TypeError("'address_space' must be specified") kwargs["is_output_only"] = kwargs.pop("is_output_only", False) - super().__init__(*args, **kwargs) min_target_axes = 0 @@ -451,15 +451,29 @@ class ImageArg(ArrayBase, KernelArgument): self.num_target_axes(), dtype, is_written) -class ValueArg(KernelArgument): +""" + :attribute tags: A (possibly empty) frozenset of instances of + :class:`pytools.tag.Tag` intended for consumption by an + application. + + ..versionadded: 2020.2.2 +""" + + +class ValueArg(KernelArgument, Taggable): def __init__(self, name, dtype=None, approximately=1000, target=None, - is_output_only=False): + is_output_only=False, tags=None): + """ + :arg tags: A an instance of or Iterable of instances of + :class:`pytools.tag.Tag` intended for consumption by an + application. 
+ """ KernelArgument.__init__(self, name=name, dtype=dtype, approximately=approximately, target=target, - is_output_only=is_output_only) + is_output_only=is_output_only, tags=tags) def __str__(self): import loopy as lp diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 791ea89a6521c58cfe9281723ea8d83f83baf84a..101d16624c6698bf6f8ac45c5154b0fab4e6e9f5 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -25,6 +25,7 @@ from pytools import ImmutableRecord, memoize_method from loopy.diagnostic import LoopyError from loopy.tools import Optional from warnings import warn +import islpy as isl # {{{ instructions: base class @@ -1438,4 +1439,49 @@ def _check_and_fix_temp_var_type(temp_var_type, stacklevel=2): # }}} +def get_insn_domain(insn, kernel): + """ + Returns an instance of :class:`islpy.Set` for the *insn*'s domain. + + .. note:: + + Does not take into account additional hints available through + :attr:`loopy.LoopKernel.assumptions`. + """ + domain = kernel.get_inames_domain(insn.within_inames) + + # {{{ add read-only ValueArgs to domain + + from loopy.kernel.data import ValueArg + + valueargs_to_add = ({arg.name for arg in kernel.args + if isinstance(arg, ValueArg) + and arg.name not in kernel.get_written_variables()} + - set(domain.get_var_names(isl.dim_type.param))) + + # only consider valueargs relevant to *insn* + valueargs_to_add = valueargs_to_add & insn.read_dependency_names() + + for arg_to_add in valueargs_to_add: + idim = domain.dim(isl.dim_type.param) + domain = domain.add_dims(isl.dim_type.param, 1) + domain = domain.set_dim_name(isl.dim_type.param, idim, arg_to_add) + + # }}} + + # {{{ enforce restriction from predicates + + insn_preds_set = isl.BasicSet.universe(domain.space) + + for predicate in insn.predicates: + from loopy.symbolic import condition_to_set + predicate_as_isl_set = condition_to_set(domain.space, predicate) + if predicate_as_isl_set is not None: + insn_preds_set = insn_preds_set & 
predicate_as_isl_set + + # }}} + + return domain & insn_preds_set + + # vim: foldmethod=marker diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 0b8d9841ee77020149a1f246a301e9c422b202e6..541bb45ce52821d00e3e255ad600c392f535d303 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -685,7 +685,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): from loopy.kernel.data import AutoLocalIndexTagBase auto_axis_inames = { - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)} # }}} @@ -744,7 +744,7 @@ def get_auto_axis_iname_ranking_by_stride(kernel, insn): if aggregate_strides: very_large_stride = int(np.iinfo(np.int32).max) - return sorted((iname for iname in kernel.insn_inames(insn)), + return sorted((iname for iname in insn.within_inames), key=lambda iname: ( aggregate_strides.get(iname, very_large_stride), iname)) @@ -885,7 +885,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): continue auto_axis_inames = [ - iname for iname in kernel.insn_inames(insn) + iname for iname in insn.within_inames if kernel.iname_tags_of_type(iname, AutoLocalIndexTagBase)] if not auto_axis_inames: @@ -893,7 +893,7 @@ def assign_automatic_axes(kernel, axis=0, local_size=None): assigned_local_axes = set() - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: tags = kernel.iname_tags_of_type(iname, LocalIndexTag, max_num=1) if tags: tag, = tags @@ -1000,7 +1000,7 @@ def guess_var_shape(kernel, var_name): submap = SubstitutionRuleExpander(kernel.substitutions) def run_through_armap(expr): - armap(submap(expr), kernel.insn_inames(insn)) + armap(submap(expr), insn.within_inames) return expr try: @@ -1533,7 +1533,7 @@ def stringify_instruction_list(kernel): raise LoopyError("unexpected instruction type: %s" % type(insn).__name__) - adapt_to_new_inames_list(kernel.insn_inames(insn)) + adapt_to_new_inames_list(insn.within_inames) 
options = ["id="+Fore.GREEN+insn.id+Style.RESET_ALL] if insn.priority: diff --git a/loopy/options.py b/loopy/options.py index 2dc8f22cd8a205da89d86b5157af8792a37111ed..46ff37947b66c02e3751a815ab660d9807e86724 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -98,6 +98,12 @@ class Options(ImmutableRecord): Do not do any checking (data type, data layout, shape, etc.) on arguments for a minor performance gain. + .. versionchanged:: 2021.1 + + This now defaults to the same value as the ``optimize`` + sub-flag from :data:`sys.flags`. This flag can be controlled + (i.e. set to *True*) by running Python with the ``-O`` flag. + .. attribute:: no_numpy Do not check for or accept :mod:`numpy` arrays as @@ -196,6 +202,7 @@ class Options(ImmutableRecord): allow_terminal_colors_def = ( ALLOW_TERMINAL_COLORS and allow_terminal_colors_def) + import sys ImmutableRecord.__init__( self, @@ -203,7 +210,7 @@ class Options(ImmutableRecord): trace_assignments=kwargs.get("trace_assignments", False), trace_assignment_values=kwargs.get("trace_assignment_values", False), - skip_arg_checks=kwargs.get("skip_arg_checks", False), + skip_arg_checks=kwargs.get("skip_arg_checks", sys.flags.optimize), no_numpy=kwargs.get("no_numpy", False), cl_exec_manage_array_events=kwargs.get("no_numpy", True), return_dict=kwargs.get("return_dict", False), diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 12f1cb4691cf749ebd147c65582900c9ce3dce04..40b5827343ae7c4cf2fb2886d88a5324c930285a 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -256,7 +256,6 @@ def find_temporary_address_space(kernel): overall_aspace = max(desired_aspace_per_insn) - from pytools import all if not all(iaspace == overall_aspace for iaspace in desired_aspace_per_insn): raise LoopyError("not all instructions agree on the " "the desired address space (private/local/global) of the " @@ -1004,7 +1003,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_reduction_seq(expr, rec, 
nresults, arg_dtypes, reduction_dtypes): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames from loopy.kernel.data import AddressSpace acc_var_names = make_temporaries( @@ -1041,7 +1040,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, update_id = insn_id_gen( based_on="{}_{}_update".format(insn.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | set(expr.inames) + update_insn_iname_deps = insn.within_inames | set(expr.inames) if insn.within_inames_is_final: update_insn_iname_deps = insn.within_inames | set(expr.inames) @@ -1126,7 +1125,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, size = _get_int_iname_size(red_iname) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames @@ -1363,7 +1362,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, def map_scan_seq(expr, rec, nresults, arg_dtypes, reduction_dtypes, sweep_iname, scan_iname, sweep_min_value, scan_min_value, stride): - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames inames_to_remove.add(scan_iname) track_iname = var_name_gen( @@ -1417,7 +1416,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, update_id = insn_id_gen( based_on="{}_{}_update".format(insn.id, "_".join(expr.inames))) - update_insn_iname_deps = temp_kernel.insn_inames(insn) | {track_iname} + update_insn_iname_deps = insn.within_inames | {track_iname} if insn.within_inames_is_final: update_insn_iname_deps = insn.within_inames | {track_iname} @@ -1461,7 +1460,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, return map_reduction_seq( expr, rec, nresults, arg_dtypes, reduction_dtypes) - outer_insn_inames = temp_kernel.insn_inames(insn) + 
outer_insn_inames = insn.within_inames from loopy.kernel.data import LocalIndexTagBase outer_local_inames = tuple(oiname for oiname in outer_insn_inames @@ -1668,7 +1667,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, infer_arg_and_reduction_dtypes_for_reduction_expression( temp_kernel, expr, unknown_types_ok)) - outer_insn_inames = temp_kernel.insn_inames(insn) + outer_insn_inames = insn.within_inames bad_inames = frozenset(expr.inames) & outer_insn_inames if bad_inames: raise LoopyError("reduction used within loop(s) that it was " @@ -1854,7 +1853,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True, no_sync_with=insn.no_sync_with | frozenset(new_insn_add_no_sync_with), within_inames=( - temp_kernel.insn_inames(insn) + insn.within_inames | new_insn_add_within_inames)) kwargs.pop("id") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 936c7c4d605cfcaebe57d4d61b862000b0b3bc3c..ccfe0d5ff9b403b9ed68bfabf7d69ec36bd66b57 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -182,7 +182,6 @@ def has_barrier_within(kernel, sched_index): if isinstance(sched_item, BeginBlockItem): loop_contents, _ = gather_schedule_block( kernel.schedule, sched_index) - from pytools import any return any(isinstance(subsched_item, Barrier) for subsched_item in loop_contents) elif isinstance(sched_item, Barrier): @@ -296,7 +295,7 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): continue dep_insn = kernel.id_to_insn[dep_insn_id] - dep_insn_inames = kernel.insn_inames(dep_insn) + dep_insn_inames = dep_insn.within_inames if iname in dep_insn_inames: # Nothing to be learned, dependency is in loop over iname @@ -940,7 +939,7 @@ def generate_loop_schedules_internal( if not is_ready: continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames + want = insn.within_inames - sched_state.parallel_inames have = active_inames_set - sched_state.parallel_inames 
if want != have: @@ -1046,8 +1045,9 @@ def generate_loop_schedules_internal( sched_state.active_group_counts.keys()): new_insn_ids_to_try = None - new_toposorted_insns = sched_state.insns_in_topologically_sorted_order[:] - new_toposorted_insns.remove(insn) + # explicitly use id to compare to avoid performance issues like #199 + new_toposorted_insns = [x for x in + sched_state.insns_in_topologically_sorted_order if x.id != insn.id] # }}} @@ -1106,7 +1106,7 @@ def generate_loop_schedules_internal( for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + if last_entered_loop in insn.within_inames: if debug_mode: print("cannot leave '%s' because '%s' still depends on it" % (last_entered_loop, format_insn(kernel, insn.id))) @@ -1294,7 +1294,7 @@ def generate_loop_schedules_internal( for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = kernel.insn_inames(insn) + want = insn.within_inames if hypothetically_active_loops <= want: if usefulness is None: diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 0000000000000000000000000000000000000000..d9066030fbe499508d568ab561739fa8c31e07e5 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,198 @@ +# coding: utf-8 +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or 
substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_before, sched_after, lex_map, before_marker="'"): + """Return a statement ordering represented as a map from each statement + instance to all statement instances occurring later. + + :arg sched_before: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the dependee in a dependency relationship. + + :arg sched_after: An :class:`islpy.Map` representing a schedule + as a mapping from statement instances (for one particular statement) + to lexicographic time. The statement represented will typically + be the depender in a dependency relationship. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :arg before_marker: A :class:`str` to be appended to the names of the + map dimensions representing the 'before' statement in the + 'happens before' relationship. + + :returns: An :class:`islpy.Map` representing the statement ordering as + a mapping from each statement instance to all statement instances + occurring later. 
I.e., we compose relations B, L, and A as + B ∘ L ∘ A^-1, where B is `sched_before`, A is `sched_after`, + and L is `lex_map`. + + """ + + # Perform the composition of relations + sio = sched_before.apply_range( + lex_map).apply_range(sched_after.reverse()) + + # Append marker to in_ dims + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + return append_marker_to_isl_map_var_names( + sio, isl.dim_type.in_, before_marker) + + +def get_lex_order_set(before_names, after_names, islvars=None): + """Return an :class:`islpy.Set` representing a lexicographic ordering + with the number of dimensions provided in `before_names` + (equal to the number of dimensions in `after_names`). + + :arg before_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs before another point, which will be represented using + `after_names`. (see example below) + + :arg after_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs after another point, which will be represented using + `before_names`. (see example below) + + :arg islvars: A dictionary mapping variable names in `before_names` and + `after_names` to :class:`islpy.PwAff` instances that represent each + of the variables (islvars may be produced by `islpy.make_zero_and_vars`). + The key `0` is also included and represents a :class:`islpy.PwAff` zero + constant. This dictionary defines the space to be used for the set. If no + value is passed, the dictionary will be made using `before_names` + and `after_names`. + + :returns: An :class:`islpy.Set` representing a big-endian lexicographic ordering + with the number of dimensions provided in `before_names`. 
The set + has one dimension for each name in *both* `before_names` and + `after_names`, and contains all points which meet a 'happens before' + constraint defining the lexicographic ordering. E.g., if + `before_names = [i0', i1', i2']` and `after_names = [i0, i1, i2]`, + return the set containing all points in a 3-dimensional, big-endian + lexicographic ordering such that point + `[i0', i1', i2']` happens before `[i0, i1, i2]`. I.e., return:: + + {[i0', i1', i2', i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + # If no islvars passed, make them using the names provided + if islvars is None: + islvars = isl.make_zero_and_vars(before_names+after_names, []) + + # Initialize set with constraint i0' < i0 + lex_order_set = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + + # For each dim d, starting with d=1, equality_conj_set will be constrained + # by d equalities, e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)). + equality_conj_set = islvars[0].eq_set(islvars[0]) # initialize to 'true' + + for i in range(1, len(before_names)): + + # Add the next equality constraint to equality_conj_set + equality_conj_set = equality_conj_set & \ + islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) + + # Create a set constrained by adding a less-than constraint for this dim, + # e.g., (i1' < i1), to the current equality conjunction set. + # For each dim d, starting with d=1, this full conjunction will have + # d equalities and one inequality, e.g., + # (i0' = i0 and i1' = i1 and ... 
i(d-1)' = i(d-1) and id' < id) + full_conj_set = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) & equality_conj_set + + # Union this new constraint with the current lex_order_set + lex_order_set = lex_order_set | full_conj_set + + return lex_order_set + + +def create_lex_order_map( + n_dims=None, + before_names=None, + after_names=None, + ): + """Return a map from each point in a lexicographic ordering to every + point that occurs later in the lexicographic ordering. + + :arg n_dims: An :class:`int` representing the number of dimensions + in the lexicographic ordering. If not provided, `n_dims` will be + set to length of `after_names`. + + :arg before_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs before another point, which will be represented using + `after_names`. (see example below) + + :arg after_names: A list of :class:`str` variable names to be used + to describe lexicographic space dimensions for a point in a lexicographic + ordering that occurs after another point, which will be represented using + `before_names`. (see example below) + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. 
+ E.g., if `before_names = [i0', i1', i2']` and + `after_names = [i0, i1, i2]`, return the map:: + + {[i0', i1', i2'] -> [i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + if after_names is None: + after_names = ["i%s" % (i) for i in range(n_dims)] + if before_names is None: + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + before_names = append_marker_to_strings(after_names, marker="'") + if n_dims is None: + n_dims = len(after_names) + + assert len(before_names) == len(after_names) == n_dims + dim_type = isl.dim_type + + # First, get a set representing the lexicographic ordering. + lex_order_set = get_lex_order_set(before_names, after_names) + + # Now convert that set to a map. + lex_map = isl.Map.from_domain(lex_order_set) + return lex_map.move_dims( + dim_type.out, 0, dim_type.in_, + len(before_names), len(after_names)) diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index bc71df5d8f9658c788141e17a6eaf948cf5aa635..a947da3ac029b8b49868d65472d8eb893d79a946 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -313,3 +313,27 @@ def generate_pairwise_schedules( pairwise_schedules[tuple(insn_ids)] = tuple(sched_maps) return pairwise_schedules + + +def get_lex_order_map_for_sched_space(schedule): + """Return an :class:`islpy.Map` that maps each point in a + lexicographic ordering to every point that occurs later. + + :arg schedule: A :class:`islpy.Map` representing the ordering of + statement instances as a mapping from statement instances to + lexicographic time. + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time, with + the dimension count and names matching the output dimension + of `schedule`. 
+ + """ + + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + + lex_dim_names = schedule.space.get_var_names(isl.dim_type.out) + return create_lex_order_map(after_names=lex_dim_names) diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index 8e2a82a016202f054bb808887caa27c2f363842b..db1d861c8a76268a54468b12e9e9d77e016d58db 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -87,6 +87,35 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return isl.align_spaces(obj_map, tgt_map) +def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): + """Return an :class:`islpy.Map` with a marker appended to the specified + dimension names. + + :arg old_isl_map: An :class:`islpy.Map`. + + :arg dim_type: An :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the dimension to be marked. + + :arg marker: A :class:`str` to be appended to the specified dimension + names. If not provided, `marker` defaults to an apostrophe. + + :returns: An :class:`islpy.Map` matching `old_isl_map` with + `marker` appended to the `dim_type` dimension names. 
+ + """ + + new_map = old_isl_map.copy() + for i in range(len(old_isl_map.get_var_names(dim_type))): + new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name( + dim_type, i)+marker) + return new_map + + +def append_marker_to_strings(strings, marker="'"): + assert isinstance(strings, list) + return [s+marker for s in strings] + + def sorted_union_of_names_in_isl_sets( isl_sets, set_dim=isl.dim_type.set): diff --git a/loopy/statistics.py b/loopy/statistics.py index eda750120bc8456e9090304cbd2905a02ff2358e..a0a0f9c7ed7c62e4ec8f6ca517809696abfd2a8d 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1239,7 +1239,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): l_used = set() from loopy.kernel.data import LocalIndexTag, GroupIndexTag - for iname in knl.insn_inames(insn): + for iname in insn.within_inames: tags = knl.iname_tags_of_type(iname, (LocalIndexTag, GroupIndexTag), max_num=1) if tags: @@ -1273,7 +1273,7 @@ def get_unused_hw_axes_factor(knl, insn, disregard_local_axes, space=None): def count_insn_runs(knl, insn, count_redundant_work, disregard_local_axes=False): - insn_inames = knl.insn_inames(insn) + insn_inames = insn.within_inames if disregard_local_axes: from loopy.kernel.data import LocalIndexTag diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 7e5de3164de761c01f41981a850bb14f6895c95d..77f8228b66a9af0e2cb500bb7d012887e9c94fcc 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1002,20 +1002,77 @@ class RuleAwareIdentityMapper(IdentityMapper): lambda expr: self(expr, kernel, insn))) for insn in kernel.instructions] - return kernel.copy(instructions=new_insns) + from functools import partial + + non_insn_self = partial(self, kernel=kernel, insn=None) + + from loopy.kernel.array import ArrayBase + + # {{{ args + + new_args = [ + arg.map_exprs(non_insn_self) if isinstance(arg, ArrayBase) else arg + for arg in kernel.args] + + # }}} + + # {{{ tvs + + new_tvs = { + tv_name: 
tv.map_exprs(non_insn_self) + for tv_name, tv in kernel.temporary_variables.items()} + + # }}} + + # domains, var names: not exprs => do not map + + return kernel.copy(instructions=new_insns, + args=new_args, + temporary_variables=new_tvs) class RuleAwareSubstitutionMapper(RuleAwareIdentityMapper): + """ + Mapper to substitute expressions and record any divergence of substitution + rule expressions of :class:`loopy.LoopKernel`. + + .. attribute:: rule_mapping_context + + An instance of :class:`SubstitutionRuleMappingContext` to record + divergence of substitution rules. + + .. attribute:: within + + An instance of :class:`loopy.match.StackMatchComponent`. + :class:`RuleAwareSubstitutionMapper` would perform + substitutions in the expression if the stack match is ``True`` or + if the expression does not arise from an :class:`~loopy.InstructionBase`. + + .. note:: + + The mapped kernel should be passed through + :meth:`SubstitutionRuleMappingContext.finish_kernel` to perform any + renaming mandated by the rule expression divergences. + """ def __init__(self, rule_mapping_context, subst_func, within): super().__init__(rule_mapping_context) self.subst_func = subst_func - self.within = within + self._within = within + + def within(self, kernel, instruction, stack): + if instruction is None: + # always perform substitutions on expressions not coming from + # instructions. 
+ return True + else: + return self._within(kernel, instruction, stack) def map_variable(self, expr, expn_state): if (expr.name in expn_state.arg_context or not self.within( expn_state.kernel, expn_state.instruction, expn_state.stack)): + # expr not in within => do nothing (call IdentityMapper) return super().map_variable( expr, expn_state) @@ -1525,7 +1582,13 @@ def qpolynomial_from_expr(space, expr): def simplify_using_aff(kernel, expr): inames = get_dependencies(expr) & kernel.all_inames() - domain = kernel.get_inames_domain(inames) + # FIXME: Ideally, we should find out what inames are usable and allow + # the simplification to use all of those. For now, fall back to making + # sure that the simplification only uses inames that were already there. + domain = ( + kernel + .get_inames_domain(inames) + .project_out_except(inames, [dim_type.set])) try: aff = guarded_aff_from_expr(domain.space, expr) @@ -1679,6 +1742,25 @@ def isl_set_from_expr(space, expr): return set_ + +def condition_to_set(space, expr): + """ + Returns an instance of :class:`islpy.Set` if *expr* can be expressed as an + ISL-set on *space*, if not then returns *None*. 
+ """ + from loopy.symbolic import get_dependencies + if get_dependencies(expr) <= frozenset( + space.get_var_dict()): + try: + from loopy.symbolic import isl_set_from_expr + return isl_set_from_expr(space, expr) + except ExpressionToAffineConversionError: + # non-affine condition: can't do much + return None + else: + # data-dependent condition: can't do much + return None + # }}} @@ -2036,7 +2118,7 @@ class AccessRangeOverlapChecker: arm = BatchedAccessRangeMapper(self.kernel, self.vars, overestimate=True) for expr in exprs: - arm(expr, self.kernel.insn_inames(insn)) + arm(expr, insn.within_inames) for name, arange in arm.access_ranges.items(): if arm.bad_subscripts[name]: diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 6bad214ec4e10a91e36b3566f454eabab00dde26..8af47c41222416fbd2dbe3dc5a88d4090a4a06f0 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -39,6 +39,14 @@ __doc__ = """ .. autoclass:: NumbaTarget .. autoclass:: NumbaCudaTarget +References to Canonical Names +----------------------------- + +.. currentmodule:: loopy.target + +.. class:: TargetBase + + See :class:`loopy.TargetBase`. 
""" diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 3234da45d469e2cc71de4733f3186aea8a93b065..d1e474c2054a15688f00b2bd5f5c6d9e6e9975df 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -34,6 +34,9 @@ from loopy.symbolic import IdentityMapper from loopy.types import NumpyType import pymbolic.primitives as p +from loopy.tools import remove_common_indentation +import re + from pytools import memoize_method __doc__ = """ @@ -172,6 +175,46 @@ def _preamble_generator(preamble_info): yield ("04_%s" % func_name, func_body) yield undef_integer_types_macro + for func in preamble_info.seen_functions: + if func.name == "int_pow": + base_ctype = preamble_info.kernel.target.dtype_to_typename( + func.arg_dtypes[0]) + exp_ctype = preamble_info.kernel.target.dtype_to_typename( + func.arg_dtypes[1]) + res_ctype = preamble_info.kernel.target.dtype_to_typename( + func.result_dtypes[0]) + + if func.arg_dtypes[1].numpy_dtype.kind == "u": + signed_exponent_preamble = "" + else: + signed_exponent_preamble = "\n" + remove_common_indentation( + """ + if (n < 0) { + x = 1.0/x; + n = -n; + }""") + + yield(f"07_{func.c_name}", f""" + inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{ + if (n == 0) + return 1; + {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)} + + {res_ctype} y = 1; + + while (n > 1) {{ + if (n % 2) {{ + y = x * y; + x = x * x; + }} + else + x = x * x; + n = n / 2; + }} + + return x*y; + }}""") + # }}} @@ -447,14 +490,14 @@ def c_math_mangler(target, name, arg_dtypes, modify_name=True): arg_dtypes=arg_dtypes) # binary functions - if (name in ["fmax", "fmin", "copysign"] + if (name in ["fmax", "fmin", "copysign", "pow"] and len(arg_dtypes) == 2): dtype = np.find_common_type( [], [dtype.numpy_dtype for dtype in arg_dtypes]) if dtype.kind == "c": - raise LoopyTypeError("%s does not support complex numbers") + raise LoopyTypeError(f"{name} does not support complex numbers") elif dtype.kind == "f": if 
modify_name: @@ -942,7 +985,8 @@ class CFamilyASTBuilder(ASTBuilderBase): codegen_state.seen_functions.add( SeenFunction(func_id, mangle_result.target_name, - mangle_result.arg_dtypes)) + mangle_result.arg_dtypes, + mangle_result.result_dtypes)) from pymbolic import var for i, (a, tgt_dtype) in enumerate( diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 74f1ead8bcb3240f2cf3775048f7cc809e367a1f..9ec99c784f5955232038644de6ee06dd6466237a 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -325,7 +325,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.codegen_state.seen_functions.add( SeenFunction( name, f"{name}_{suffix}", - (result_dtype, result_dtype))) + (result_dtype, result_dtype), + (result_dtype,))) if den_nonneg: if num_nonneg: @@ -538,7 +539,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.codegen_state.seen_functions.add( SeenFunction(identifier, mangle_result.target_name, - mangle_result.arg_dtypes or par_dtypes)) + mangle_result.arg_dtypes or par_dtypes, + mangle_result.result_dtypes)) return var(mangle_result.target_name)(*processed_parameters) @@ -701,6 +703,10 @@ class ExpressionToCExpressionMapper(IdentityMapper): self.rec(expr.denominator, type_context, tgt_dtype)) def map_power(self, expr, type_context): + tgt_dtype = self.infer_type(expr) + base_dtype = self.infer_type(expr.base) + exponent_dtype = self.infer_type(expr.exponent) + def base_impl(expr, type_context): from pymbolic.primitives import is_constant, is_zero if is_constant(expr.exponent): @@ -711,14 +717,24 @@ class ExpressionToCExpressionMapper(IdentityMapper): elif is_zero(expr.exponent - 2): return self.rec(expr.base*expr.base, type_context) - return type(expr)( - self.rec(expr.base, type_context), - self.rec(expr.exponent, type_context)) + if exponent_dtype.is_integral(): + from loopy.codegen import SeenFunction + func_name = ("loopy_pow_" + 
f"{tgt_dtype.numpy_dtype}_{exponent_dtype.numpy_dtype}") + + self.codegen_state.seen_functions.add( + SeenFunction( + "int_pow", func_name, + (tgt_dtype, exponent_dtype), + (tgt_dtype, ))) + return var(func_name)(self.rec(expr.base, type_context), + self.rec(expr.exponent, type_context)) + else: + return self.rec(var("pow")(expr.base, expr.exponent), type_context) if not self.allow_complex: return base_impl(expr, type_context) - tgt_dtype = self.infer_type(expr) if tgt_dtype.is_complex(): if expr.exponent in [2, 3, 4]: value = expr.base @@ -726,8 +742,8 @@ class ExpressionToCExpressionMapper(IdentityMapper): value = value * expr.base return self.rec(value, type_context) else: - b_complex = self.infer_type(expr.base).is_complex() - e_complex = self.infer_type(expr.exponent).is_complex() + b_complex = base_dtype.is_complex() + e_complex = exponent_dtype.is_complex() if b_complex and not e_complex: return var("%s_powr" % self.complex_type_name(tgt_dtype))( @@ -754,6 +770,7 @@ class ExpressionToCExpressionMapper(IdentityMapper): # {{{ C expression to code mapper class CExpressionToCodeMapper(RecursiveMapper): + # {{{ helpers def parenthesize_if_needed(self, s, enclosing_prec, my_prec): @@ -954,9 +971,8 @@ class CExpressionToCodeMapper(RecursiveMapper): return self._map_division_operator("%", expr, enclosing_prec) def map_power(self, expr, enclosing_prec): - return "pow({}, {})".format( - self.rec(expr.base, PREC_NONE), - self.rec(expr.exponent, PREC_NONE)) + raise RuntimeError(f"'{expr}' should have been transformed to 'Call'" + " expression node.") def map_array_literal(self, expr, enclosing_prec): return "{ %s }" % self.join_rec(", ", expr.children, PREC_NONE) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2023077bf8f286d9c28cdee2e37f194276dc211a..67dc1fe249af91d9b73a7162867dcd98c7ef6bc7 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -127,6 +127,18 @@ def cuda_function_mangler(kernel, name, arg_dtypes): return dtype, name + if name in 
["pow"] and len(arg_dtypes) == 2: + dtype = np.find_common_type([], arg_dtypes) + + if dtype == np.float64: + pass # pow + elif dtype == np.float32: + name = name + "f" # powf + else: + raise RuntimeError(f"{name} does not support type {dtype}") + + return dtype, name + if name in "atan2" and len(arg_dtypes) == 2: return arg_dtypes[0], name diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 74819b93932e0852a59c3ebacb99f9eaafab0a05..74887155b920e6d514df673c1ed8897486a4f81f 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -281,20 +281,20 @@ class ExecutionWrapperGeneratorBase: 'passed array")' % (arg.name, impl_array_name)) - base_arg = kernel.impl_arg_to_arg[impl_array_name] - - if not options.skip_arg_checks: - gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" - % (arg.name, impl_array_name, stride_impl_axis, - base_arg.dtype.dtype.itemsize)) + base_arg = kernel.impl_arg_to_arg[impl_array_name] - gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' " - ' is not divisible by its dtype itemsize"' - % (stride_impl_axis, impl_array_name)) - gen("del _lpy_remdr") - else: - gen("%s = _lpy_offset // %d" - % (arg.name, base_arg.dtype.itemsize)) + if not options.skip_arg_checks: + gen("%s, _lpy_remdr = divmod(%s.strides[%d], %d)" + % (arg.name, impl_array_name, stride_impl_axis, + base_arg.dtype.dtype.itemsize)) + + gen("assert _lpy_remdr == 0, \"Stride %d of array '%s' " + ' is not divisible by its dtype itemsize"' + % (stride_impl_axis, impl_array_name)) + gen("del _lpy_remdr") + else: + gen("%s = _lpy_offset // %d" + % (arg.name, base_arg.dtype.itemsize)) gen("# }}}") gen("") @@ -639,8 +639,6 @@ class ExecutionWrapperGeneratorBase: if issubclass(idi.arg_class, KernelArgument) ]) - gen.add_to_preamble("from __future__ import division") - gen.add_to_preamble("") self.target_specific_preamble(gen) gen.add_to_preamble("") self.generate_host_code(gen, codegen_result) diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py 
index 2ff9ede55e8c3ab5b5e1237b2a66c72635e1454b..c409df380c5a6b1e47cfcc9773aee2bee16ba1a8 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -28,7 +28,7 @@ import numpy as np from loopy.target.c import CFamilyTarget, CFamilyASTBuilder from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from pytools import memoize_method -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, LoopyTypeError from loopy.types import NumpyType from loopy.target.c import DTypeRegistryWrapper, c_math_mangler from loopy.kernel.data import AddressSpace, CallMangleInfo @@ -181,6 +181,22 @@ def opencl_function_mangler(kernel, name, arg_dtypes): result_dtypes=(result_dtype,), arg_dtypes=2*(result_dtype,)) + if name == "pow" and len(arg_dtypes) == 2: + dtype = np.find_common_type( + [], [dtype.numpy_dtype for dtype in arg_dtypes]) + if dtype == np.float64: + name = "powf64" + elif dtype == np.float32: + name = "powf32" + else: + raise LoopyTypeError(f"'pow' does not support type {dtype}.") + + result_dtype = NumpyType(dtype) + return CallMangleInfo( + target_name=name, + result_dtypes=(result_dtype,), + arg_dtypes=2*(result_dtype,)) + if name == "dot": scalar_dtype, offset, field_name = arg_dtypes[0].numpy_dtype.fields["s0"] return CallMangleInfo( @@ -286,6 +302,19 @@ def opencl_preamble_generator(preamble_info): """ % dict(idx_ctype=kernel.target.dtype_to_typename( kernel.index_dtype)))) + for func in preamble_info.seen_functions: + if func.name == "pow" and func.c_name == "powf32": + yield("08_clpowf32", """ + inline float powf32(float x, float y) { + return pow(x, y); + }""") + + if func.name == "pow" and func.c_name == "powf64": + yield("08_clpowf64", """ + inline double powf64(double x, double y) { + return pow(x, y); + }""") + # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index a17416c47bb290285972390ae161771bac8f77e9..8d0c309b08b8df4cda7e13c097441ef272449a02 100644 --- a/loopy/target/pyopencl.py 
+++ b/loopy/target/pyopencl.py @@ -509,14 +509,6 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): Raise('RuntimeError("input argument \'{name}\' ' 'must be supplied")'.format(name=idi.name)))) - if idi.dtype.is_integral(): - gen(Comment("cast to Python int to avoid trouble " - "with struct packing or Boost.Python")) - py_type = "int" - - gen(Assign(idi.name, f"{py_type}({idi.name})")) - gen(Line()) - if idi.dtype.is_composite(): gen(S("_lpy_knl.set_arg(%d, %s)" % (cl_arg_idx, idi.name))) cl_arg_idx += 1 @@ -578,7 +570,7 @@ def generate_value_arg_setup(kernel, devices, implemented_data_info): fp_arg_count += 1 gen(S( - "_lpy_knl.set_arg(%d, _lpy_pack('%s', %s))" + "_lpy_knl._set_arg_buf(%d, _lpy_pack('%s', %s))" % (cl_arg_idx, idi.dtype.dtype.char, idi.name))) cl_arg_idx += 1 @@ -632,25 +624,22 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): if not issubclass(idi.arg_class, TemporaryVariable)] + ["wait_for=None", "allocator=None"]) - from genpy import (For, Function, Suite, Import, ImportAs, Return, - FromImport, Line, Statement as S) + from genpy import (For, Function, Suite, Return, Line, Statement as S) return Function( codegen_result.current_program(codegen_state).name, args, Suite([ - FromImport("struct", ["pack as _lpy_pack"]), - ImportAs("pyopencl", "_lpy_cl"), - Import("pyopencl.tools"), Line(), ] + [ Line(), function_body, Line(), - ] + [ - For("_tv", "_global_temporaries", - # free global temporaries - S("_tv.release()")) - ] + [ + ] + ([ + For("_tv", "_global_temporaries", + # free global temporaries + S("_tv.release()")) + ] if self._get_global_temporaries(codegen_state) else [] + ) + [ Line(), Return("_lpy_evt"), ])) @@ -660,6 +649,14 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): # no such thing in Python return None + def _get_global_temporaries(self, codegen_state): + from loopy.kernel.data import AddressSpace + + return sorted( + (tv for tv in codegen_state.kernel.temporary_variables.values() + if 
tv.address_space == AddressSpace.GLOBAL), + key=lambda tv: tv.name) + def get_temporary_decls(self, codegen_state, schedule_state): from genpy import Assign, Comment, Line @@ -668,18 +665,12 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): from operator import mul return tv.dtype.numpy_dtype.itemsize * reduce(mul, tv.shape, 1) - from loopy.kernel.data import AddressSpace - - global_temporaries = sorted( - (tv for tv in codegen_state.kernel.temporary_variables.values() - if tv.address_space == AddressSpace.GLOBAL), - key=lambda tv: tv.name) - from pymbolic.mapper.stringifier import PREC_NONE ecm = self.get_expression_to_code_mapper(codegen_state) + global_temporaries = self._get_global_temporaries(codegen_state) if not global_temporaries: - return [Assign("_global_temporaries", "[]"), Line()] + return [] return [ Comment("{{{ allocate global temporaries"), @@ -734,8 +725,13 @@ class PyOpenCLPythonASTBuilder(PythonASTBuilderBase): arry_arg_code, Assign("_lpy_evt", "%(pyopencl_module_name)s.enqueue_nd_range_kernel(" "queue, _lpy_knl, " - "%(gsize)s, %(lsize)s, wait_for=wait_for, " - "g_times_l=True, allow_empty_ndrange=True)" + "%(gsize)s, %(lsize)s, " + # using positional args because pybind is slow with kwargs + "None, " # offset + "wait_for, " + "True, " # g_times_l + "True, " # allow_empty_ndrange + ")" % dict( pyopencl_module_name=self.target.pyopencl_module_name, gsize=ecm(gsize, prec=PREC_NONE, type_context="i"), diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 7fc20f19167af62f86e9fb18690b2f03f932e63b..cdee5600bb5dd0dce3a3971583604f737c6913d9 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -142,6 +142,7 @@ class PyOpenCLExecutionWrapperGenerator(ExecutionWrapperGeneratorBase): gen.add_to_preamble("import pyopencl as _lpy_cl") gen.add_to_preamble("import pyopencl.array as _lpy_cl_array") gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools") + 
gen.add_to_preamble("from struct import pack as _lpy_pack") def initialize_system_args(self, gen): """ diff --git a/loopy/target/python.py b/loopy/target/python.py index e54aa622f0b56360cb1b3f04be118c1319db7d3b..a1557e47bdf8990e7aa89472b59f3c9fc3666a05 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -118,7 +118,8 @@ class ExpressionToPythonMapper(StringifyMapper): self.codegen_state.seen_functions.add( SeenFunction(identifier, mangle_result.target_name, - mangle_result.arg_dtypes or par_dtypes)) + mangle_result.arg_dtypes or par_dtypes, + mangle_result.result_dtypes)) return "{}({})".format(mangle_result.target_name, ", ".join(str_parameters)) diff --git a/loopy/transform/data.py b/loopy/transform/data.py index a50725d20d579109f6e061fba0a1f408a6e23e93..e946a67c0cf067b4701a5ab4bcd86594d42c5b4c 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -631,6 +631,8 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): raise LoopyError("argument name '%s' conflicts with an existing identifier" "--cannot rename" % new_name) + # {{{ instructions + from pymbolic import var subst_dict = {old_name: var(new_name)} @@ -644,7 +646,11 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): make_subst_func(subst_dict), within=lambda kernel, insn, stack: True) - kernel = smap.map_kernel(kernel) + kernel = rule_mapping_context.finish_kernel(smap.map_kernel(kernel)) + + # }}} + + # {{{ args new_args = [] for arg in kernel.args: @@ -653,7 +659,22 @@ def rename_argument(kernel, old_name, new_name, existing_ok=False): new_args.append(arg) - return kernel.copy(args=new_args) + # }}} + + # {{{ domain + + new_domains = [] + for dom in kernel.domains: + dom_var_dict = dom.get_var_dict() + if old_name in dom_var_dict: + dt, pos = dom_var_dict[old_name] + dom = dom.set_dim_name(dt, pos, new_name) + + new_domains.append(dom) + + # }}} + + return kernel.copy(domains=new_domains, args=new_args) # }}} diff --git 
a/loopy/transform/iname.py b/loopy/transform/iname.py index 241c1492d4c41124c21befb2739fae349538c908..fb5e8d781ebc3f8c806dfa7b531560f0855c98d5 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -118,25 +118,25 @@ def prioritize_loops(kernel, loop_priority): class _InameSplitter(RuleAwareIdentityMapper): def __init__(self, rule_mapping_context, within, - split_iname, outer_iname, inner_iname, replacement_index): + iname_to_split, outer_iname, inner_iname, replacement_index): super().__init__(rule_mapping_context) self.within = within - self.split_iname = split_iname + self.iname_to_split = iname_to_split self.outer_iname = outer_iname self.inner_iname = inner_iname self.replacement_index = replacement_index def map_reduction(self, expr, expn_state): - if (self.split_iname in expr.inames - and self.split_iname not in expn_state.arg_context + if (self.iname_to_split in expr.inames + and self.iname_to_split not in expn_state.arg_context and self.within( expn_state.kernel, expn_state.instruction)): new_inames = list(expr.inames) - new_inames.remove(self.split_iname) + new_inames.remove(self.iname_to_split) new_inames.extend([self.outer_iname, self.inner_iname]) from loopy.symbolic import Reduction @@ -147,8 +147,8 @@ class _InameSplitter(RuleAwareIdentityMapper): return super().map_reduction(expr, expn_state) def map_variable(self, expr, expn_state): - if (expr.name == self.split_iname - and self.split_iname not in expn_state.arg_context + if (expr.name == self.iname_to_split + and self.iname_to_split not in expn_state.arg_context and self.within( expn_state.kernel, expn_state.instruction)): @@ -157,7 +157,58 @@ class _InameSplitter(RuleAwareIdentityMapper): return super().map_variable(expr, expn_state) -def _split_iname_backend(kernel, split_iname, +def _split_iname_in_set(s, iname_to_split, inner_iname, outer_iname, fixed_length, + fixed_length_is_inner): + var_dict = s.get_var_dict() + + if iname_to_split not in var_dict: + return s + + 
orig_dim_type, _ = var_dict[iname_to_split] + # orig_dim_type may be set or param (the latter if the iname is + # used as a parameter in a subdomain). + + # NB: dup_iname_to_split is not a globally valid identifier: only unique + # wrt the set s. + from pytools import generate_unique_names + for dup_iname_to_split in generate_unique_names(f"dup_{iname_to_split}"): + if dup_iname_to_split not in var_dict: + break + + from loopy.isl_helpers import duplicate_axes + s = duplicate_axes(s, (iname_to_split,), (dup_iname_to_split,)) + + outer_var_nr = s.dim(orig_dim_type) + inner_var_nr = s.dim(orig_dim_type)+1 + + s = s.add_dims(orig_dim_type, 2) + s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) + s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) + + from loopy.isl_helpers import make_slab + + if fixed_length_is_inner: + fixed_iname, var_length_iname = inner_iname, outer_iname + else: + fixed_iname, var_length_iname = outer_iname, inner_iname + + space = s.get_space() + s = s & ( + make_slab(space, fixed_iname, 0, fixed_length) + # name = fixed_iname + fixed_length*var_length_iname + .add_constraint(isl.Constraint.eq_from_names( + space, { + dup_iname_to_split: 1, + fixed_iname: -1, + var_length_iname: -fixed_length}))) + + dup_iname_dim_type, dup_name_idx = space.get_var_dict()[dup_iname_to_split] + s = s.project_out(dup_iname_dim_type, dup_name_idx, 1) + + return s + + +def _split_iname_backend(kernel, iname_to_split, fixed_length, fixed_length_is_inner, make_new_loop_index, outer_iname=None, inner_iname=None, @@ -186,88 +237,47 @@ def _split_iname_backend(kernel, split_iname, # }}} - existing_tags = kernel.iname_tags(split_iname) + existing_tags = kernel.iname_tags(iname_to_split) from loopy.kernel.data import ForceSequentialTag, filter_iname_tags_by_type if (do_tagged_check and existing_tags and not filter_iname_tags_by_type(existing_tags, ForceSequentialTag)): - raise LoopyError("cannot split already tagged iname '%s'" % split_iname) + raise 
LoopyError(f"cannot split already tagged iname '{iname_to_split}'") - if split_iname not in kernel.all_inames(): - raise ValueError("cannot split loop for unknown variable '%s'" % split_iname) + if iname_to_split not in kernel.all_inames(): + raise ValueError( + f"cannot split loop for unknown variable '{iname_to_split}'") applied_iname_rewrites = kernel.applied_iname_rewrites[:] vng = kernel.get_var_name_generator() if outer_iname is None: - outer_iname = vng(split_iname+"_outer") + outer_iname = vng(iname_to_split+"_outer") if inner_iname is None: - inner_iname = vng(split_iname+"_inner") - - def process_set(s): - var_dict = s.get_var_dict() - - if split_iname not in var_dict: - return s - - orig_dim_type, _ = var_dict[split_iname] + inner_iname = vng(iname_to_split+"_inner") - outer_var_nr = s.dim(orig_dim_type) - inner_var_nr = s.dim(orig_dim_type)+1 - - s = s.add_dims(orig_dim_type, 2) - s = s.set_dim_name(orig_dim_type, outer_var_nr, outer_iname) - s = s.set_dim_name(orig_dim_type, inner_var_nr, inner_iname) - - from loopy.isl_helpers import make_slab - - if fixed_length_is_inner: - fixed_iname, var_length_iname = inner_iname, outer_iname - else: - fixed_iname, var_length_iname = outer_iname, inner_iname - - space = s.get_space() - fixed_constraint_set = ( - make_slab(space, fixed_iname, 0, fixed_length) - # name = fixed_iname + fixed_length*var_length_iname - .add_constraint(isl.Constraint.eq_from_names( - space, { - split_iname: 1, - fixed_iname: -1, - var_length_iname: -fixed_length}))) - - name_dim_type, name_idx = space.get_var_dict()[split_iname] - s = s.intersect(fixed_constraint_set) - - def _project_out_only_if_all_instructions_in_within(): - for insn in kernel.instructions: - if split_iname in insn.within_inames and ( - not within(kernel, insn)): - return s - - return s.project_out(name_dim_type, name_idx, 1) - - return _project_out_only_if_all_instructions_in_within() - - new_domains = [process_set(dom) for dom in kernel.domains] + new_domains = [ 
+ _split_iname_in_set(dom, iname_to_split, inner_iname, outer_iname, + fixed_length, fixed_length_is_inner) + for dom in kernel.domains] from pymbolic import var inner = var(inner_iname) outer = var(outer_iname) new_loop_index = make_new_loop_index(inner, outer) - subst_map = {var(split_iname): new_loop_index} + subst_map = {var(iname_to_split): new_loop_index} applied_iname_rewrites.append(subst_map) # {{{ update within_inames new_insns = [] for insn in kernel.instructions: - if split_iname in insn.within_inames and ( + if iname_to_split in insn.within_inames and ( within(kernel, insn)): new_within_inames = ( (insn.within_inames.copy() - - frozenset([split_iname])) + - frozenset([iname_to_split])) | frozenset([outer_iname, inner_iname])) else: new_within_inames = insn.within_inames @@ -286,7 +296,7 @@ def _split_iname_backend(kernel, split_iname, for prio in kernel.loop_priority: new_prio = () for prio_iname in prio: - if prio_iname == split_iname: + if prio_iname == iname_to_split: new_prio = new_prio + (outer_iname, inner_iname) else: new_prio = new_prio + (prio_iname,) @@ -302,7 +312,7 @@ def _split_iname_backend(kernel, split_iname, rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) ins = _InameSplitter(rule_mapping_context, within, - split_iname, outer_iname, inner_iname, new_loop_index) + iname_to_split, outer_iname, inner_iname, new_loop_index) kernel = ins.map_kernel(kernel) kernel = rule_mapping_context.finish_kernel(kernel) @@ -311,7 +321,10 @@ def _split_iname_backend(kernel, split_iname, kernel = tag_inames(kernel, {outer_iname: existing_tag, inner_iname: existing_tag}) - return tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) + kernel = tag_inames(kernel, {outer_iname: outer_tag, inner_iname: inner_tag}) + kernel = remove_unused_inames(kernel, [iname_to_split]) + + return kernel # }}} @@ -319,6 +332,7 @@ def _split_iname_backend(kernel, split_iname, # {{{ split iname def 
split_iname(kernel, split_iname, inner_length, + *, outer_iname=None, inner_iname=None, outer_tag=None, inner_tag=None, slabs=(0, 0), do_tagged_check=True, @@ -1197,16 +1211,22 @@ def remove_unused_inames(kernel, inames=None): # {{{ remove them - from loopy.kernel.tools import DomainChanger - + domains = kernel.domains for iname in unused_inames: - domch = DomainChanger(kernel, (iname,)) + new_domains = [] + + for dom in domains: + try: + dt, idx = dom.get_var_dict()[iname] + except KeyError: + pass + else: + dom = dom.project_out(dt, idx, 1) + new_domains.append(dom) - dom = domch.domain - dt, idx = dom.get_var_dict()[iname] - dom = dom.project_out(dt, idx, 1) + domains = new_domains - kernel = kernel.copy(domains=domch.get_domains_with(dom)) + kernel = kernel.copy(domains=domains) # }}} @@ -1589,7 +1609,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): insns = [insn for insn in kernel.instructions if match(kernel, insn)] for insn in insns: - for iname in kernel.insn_inames(insn): + for iname in insn.within_inames: if kernel.iname_tags_of_type(iname, kind): used_axes.add(kind.axis) diff --git a/loopy/transform/privatize.py b/loopy/transform/privatize.py index 8527023bc789c9b3c9e18fe7ad6827c82a6e7a55..ce2d7942b70c68a79fd5c6ddc36b24fd6896cc04 100644 --- a/loopy/transform/privatize.py +++ b/loopy/transform/privatize.py @@ -124,7 +124,7 @@ def privatize_temporaries_with_inames( for writer_insn_id in wmap.get(tv.name, []): writer_insn = kernel.id_to_insn[writer_insn_id] - priv_axis_inames = kernel.insn_inames(writer_insn) & privatizing_inames + priv_axis_inames = writer_insn.within_inames & privatizing_inames referenced_priv_axis_inames = (priv_axis_inames & writer_insn.write_dependency_names()) diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 64337864f48e42f096ab851dd5b71afd607f067e..787966efc7fd00ad282e60990846ce07004e7906 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -216,8 +216,12 @@ class 
TypeInferenceMapper(CombineMapper): # Numpy types are sized return [NumpyType(np.dtype(type(expr)))] elif dt.kind == "f": - # deduce the smaller type by default - return [NumpyType(np.dtype(np.float32))] + if np.float32(expr) == np.float64(expr): + # No precision is lost by 'guessing' single precision, use that. + # This at least covers simple cases like '1j'. + return [NumpyType(np.dtype(np.float32))] + + return [NumpyType(np.dtype(np.float64))] elif dt.kind == "c": if np.complex64(expr) == np.complex128(expr): # (COMPLEX_GUESS_LOGIC) diff --git a/loopy/version.py b/loopy/version.py index fddd44479adcae87ec96f470a690274b154fde54..6f66c5347c55042ebf7b220a658bb4ebf3fef04d 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -42,7 +42,7 @@ else: # }}} -VERSION = (2020, 2, 1) +VERSION = (2020, 2, 2) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff --git a/requirements.txt b/requirements.txt index 2105aede063c65752ef4a9262eb960f749778a8a..8016ee7a86fbb4646d534bd66182f563b2cc9a44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/inducer/pytools.git#egg=pytools +git+https://github.com/inducer/pytools.git#egg=pytools >= 2021.1 git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/cgen.git#egg=cgen git+https://github.com/inducer/pyopencl.git#egg=pyopencl @@ -6,7 +6,7 @@ git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/genpy.git#egg=genpy git+https://github.com/inducer/codepy.git#egg=codepy -git+https://github.com/inducer/f2py +git+https://github.com/inducer/f2py#egg=f2py # Optional, needed for using the C preprocessor on Fortran ply>=3.6 diff --git a/setup.py b/setup.py index ddc47fefca853321d383bad4aeaa6f24f6d5c901..fcf284bc8574dc118e4b319c1b9ff38b0b24685d 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ setup(name="loopy", python_requires="~=3.6", install_requires=[ - "pytools>=2020.4", + "pytools>=2021.1", 
"pymbolic>=2019.2", "genpy>=2016.1.2", "cgen>=2016.1", diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py index 3c927a9cea09df50c4d0fe70dee7435b6ce3c129..56882416b8c361e09074b41d7af5b96cdcb90d2f 100644 --- a/test/test_linearization_checker.py +++ b/test/test_linearization_checker.py @@ -43,7 +43,9 @@ from loopy.schedule.checker.schedule import ( logger = logging.getLogger(__name__) -def test_lexschedule_creation(): +# {{{ test pairwise schedule creation + +def test_pairwise_schedule_creation(): import islpy as isl from loopy.schedule.checker import ( get_schedules_for_statement_pairs, @@ -296,6 +298,272 @@ def test_lexschedule_creation(): assert sched_map_before == sched_map_before_expected assert sched_map_after == sched_map_after_expected +# }}} + + +# {{{ test lex order map creation + +def test_lex_order_map_creation(): + import islpy as isl + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + + def _check_lex_map(expected_lex_order_map, n_dims): + # Isl ignores the apostrophes, so explicitly add them + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + + lex_order_map = create_lex_order_map( + n_dims=n_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) for i in range(n_dims)], + ) + + assert lex_order_map == expected_lex_order_map + assert ( + lex_order_map.get_var_names(isl.dim_type.in_) == + expected_lex_order_map.get_var_names(isl.dim_type.in_)) + assert ( + lex_order_map.get_var_names(isl.dim_type.out) == + expected_lex_order_map.get_var_names(isl.dim_type.out)) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0', {0}1', {0}2', {0}3', {0}4'] -> [{0}0, {0}1, {0}2, {0}3, {0}4] :" + "(" + "{0}0' < {0}0 " + ") or (" + "{0}0'={0}0 and {0}1' < {0}1 " + ") or (" + 
"{0}0'={0}0 and {0}1'={0}1 and {0}2' < {0}2 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3' < {0}3 " + ") or (" + "{0}0'={0}0 and {0}1'={0}1 and {0}2'={0}2 and {0}3'={0}3 and {0}4' < {0}4" + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 5) + + expected_lex_order_map = isl.Map( + "{{ " + "[{0}0'] -> [{0}0] :" + "(" + "{0}0' < {0}0 " + ")" + "}}".format(LEX_VAR_PREFIX)) + + _check_lex_map(expected_lex_order_map, 1) + +# }}} + + +# {{{ test statement instance ordering creation + +def test_statement_instance_ordering_creation(): + import islpy as isl + from loopy.schedule.checker import ( + get_schedules_for_statement_pairs, + ) + from loopy.schedule.checker.schedule import ( + get_lex_order_map_for_sched_space, + ) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + create_lex_order_map, + ) + + # example kernel (add deps to fix loop order) + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c,dep=insn_b} + end + end + for t + e[t] = f[t] {id=insn_d, dep=insn_c} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + # get a linearization + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + # Get pairwise schedules + insn_id_pairs = [ + ("insn_a", "insn_b"), + ("insn_a", "insn_c"), + ("insn_a", "insn_d"), + ("insn_b", "insn_c"), + ("insn_b", "insn_d"), + ("insn_c", "insn_d"), + ] + sched_maps = get_schedules_for_statement_pairs( + knl, + linearization_items, + insn_id_pairs, + ) + + 
def check_sio_for_insn_pair( + insn_id_before, + insn_id_after, + expected_lex_dims, + expected_sio, + ): + + # Get pairwise schedule + sched_map_before, sched_map_after = sched_maps[ + (insn_id_before, insn_id_after)] + + # Get map representing lexicographic ordering + sched_lex_order_map = get_lex_order_map_for_sched_space(sched_map_before) + + # Get expected lex order map + expected_lex_order_map = create_lex_order_map( + n_dims=expected_lex_dims, + before_names=["%s%d'" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + after_names=["%s%d" % (LEX_VAR_PREFIX, i) + for i in range(expected_lex_dims)], + ) + + assert sched_lex_order_map == expected_lex_order_map + + # create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + sched_map_before, + sched_map_after, + sched_lex_order_map, + ) + + sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio) + + assert sio_aligned == expected_sio + + # Relationship between insn_a and insn_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_b", 2, expected_sio) + + # Relationship between insn_a and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[{0}'=0, i', k'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + 
"}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_c", 2, expected_sio) + + # Relationship between insn_a and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> {{ " + "[{0}'=0, i', k'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_a", "insn_d", 1, expected_sio) + + # Relationship between insn_b and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, i, j] : " + "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " + "[{0}'=0, i', j'] -> [{0}=1, i=i', j] : " + "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " + "[{0}'=0, i', j'] -> [{0}=1, i=i', j=j'] : " + "0 <= i' < pi and 0 <= j' < pj " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_b", "insn_c", 3, expected_sio) + + # Relationship between insn_b and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_b", "insn_d", 1, expected_sio) + + # Relationship between insn_c and insn_d --------------------------------------- + + 
expected_sio = isl.Map( + "[pt, pi, pj] -> {{ " + "[{0}'=0, i', j'] -> [{0}=1, t] : " + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}}".format(STATEMENT_VAR_NAME) + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair("insn_c", "insn_d", 1, expected_sio) + +# }}} + if __name__ == "__main__": if len(sys.argv) > 1: diff --git a/test/test_loopy.py b/test/test_loopy.py index 41b5315e890bbd8199a2a3b67fe4cf8b0ae48f8d..be595aaa5d837abcf9ff189c415e73f7393b78df 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2920,6 +2920,123 @@ def test_access_check_with_conditionals(): lp.generate_code_v2(legal_but_nonaffine_condition_knl) +def test_access_check_with_insn_predicates(): + knl = lp.make_kernel( + "{[i]: 0 1: exec(sys.argv[1]) diff --git a/test/test_statistics.py b/test/test_statistics.py index 757f59e865b350c8f452977d36f2639393923fad..bcdc542cb8c4eac50143b07ef09f1460f5abd9c5 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -1070,7 +1070,7 @@ def test_floor_div_coefficient_collector(): [ "for i_outer", "for j_outer", - "<> loc[i_inner,j_inner] = 3.14 {id=loc_init}", + "<> loc[i_inner,j_inner] = 3.14f {id=loc_init}", "loc[i_inner,(j_inner+r+4) %% %d] = loc[i_inner,(j_inner+r) %% %d]" " {id=add,dep=loc_init}" % (bsize, bsize), "out0[i_outer*16+i_inner,j_outer*16+j_inner] = loc[i_inner,j_inner]" diff --git a/test/test_transform.py b/test/test_transform.py index 546f86838929a70f42044c3894ad972ff9f354b9..daa659808d1e7aa12f51d7b4b897672aa3344874 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -670,6 +670,49 @@ def test_add_inames_for_unused_hw_axes(ctx_factory): parameters={"n": n}) +def test_rename_argument_of_domain_params(ctx_factory): + knl = lp.make_kernel( + "{[i, j]: 0<=i 1: exec(sys.argv[1])