From 844ff2923726450991df300d13b6e7aa2481bef6 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 24 Feb 2016 01:17:02 -0600 Subject: [PATCH] Start to clean out the Augean stables of loopy docs --- doc/conf.py | 9 +- doc/index.rst | 5 +- doc/ref_creation.rst | 33 ++ doc/{reference.rst => ref_kernel.rst} | 570 ++++++++++---------------- doc/ref_other.rst | 46 +++ doc/ref_transform.rst | 135 ++++++ doc/tutorial.rst | 8 +- loopy/__init__.py | 34 +- loopy/compiled.py | 18 +- loopy/frontend/fortran/__init__.py | 5 +- loopy/frontend/fortran/translator.py | 4 +- loopy/kernel/__init__.py | 11 +- loopy/kernel/creation.py | 22 +- loopy/kernel/data.py | 20 +- loopy/statistics.py | 14 + loopy/target/__init__.py | 19 +- loopy/target/c/__init__.py | 3 + loopy/target/cuda.py | 2 + loopy/target/ispc.py | 5 + loopy/target/opencl.py | 3 + loopy/target/pyopencl.py | 9 + loopy/transform/arithmetic.py | 99 ----- loopy/transform/batch.py | 4 + loopy/transform/iname.py | 138 ++++++- loopy/transform/parameter.py | 43 ++ loopy/transform/subst.py | 2 +- 26 files changed, 761 insertions(+), 500 deletions(-) create mode 100644 doc/ref_creation.rst rename doc/{reference.rst => ref_kernel.rst} (72%) create mode 100644 doc/ref_other.rst create mode 100644 doc/ref_transform.rst diff --git a/doc/conf.py b/doc/conf.py index 301604607..74e1aec30 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -46,7 +46,7 @@ master_doc = 'index' # General information about the project. project = u'loopy' -copyright = u'2011, Andreas Klöckner' +copyright = u'2016, Andreas Klöckner' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -54,7 +54,10 @@ copyright = u'2011, Andreas Klöckner' # # The short X.Y version. 
ver_dic = {} -exec(compile(open("../loopy/version.py").read(), "../loopy/version.py", 'exec'), ver_dic) +with open("../loopy/version.py") as vpy_file: + version_py = vpy_file.read() + +exec(compile(version_py, "../loopy/version.py", 'exec'), ver_dic) version = ".".join(str(x) for x in ver_dic["VERSION"]) # The full version, including alpha/beta/rc tags. release = ver_dic["VERSION_TEXT"] @@ -249,4 +252,4 @@ intersphinx_mapping = { 'http://docs.scipy.org/doc/numpy/': None, } -autoclass_content = "both" +autoclass_content = "class" diff --git a/doc/index.rst b/doc/index.rst index 19bbe8772..a0bad2898 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -63,7 +63,10 @@ Please check :ref:`installation` to get started. :maxdepth: 2 tutorial - reference + ref_creation + ref_kernel + ref_transform + ref_other misc Indices and tables diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst new file mode 100644 index 000000000..92eff09c9 --- /dev/null +++ b/doc/ref_creation.rst @@ -0,0 +1,33 @@ +.. module:: loopy +.. moduleauthor:: Andreas Kloeckner <inform@tiker.net> + +.. _creating-kernels: + +Reference: Creating Kernels +=========================== + +From Loop Domains and Instructions +---------------------------------- + +.. autofunction:: make_kernel + +From Fortran +------------ + +.. autofunction:: parse_fortran + +.. autofunction:: parse_transformed_fortran + +.. autofunction:: c_preprocess + +From Other Kernels +------------------ + +.. autofunction:: fuse_kernels + +To Copy between Data Formats +---------------------------- + +.. autofunction:: make_copy_kernel + +.. vim: tw=75:spell:fdm=marker diff --git a/doc/reference.rst b/doc/ref_kernel.rst similarity index 72% rename from doc/reference.rst rename to doc/ref_kernel.rst index 351a2374a..ff27d84f5 100644 --- a/doc/reference.rst +++ b/doc/ref_kernel.rst @@ -1,21 +1,39 @@ -.. _reference: +.. 
currentmodule:: loopy -Reference Guide -=============== +Reference: Loopy's Model of a Kernel +==================================== -.. module:: loopy -.. moduleauthor:: Andreas Kloeckner <inform@tiker.net> +.. _domain-tree: -This guide defines all functionality exposed by loopy. If you would like -a more gentle introduction, you may consider reading the example-based -:ref:`tutorial` instead. +Loop Domain Tree +---------------- -.. _inames: +.. {{{ + +Example:: + + { [i]: 0<=i<n } + +A kernel's iteration domain is given by a list of :class:`islpy.BasicSet` +instances (which parametrically represent multi-dimensional sets of +tuples of integers). They define the integer values of the loop variables +for which instructions (see below) will be executed. +It is written in :ref:`isl-syntax`. :mod:`loopy` calls the loop variables +*inames*. In this case, *i* is the sole iname. The loop +domain is given as a conjunction of affine equality +and inequality constraints. Integer divisibility constraints (resulting +in strides) are also allowed. In the absence of divisibility +constraints, the loop domain is convex. -Domain Tree ------------ +Note that *n* in the example is not an iname. It is a +:ref:`domain parameter <domain-parameters>` that is passed to the kernel by the user. +To accommodate some data-dependent control flow, there is not actually +a single loop domain, but rather a *tree of loop domains*, +allowing more deeply nested domains to depend on inames +introduced by domains closer to the root. +.. _inames: Inames ^^^^^^ @@ -25,161 +43,10 @@ dependency semantics--otherwise e.g. a fetch could happen inside one loop nest, and then the instruction using that fetch could be inside a wholly different loop nest. -Instructions ------------- - -Expressions -^^^^^^^^^^^ - -Loopy's expressions are a slight superset of the expressions supported by -:mod:`pymbolic`. 
- -* `if` -* `reductions` - * duplication of reduction inames -* complex-valued arithmetic -* tagging of array access and substitution rule use ("$") -* ``indexof``, ``indexof_vec`` - -.. _types: - -Specifying Types ----------------- - -:mod:`loopy` uses the same type system as :mod:`numpy`. (See -:class:`numpy.dtype`) It also uses :mod:`pyopencl` for a registry of -user-defined types and their C equivalents. See :func:`pyopencl.tools.get_or_register_dtype` -and related functions. - -For a string representation of types, all numpy types (e.g. ``float32`` etc.) -are accepted, in addition to what is registered in :mod:`pyopencl`. - -.. _iname-tags: - -Iname Implementation Tags -------------------------- - -=============================== ==================================================== -Tag Meaning -=============================== ==================================================== -``None`` | ``"for"`` Sequential loop -``"l.N"`` Local (intra-group) axis N ("local") -``"g.N"`` Group-number axis N ("group") -``"unr"`` Unroll -``"ilp"`` | ``"ilp.unr"`` Unroll using instruction-level parallelism -``"ilp.seq"`` Realize parallel iname as innermost loop -``"like.INAME"`` Can be used when tagging inames to tag like another -``"unused.g"`` | ``"unused.l"`` Can be to tag as the next unused group/local axis -=============================== ==================================================== - -(Throughout this table, `N` must be replaced by an actual, zero-based number.) - -"ILP" does three things: - -* Restricts loops to be innermost -* Duplicates reduction storage for any reductions nested around ILP usage -* Causes a loop (unrolled or not) to be opened/generated for each - involved instruction - -.. _data-dim-tags: - -Data Axis Tags --------------- - -Data axis tags specify how a multi-dimensional array (which is loopy's -main way of storing data) is represented in (linear, 1D) computer -memory. 
This storage format is given as a number of "tags", as listed -in the table below. Each axis of an array has a tag corresponding to it. -In the user interface, array dim tags are specified as a tuple of these -tags or a comma-separated string containing them, such as the following:: - - c,vec,sep,c - -The interpretation of these tags is order-dependent, they are read -from left to right. - -===================== ==================================================== -Tag Meaning -===================== ==================================================== -``c`` Nest current axis around the ones that follow -``f`` Nest current axis inside the ones that follow -``N0`` ... ``N9`` Specify an explicit nesting level for this axis -``stride:EXPR`` A fixed stride -``sep`` Implement this axis by mapping to separate arrays -``vec`` Implement this axis as entries in a vector -===================== ==================================================== - -``sep`` and ``vec`` obviously require the number of entries -in the array along their respective axis to be known at code -generation time. - -When the above speaks about 'nesting levels', this means that axes -"nested inside" others are "faster-moving" when viewed from linear -memory. - -In addition, each tag may be followed by a question mark (``?``), -which indicates that if there are more dimension tags specified -than array axes present, that this axis should be omitted. Axes -with question marks are omitted in a left-first manner until the correct -number of dimension tags is achieved. - -Some examples follow, all of which use a three-dimensional array of shape -*(3, M, 4)*. For simplicity, we assume that array entries have size one. - -* ``c,c,c``: The axes will have strides *(M*4, 4, 1)*, - leading to a C-like / row-major layout. - -* ``f,f,f``: The axes will have strides *(1, 3, 3*M)*, - leading to a Fortran-like / row-major layout. 
- -* ``sep,c,c``: The array will be mapped to three arrays of - shape *(M, 4)*, each with strides *(4, 1)*. - -* ``c,c,vec``: The array will be mapped to an array of - ``float4`` vectors, with (``float4``-based) strides of - *(M, 1)*. - -* ``N1,N0,N2``: The axes will have strides *(M, 1, 3*M)*. - -.. _creating-kernels: - -Creating Kernels ----------------- - -.. autoclass:: auto - -.. _arguments: - -Arguments -^^^^^^^^^ - -.. autoclass:: ValueArg - :members: - :undoc-members: - -.. autoclass:: GlobalArg - :members: - :undoc-members: - -.. autoclass:: ConstantArg - :members: - :undoc-members: - -.. autoclass:: ImageArg - :members: - :undoc-members: - -.. _temporaries: - -Loop domains -^^^^^^^^^^^^ - -TODO: Explain the domain tree - .. _isl-syntax: ISL syntax -~~~~~~~~~~ +^^^^^^^^^^ The general syntax of an ISL set is the following:: @@ -217,27 +84,73 @@ Examples of constructs that are **not** allowed: (**Note:** This may be added in a future version of loopy. For now, loop domains have to be convex.) -Temporary Variables -^^^^^^^^^^^^^^^^^^^ +.. _domain-parameters: -Temporary variables model OpenCL's ``private`` and ``local`` address spaces. Both -have the lifetime of a kernel invocation. +Domain parameters +^^^^^^^^^^^^^^^^^ -.. autoclass:: TemporaryVariable - :members: - :undoc-members: +Domain parameters are identifiers being used in loop domains that are not +*inames*, i.e. they do not define loop variables. In the following domain +specification, *n* is a domain parameter:: + + {[i,j]: 0 <= i,j < n} + +Values of domain parameters arise from + +* being passed to the kernel as :ref:`arguments` + +* being assigned to :ref:`temporaries` to feed into domains + lower in the :ref:`domain-tree`. + +.. 
_iname-tags: + +Iname Implementation Tags +^^^^^^^^^^^^^^^^^^^^^^^^^ + +=============================== ==================================================== +Tag Meaning +=============================== ==================================================== +``None`` | ``"for"`` Sequential loop +``"l.N"`` Local (intra-group) axis N ("local") +``"g.N"`` Group-number axis N ("group") +``"unr"`` Unroll +``"ilp"`` | ``"ilp.unr"`` Unroll using instruction-level parallelism +``"ilp.seq"`` Realize parallel iname as innermost loop +``"like.INAME"`` Can be used when tagging inames to tag like another +``"unused.g"`` | ``"unused.l"`` Can be used to tag as the next unused group/local axis +=============================== ==================================================== + +(Throughout this table, `N` must be replaced by an actual, zero-based number.) + +"ILP" does three things: + +* Restricts loops to be innermost +* Duplicates reduction storage for any reductions nested around ILP usage +* Causes a loop (unrolled or not) to be opened/generated for each + involved instruction + +.. }}} + +.. _instructions: Instructions -^^^^^^^^^^^^ +------------ -.. autoclass:: UniqueName +.. {{{ .. autoclass:: InstructionBase .. _assignments: -Assignments -~~~~~~~~~~~ +Assignment objects +^^^^^^^^^^^^^^^^^^ + +.. autoclass:: Assignment + +.. _assignment-syntax: + +Textual Assignment Syntax +^^^^^^^^^^^^^^^^^^^^^^^^^ The general syntax of an instruction is a simple assignment:: @@ -349,254 +262,205 @@ These are usually key-value pairs. The following attributes are recognized: given instruction groups. See :class:`InstructionBase.conflicts_with_groups`. -Assignment instructions are expressed as instances of the following class: +.. _expression-syntax: -.. autoclass:: ExpressionInstruction +Expressions +^^^^^^^^^^^ -.. _expression-syntax: +Loopy's expressions are a slight superset of the expressions supported by +:mod:`pymbolic`. 
-Expression Syntax -~~~~~~~~~~~~~~~~~ +* `if` +* `reductions` + * duplication of reduction inames +* complex-valued arithmetic +* tagging of array access and substitution rule use ("$") +* ``indexof``, ``indexof_vec`` TODO: Functions TODO: Reductions C Block Instructions -~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: CInstruction -.. _substitution-rule: - -Substitution Rules -^^^^^^^^^^^^^^^^^^ - -Syntax of a substitution rule:: - - rule_name(arg1, arg2) := EXPRESSION +.. }}} -Kernels -^^^^^^^ +Data: Arguments and Temporaries +------------------------------- -.. class:: LoopKernel +.. {{{ -Do not create :class:`LoopKernel` objects directly. Instead, use the following -function, which is responsible for creating kernels: +Kernels operate on two types of data: 'arguments' carrying data into and out of a kernel, +and temporaries with lifetimes tied to the runtime of the kernel. -.. autofunction:: make_kernel - -.. autofunction:: parse_fortran - -.. autofunction:: parse_transformed_fortran +.. _arguments: -.. autofunction:: make_copy_kernel +Arguments +^^^^^^^^^ -.. autofunction:: fuse_kernels +.. autoclass:: KernelArgument + :members: + :undoc-members: -.. autofunction:: c_preprocess +.. autoclass:: ValueArg + :members: + :undoc-members: -Transforming Kernels --------------------- +.. autoclass:: GlobalArg + :members: + :undoc-members: -.. _context-matching: +.. autoclass:: ConstantArg + :members: + :undoc-members: -Matching contexts -^^^^^^^^^^^^^^^^^ +.. autoclass:: ImageArg + :members: + :undoc-members: -TODO: Matching instruction tags +.. _temporaries: -.. automodule:: loopy.context_matching +Temporary Variables +^^^^^^^^^^^^^^^^^^^ -.. autofunction:: parse_match +Temporary variables model OpenCL's ``private`` and ``local`` address spaces. Both +have the lifetime of a kernel invocation. -.. autofunction:: parse_stack_match +.. autoclass:: TemporaryVariable + :members: + :undoc-members: -.. currentmodule:: loopy +.. 
_types: -Wrangling inames +Specifying Types ^^^^^^^^^^^^^^^^ -.. autofunction:: split_iname - -.. autofunction:: chunk_iname - -.. autofunction:: join_inames - -.. autofunction:: tag_inames - -.. autofunction:: duplicate_inames - -.. undocumented .. autofunction:: link_inames - -.. autofunction:: rename_iname - -.. autofunction:: remove_unused_inames - -.. autofunction:: set_loop_priority - -.. autofunction:: split_reduction_inward - -.. autofunction:: split_reduction_outward - -.. autofunction:: affine_map_inames - -.. autofunction:: realize_ilp - -.. autofunction:: find_unused_axis_tag - -Dealing with Parameters -^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: fix_parameters - -.. autofunction:: assume +:mod:`loopy` uses the same type system as :mod:`numpy`. (See +:class:`numpy.dtype`) It also uses :mod:`pyopencl` for a registry of +user-defined types and their C equivalents. See :func:`pyopencl.tools.get_or_register_dtype` +and related functions. -Dealing with Substitution Rules -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +For a string representation of types, all numpy types (e.g. ``float32`` etc.) +are accepted, in addition to what is registered in :mod:`pyopencl`. -.. autofunction:: extract_subst +.. _data-dim-tags: -.. autofunction:: assignment_to_subst +Data Axis Tags +^^^^^^^^^^^^^^ -.. autofunction:: expand_subst +Data axis tags specify how a multi-dimensional array (which is loopy's +main way of storing data) is represented in (linear, 1D) computer +memory. This storage format is given as a number of "tags", as listed +in the table below. Each axis of an array has a tag corresponding to it. +In the user interface, array dim tags are specified as a tuple of these +tags or a comma-separated string containing them, such as the following:: -.. autofunction:: find_rules_matching + c,vec,sep,c -.. autofunction:: find_one_rule_matching +The interpretation of these tags is order-dependent, they are read +from left to right. 
-Caching, Precomputation and Prefetching -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +===================== ==================================================== +Tag Meaning +===================== ==================================================== +``c`` Nest current axis around the ones that follow +``f`` Nest current axis inside the ones that follow +``N0`` ... ``N9`` Specify an explicit nesting level for this axis +``stride:EXPR`` A fixed stride +``sep`` Implement this axis by mapping to separate arrays +``vec`` Implement this axis as entries in a vector +===================== ==================================================== -.. autofunction:: precompute +``sep`` and ``vec`` obviously require the number of entries +in the array along their respective axis to be known at code +generation time. -.. autofunction:: add_prefetch +When the above speaks about 'nesting levels', this means that axes +"nested inside" others are "faster-moving" when viewed from linear +memory. -.. autofunction:: buffer_array +In addition, each tag may be followed by a question mark (``?``), +which indicates that if there are more dimension tags specified +than array axes present, that this axis should be omitted. Axes +with question marks are omitted in a left-first manner until the correct +number of dimension tags is achieved. -.. autofunction:: alias_temporaries +Some examples follow, all of which use a three-dimensional array of shape +*(3, M, 4)*. For simplicity, we assume that array entries have size one. -Influencing data access -^^^^^^^^^^^^^^^^^^^^^^^ +* ``c,c,c``: The axes will have strides *(M*4, 4, 1)*, + leading to a C-like / row-major layout. -.. autofunction:: change_arg_to_image +* ``f,f,f``: The axes will have strides *(1, 3, 3*M)*, + leading to a Fortran-like / column-major layout. -.. autofunction:: tag_data_axes +* ``sep,c,c``: The array will be mapped to three arrays of + shape *(M, 4)*, each with strides *(4, 1)*. -.. 
autofunction:: remove_unused_arguments +* ``c,c,vec``: The array will be mapped to an array of + ``float4`` vectors, with (``float4``-based) strides of + *(M, 1)*. -.. autofunction:: set_array_dim_names +* ``N1,N0,N2``: The axes will have strides *(M, 1, 3*M)*. -Padding -^^^^^^^ +.. }}} -.. autofunction:: split_array_dim +.. _substitution-rule: -.. autofunction:: find_padding_multiple +Substitution Rules +------------------ -.. autofunction:: add_padding +.. {{{ -Manipulating Instructions +Substitution Rule Objects ^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autofunction:: set_instruction_priority - -.. autofunction:: add_dependency - -.. autofunction:: remove_instructions - -.. autofunction:: tag_instructions - -Library interface -^^^^^^^^^^^^^^^^^ - -.. autofunction:: register_reduction_parser - -.. autofunction:: register_preamble_generators - -.. autofunction:: register_symbol_manglers - -.. autofunction:: register_function_manglers - -Arguments -^^^^^^^^^ - -.. autofunction:: set_argument_order - -.. autofunction:: add_dtypes - -.. autofunction:: infer_unknown_types - -.. autofunction:: add_and_infer_dtypes - -.. autofunction:: rename_argument - -Batching -^^^^^^^^ +.. autoclass:: SubstitutionRule -.. autofunction:: to_batched +.. _subst-rule-syntax: -Finishing up -^^^^^^^^^^^^ +Textual Syntax for Substitution Rules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. autofunction:: generate_loop_schedules - -.. autofunction:: get_one_scheduled_kernel - -.. autofunction:: generate_code - -Running -------- - -.. autoclass:: CompiledKernel - -Automatic Testing ------------------ - -.. 
autofunction:: auto_test_vs_ref - -Troubleshooting ---------------- - -Printing :class:`LoopKernel` objects -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you're confused about things loopy is referring to in an error message or -about the current state of the :class:`LoopKernel` you are transforming, the -following always works:: - - print kernel +Syntax of a substitution rule:: -(And it yields a human-readable--albeit terse--representation of *kernel*.) + rule_name(arg1, arg2) := EXPRESSION -.. autofunction:: preprocess_kernel +.. }}} -.. autofunction:: get_dot_dependency_graph +Kernel Options +-------------- -.. autofunction:: show_dependency_graph +.. autoclass:: Options -Options +Targets ------- -.. autoclass:: Options +.. automodule:: loopy.target -.. autofunction:: set_options +Helper values +------------- -Controlling caching -------------------- +.. {{{ -.. autofunction:: set_caching_enabled +.. autoclass:: auto -.. autoclass:: CacheMode +.. autoclass:: UniqueName -Obtaining Kernel Statistics ---------------------------- +.. }}} -.. autofunction:: get_op_poly +The Kernel Object +----------------- -.. autofunction:: get_gmem_access_poly +Do not create :class:`LoopKernel` objects directly. Instead, refer to +:ref:`creating-kernels`. -.. autofunction:: sum_mem_access_to_bytes +.. autoclass:: LoopKernel -.. autofunction:: get_barrier_poly +.. autoclass:: kernel_state + :members: + :undoc-members: -.. vim: tw=75:spell +.. vim: tw=75:spell:fdm=marker diff --git a/doc/ref_other.rst b/doc/ref_other.rst new file mode 100644 index 000000000..71d6c54b1 --- /dev/null +++ b/doc/ref_other.rst @@ -0,0 +1,46 @@ +Reference: Other Functionality +============================== + +Obtaining Kernel Performance Statistics +--------------------------------------- + +.. automodule:: loopy.statistics + +Controlling caching +------------------- + +.. autofunction:: set_caching_enabled + +.. 
autoclass:: CacheMode + +Running Kernels +--------------- + +In addition to simply calling kernels using :meth:`LoopKernel.__call__`, +the following underlying functionality may be used: + +.. autoclass:: CompiledKernel + +Automatic Testing +----------------- + +.. autofunction:: auto_test_vs_ref + +Troubleshooting +--------------- + +Printing :class:`LoopKernel` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you're confused about things loopy is referring to in an error message or +about the current state of the :class:`LoopKernel` you are transforming, the +following always works:: + + print(kernel) + +(And it yields a human-readable--albeit terse--representation of *kernel*.) + +.. autofunction:: get_dot_dependency_graph + +.. autofunction:: show_dependency_graph + diff --git a/doc/ref_transform.rst b/doc/ref_transform.rst new file mode 100644 index 000000000..4c3c24873 --- /dev/null +++ b/doc/ref_transform.rst @@ -0,0 +1,135 @@ +.. _reference-transform: + +Reference: Transforming Kernels +=============================== + +Dealing with Parameters +----------------------- + +.. automodule:: loopy.transform.parameter + +Wrangling inames +---------------- + +.. automodule:: loopy.transform.iname + +Dealing with Substitution Rules +------------------------------- + +.. currentmodule:: loopy + +.. autofunction:: extract_subst + +.. autofunction:: assignment_to_subst + +.. autofunction:: expand_subst + +.. autofunction:: find_rules_matching + +.. autofunction:: find_one_rule_matching + +Caching, Precomputation and Prefetching +--------------------------------------- + +.. autofunction:: precompute + +.. autofunction:: add_prefetch + +.. autofunction:: buffer_array + +.. autofunction:: alias_temporaries + +Influencing data access +----------------------- + +.. autofunction:: change_arg_to_image + +.. autofunction:: tag_data_axes + +.. autofunction:: remove_unused_arguments + +.. autofunction:: set_array_dim_names + +Padding Data +------------ + +.. 
autofunction:: split_array_dim + +.. autofunction:: find_padding_multiple + +.. autofunction:: add_padding + +Manipulating Instructions +------------------------- + +.. autofunction:: set_instruction_priority + +.. autofunction:: add_dependency + +.. autofunction:: remove_instructions + +.. autofunction:: tag_instructions + +Registering Library Routines +---------------------------- + +.. autofunction:: register_reduction_parser + +.. autofunction:: register_preamble_generators + +.. autofunction:: register_symbol_manglers + +.. autofunction:: register_function_manglers + +Modifying Arguments +------------------- + +.. autofunction:: set_argument_order + +.. autofunction:: add_dtypes + +.. autofunction:: infer_unknown_types + +.. autofunction:: add_and_infer_dtypes + +.. autofunction:: rename_argument + +Creating Batches of Operations +------------------------------ + +.. automodule:: loopy.transform.batch + +Finishing up +------------ + +.. currentmodule:: loopy + +.. autofunction:: preprocess_kernel + +.. autofunction:: generate_loop_schedules + +.. autofunction:: get_one_scheduled_kernel + +.. autofunction:: generate_code + +Setting options +--------------- + +.. autofunction:: set_options + +.. _context-matching: + +Matching contexts +----------------- + +TODO: Matching instruction tags + +.. automodule:: loopy.context_matching + +.. autofunction:: parse_match + +.. autofunction:: parse_stack_match + + +.. vim: tw=75:spell + diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 24ec0ce4f..4421fd80f 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1186,8 +1186,12 @@ across the remaining axis of the workgroup would emerge. TODO -Obtaining Kernel Statistics ---------------------------- +.. }}} + +Obtaining Performance Statistics +-------------------------------- + +.. {{{ Operations, array access, and barriers can all be counted, which may facilitate performance prediction and optimization of a :mod:`loopy` kernel. 
diff --git a/loopy/__init__.py b/loopy/__init__.py index bf0a2be1b..c71a03fec 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -38,11 +38,13 @@ from loopy.library.function import ( from loopy.kernel.data import ( auto, + KernelArgument, ValueArg, GlobalArg, ConstantArg, ImageArg, InstructionBase, Assignment, ExpressionInstruction, CInstruction, - TemporaryVariable) + TemporaryVariable, + SubstitutionRule) -from loopy.kernel import LoopKernel +from loopy.kernel import LoopKernel, kernel_state from loopy.kernel.tools import ( get_dot_dependency_graph, show_dependency_graph, @@ -54,9 +56,10 @@ from loopy.library.reduction import register_reduction_parser # {{{ import transforms from loopy.transform.iname import ( - assume, set_loop_priority, + set_loop_priority, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, link_inames, remove_unused_inames, + split_reduction_inward, split_reduction_outward, affine_map_inames, find_unused_axis_tag) from loopy.transform.instruction import ( @@ -79,8 +82,7 @@ from loopy.transform.buffer import buffer_array from loopy.transform.fusion import fuse_kernels from loopy.transform.arithmetic import ( - split_reduction_inward, - split_reduction_outward, fold_constants, + fold_constants, collect_common_factors_on_increment) from loopy.transform.padding import ( @@ -89,7 +91,7 @@ from loopy.transform.padding import ( from loopy.transform.ilp import realize_ilp from loopy.transform.batch import to_batched -from loopy.transform.parameter import fix_parameters +from loopy.transform.parameter import assume, fix_parameters # }}} @@ -107,15 +109,24 @@ from loopy.auto_test import auto_test_vs_ref from loopy.frontend.fortran import (c_preprocess, parse_transformed_fortran, parse_fortran) +from loopy.target import TargetBase +from loopy.target.c import CTarget +from loopy.target.cuda import CudaTarget +from loopy.target.opencl import OpenCLTarget +from loopy.target.pyopencl import PyOpenCLTarget +from 
loopy.target.ispc import ISPCTarget + __all__ = [ "TaggedVariable", "Reduction", "LinearSubscript", "auto", - "LoopKernel", + "LoopKernel", "kernel_state", + "KernelArgument", "ValueArg", "ScalarArg", "GlobalArg", "ArrayArg", "ConstantArg", "ImageArg", "TemporaryVariable", + "SubstitutionRule", "InstructionBase", "Assignment", "ExpressionInstruction", "CInstruction", @@ -127,10 +138,11 @@ __all__ = [ # {{{ transforms - "assume", "set_loop_priority", + "set_loop_priority", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "link_inames", "remove_unused_inames", + "split_reduction_inward", "split_reduction_outward", "affine_map_inames", "find_unused_axis_tag", "add_prefetch", "change_arg_to_image", "tag_data_axes", @@ -148,7 +160,6 @@ __all__ = [ "precompute", "buffer_array", "fuse_kernels", - "split_reduction_inward", "split_reduction_outward", "fold_constants", "collect_common_factors_on_increment", "split_array_dim", "split_arg_axis", "find_padding_multiple", @@ -158,7 +169,7 @@ __all__ = [ "to_batched", - "fix_parameters", + "assume", "fix_parameters", # }}} @@ -186,6 +197,9 @@ __all__ = [ "LoopyError", "LoopyWarning", + "TargetBase", "CTarget", "CudaTarget", "OpenCLTarget", + "PyOpenCLTarget", "ISPCTarget", + # {{{ from this file "register_preamble_generators", diff --git a/loopy/compiled.py b/loopy/compiled.py index 6d4396b5a..32536021d 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -791,8 +791,16 @@ class _CLKernelInfo(Record): class CompiledKernel: + """An object connecting a kernel to a :class:`pyopencl.Context` + for execution. + + .. automethod:: __init__ + .. automethod:: __call__ + """ + def __init__(self, context, kernel): """ + :arg context: a :class:`pyopencl.Context` :arg kernel: may be a loopy.LoopKernel, a generator returning kernels (a warning will be issued if more than one is returned). 
If the kernel has not yet been loop-scheduled, that is done, too, with no @@ -908,10 +916,12 @@ class CompiledKernel: def __call__(self, queue, **kwargs): """ - :arg allocator: - :arg wait_for: - :arg out_host: - + :arg allocator: a callable passed a byte count and returning + a :class:`pyopencl.Buffer`, for example a :mod:`pyopencl` + allocator. + :arg wait_for: A list of :class:`pyopencl.Event` instances + for which to wait. + :arg out_host: :class:`bool` Decides whether output arguments (i.e. arguments written by the kernel) are to be returned as :mod:`numpy` arrays. *True* for yes, *False* for no. diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index f2bbb2882..b3a7c2133 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -132,7 +132,7 @@ def _extract_loopy_lines(source): def parse_transformed_fortran(source, free_form=True, strict=True, pre_transform_code=None, transform_code_context=None, - filename="<floopy code>"): + filename="<floopy code>", target=None): """ :arg source: a string of Fortran source code which must include a snippet of transform code as described below. 
@@ -257,7 +257,8 @@ def parse_fortran(source, filename="<floopy code>", free_form=True, strict=True, "and returned invalid data (Sorry!)") from loopy.frontend.fortran.translator import F2LoopyTranslator - f2loopy = F2LoopyTranslator(filename, auto_dependencies=auto_dependencies) + f2loopy = F2LoopyTranslator(filename, auto_dependencies=auto_dependencies, + target=None) f2loopy(tree) return f2loopy.make_kernels() diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index 53ea602f1..68dd5fa95 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -198,10 +198,11 @@ class Scope(object): # {{{ translator class F2LoopyTranslator(FTreeWalkerBase): - def __init__(self, filename, auto_dependencies): + def __init__(self, filename, auto_dependencies, target=None): FTreeWalkerBase.__init__(self) self.auto_dependencies = auto_dependencies + self.target = target self.scope_stack = [] @@ -679,6 +680,7 @@ class F2LoopyTranslator(FTreeWalkerBase): name=sub.subprogram_name, default_order="F", index_dtype=self.index_dtype, + target=self.target, ) from loopy.loop import fuse_loop_domains diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 42f83bfdf..91c22dbeb 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -90,11 +90,16 @@ class LoopKernel(RecordWithoutPickling): .. attribute:: domains a list of :class:`islpy.BasicSet` instances + representing the :ref:`domain-tree`. .. attribute:: instructions + + A list of :class:`InstructionBase` instances, e.g. + :class:`Assignment`. See :ref:`instructions`. + .. attribute:: args - A list of :class:`loopy.kernel.data.KernelArgument` + A list of :class:`loopy.KernelArgument` .. attribute:: schedule @@ -108,7 +113,7 @@ class LoopKernel(RecordWithoutPickling): .. attribute:: temporary_variables A :class:`dict` of mapping variable names to - :class:`loopy.kernel.data.TemporaryVariable` + :class:`loopy.TemporaryVariable` instances. 
.. attribute:: iname_to_tag @@ -155,7 +160,7 @@ class LoopKernel(RecordWithoutPickling): .. attribute:: target - A subclass of :class:`loopy.target.TargetBase`. + A subclass of :class:`loopy.TargetBase`. """ # {{{ constructor diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index f7c478dff..6a5d523e6 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1011,8 +1011,22 @@ def resolve_wildcard_deps(knl): def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): """User-facing kernel creation entrypoint. - :arg domains: :class:`islpy.BasicSet` + :arg domains: + + A list of :class:`islpy.BasicSet` (i.e. convex set) instances + representing the :ref:`domain-tree`. May also be a list of strings + which will be parsed into such instances according to :ref:`isl-syntax`. + :arg instructions: + + A list of :class:`Assignment` (or other :class:`InstructionBase` + subclasses), possibly intermixed with instances of + :class:`SubstitutionRule`. This same list may also contain strings + which will be parsed into such objects using the + :ref:`assignment-syntax` and the :ref:`subst-rule-syntax`. May also be + a single multi-line string which will be split into lines and then + parsed. + :arg kernel_data: A list of :class:`ValueArg`, :class:`GlobalArg`, ... (etc.) instances. @@ -1054,7 +1068,7 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): strides. They are expanded only upon kernel creation. :arg default_order: "C" (default) or "F" :arg default_offset: 0 or :class:`loopy.auto`. The default value of - *offset* in :attr:`loopy.kernel.data.GlobalArg` for guessed arguments. + *offset* in :class:`GlobalArg` for guessed arguments. Defaults to 0. 
:arg function_manglers: list of functions of signature (name, arg_dtypes) returning a tuple (result_dtype, c_name) @@ -1074,8 +1088,8 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): to silence :arg options: an instance of :class:`loopy.Options` or an equivalent string representation - :arg target: an instance of :class:`loopy.target.TargetBase`, or *None*, - to use the default target. (see :func:`loopy.set_default_target`) + :arg target: an instance of :class:`loopy.TargetBase`, or *None*, + to use the default target. """ defines = kwargs.pop("defines", {}) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9d0848788..5b0cf57e5 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -185,6 +185,8 @@ def parse_tag(tag): # {{{ arguments class KernelArgument(Record): + """Base class for all argument types""" + def __init__(self, **kwargs): kwargs["name"] = intern(kwargs.pop("name")) @@ -418,7 +420,9 @@ class SubstitutionRule(Record): # {{{ base class class InstructionBase(Record): - """ + """A base class for all types of instruction that can occur in + a kernel. + .. attribute:: id An (otherwise meaningless) identifier that is unique within @@ -427,7 +431,7 @@ class InstructionBase(Record): .. attribute:: depends_on a :class:`frozenset` of :attr:`id` values of :class:`Instruction` instances - that *must* be executed before this one. Note that + that *must* be executed before this one. Note that :func:`loopy.preprocess_kernel` (usually invoked automatically) augments this by adding dependencies on any writes to temporaries read by this instruction. @@ -493,6 +497,14 @@ class InstructionBase(Record): A tuple of string identifiers that can be used to identify groups of instructions. + + .. automethod:: __init__ + .. automethod:: assignees_and_indices + .. automethod:: with_transformed_expressions + .. automethod:: write_dependency_names + .. automethod:: dependency_names + .. automethod:: assignee_var_names + .. 
automethod:: copy """ fields = set("id depends_on depends_on_is_final " @@ -568,10 +580,12 @@ class InstructionBase(Record): predicates=predicates, tags=tags) + # legacy @property def insn_deps(self): return self.depends_on + # legacy @property def insn_deps_is_final(self): return self.depends_on_is_final @@ -740,6 +754,8 @@ class Assignment(InstructionBase): if not *None*, a type that will be assigned to the new temporary variable created from the assignee + + .. automethod:: __init__ """ fields = InstructionBase.fields | \ diff --git a/loopy/statistics.py b/loopy/statistics.py index 40bf6da2b..f0a01463a 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -35,6 +35,20 @@ from loopy.kernel.data import Assignment from loopy.diagnostic import warn +__doc__ = """ + +.. currentmodule:: loopy + +.. autofunction:: get_op_poly + +.. autofunction:: get_gmem_access_poly + +.. autofunction:: sum_mem_access_to_bytes + +.. autofunction:: get_barrier_poly + +""" + # {{{ ToCountMap class ToCountMap: diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index ee28594a5..85e58a809 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -24,9 +24,26 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +__doc__ = """ + +.. currentmodule:: loopy + +.. autoclass:: TargetBase +.. autoclass:: CTarget +.. autoclass:: CudaTarget +.. autoclass:: OpenCLTarget +.. autoclass:: PyOpenCLTarget +.. autoclass:: ISPCTarget + +""" + class TargetBase(object): - """Objects of this type must be picklable.""" + """Base class for all targets, i.e. different types of code that + loopy can generate. + + Objects of this type must be picklable. 
+ """ # {{{ persistent hashing diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 73310c4db..5d6a856d1 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -56,6 +56,9 @@ def _preamble_generator(kernel, seen_dtypes, seen_functions): class CTarget(TargetBase): + """A target for plain "C", without any parallel extensions. + """ + hash_fields = TargetBase.hash_fields + ("fortran_abi",) comparison_fields = TargetBase.comparison_fields + ("fortran_abi",) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 94a144cb9..55f8da4d6 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -162,6 +162,8 @@ class LoopyCudaCCodeMapper(LoopyCCodeMapper): # {{{ target class CudaTarget(CTarget): + """A target for Nvidia's CUDA GPU programming language.""" + def __init__(self, extern_c=True): """ :arg extern_c: If *True*, declare kernels using "extern C" to diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 737ee0d99..2c751e103 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -55,6 +55,11 @@ class LoopyISPCCodeMapper(LoopyCCodeMapper): class ISPCTarget(CTarget): + """A code generation target for Intel's `ISPC <https://ispc.github.io/>`_ + SPMD programming language, to target Intel's Knights-series hardware and modern + Intel CPUs with wide vector units. + """ + def __init__(self, occa_mode=False): """ :arg occa_mode: Whether to modify the generated call signature to diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index a009e9336..cfdc8620b 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -189,6 +189,9 @@ class LoopyOpenCLCCodeMapper(LoopyCCodeMapper): # {{{ target class OpenCLTarget(CTarget): + """A target for the OpenCL C heterogeneous compute programming language. 
+ """ + # {{{ library def function_manglers(self): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index d13384534..3c9e8aac7 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -244,8 +244,17 @@ class _LegacyTypeRegistryStub(object): from pyopencl.compyte.dtypes import dtype_to_ctype return dtype_to_ctype(dtype) +# }}} + + +# {{{ target class PyOpenCLTarget(OpenCLTarget): + """A code generation target that takes special advantage of :mod:`pyopencl` + features such as run-time knowledge of the target device (to generate + warnings) and support for complex numbers. + """ + def __init__(self, device=None): super(PyOpenCLTarget, self).__init__() diff --git a/loopy/transform/arithmetic.py b/loopy/transform/arithmetic.py index a68300929..d41222c26 100644 --- a/loopy/transform/arithmetic.py +++ b/loopy/transform/arithmetic.py @@ -25,108 +25,9 @@ THE SOFTWARE. import six -from loopy.symbolic import (RuleAwareIdentityMapper, - SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError -# {{{ split_reduction - -class _ReductionSplitter(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, within, inames, direction): - super(_ReductionSplitter, self).__init__( - rule_mapping_context) - - self.within = within - self.inames = inames - self.direction = direction - - def map_reduction(self, expr, expn_state): - if set(expr.inames) & set(expn_state.arg_context): - # FIXME - raise NotImplementedError() - - if (self.inames <= set(expr.inames) - and self.within( - expn_state.kernel, - expn_state.instruction, - expn_state.stack)): - leftover_inames = set(expr.inames) - self.inames - - from loopy.symbolic import Reduction - if self.direction == "in": - return Reduction(expr.operation, tuple(leftover_inames), - Reduction(expr.operation, tuple(self.inames), - self.rec(expr.expr, expn_state))) - elif self.direction == "out": - return Reduction(expr.operation, tuple(self.inames), - Reduction(expr.operation, 
tuple(leftover_inames), - self.rec(expr.expr, expn_state))) - else: - assert False - else: - return super(_ReductionSplitter, self).map_reduction(expr, expn_state) - - -def _split_reduction(kernel, inames, direction, within=None): - if direction not in ["in", "out"]: - raise ValueError("invalid value for 'direction': %s" % direction) - - if isinstance(inames, str): - inames = inames.split(",") - inames = set(inames) - - from loopy.context_matching import parse_stack_match - within = parse_stack_match(within) - - rule_mapping_context = SubstitutionRuleMappingContext( - kernel.substitutions, kernel.get_var_name_generator()) - rsplit = _ReductionSplitter(rule_mapping_context, - within, inames, direction) - return rule_mapping_context.finish_kernel( - rsplit.map_kernel(kernel)) - - -def split_reduction_inward(kernel, inames, within=None): - """Takes a reduction of the form:: - - sum([i,j,k], ...) - - and splits it into two nested reductions:: - - sum([j,k], sum([i], ...)) - - In this case, *inames* would have been ``"i"`` indicating that - the iname ``i`` should be made the iname governing the inner reduction. - - :arg inames: A list of inames, or a comma-separated string that can - be parsed into those - """ - - return _split_reduction(kernel, inames, "in", within) - - -def split_reduction_outward(kernel, inames, within=None): - """Takes a reduction of the form:: - - sum([i,j,k], ...) - - and splits it into two nested reductions:: - - sum([i], sum([j,k], ...)) - - In this case, *inames* would have been ``"i"`` indicating that - the iname ``i`` should be made the iname governing the outer reduction. 
- - :arg inames: A list of inames, or a comma-separated string that can - be parsed into those - """ - - return _split_reduction(kernel, inames, "out", within) - -# }}} - - # {{{ fold constants def fold_constants(kernel): diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 59239104e..1dc54f94b 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -29,6 +29,10 @@ from loopy.symbolic import (RuleAwareIdentityMapper, SubstitutionRuleMappingCont from loopy.kernel.data import ValueArg, GlobalArg import islpy as isl +__doc__ = """ +.. autofunction:: to_batched +""" + # {{{ to_batched diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8cf16bfd3..728934915 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -35,25 +35,38 @@ from loopy.symbolic import ( from loopy.diagnostic import LoopyError -# {{{ assume +__doc__ = """ +.. currentmodule:: loopy -def assume(kernel, assumptions): - if isinstance(assumptions, str): - assumptions_set_str = "[%s] -> { : %s}" \ - % (",".join(s for s in kernel.outer_params()), - assumptions) - assumptions = isl.BasicSet.read_from_str(kernel.domains[0].get_ctx(), - assumptions_set_str) +.. autofunction:: split_iname - if not isinstance(assumptions, isl.BasicSet): - raise TypeError("'assumptions' must be a BasicSet or a string") +.. autofunction:: chunk_iname - old_assumptions, new_assumptions = isl.align_two(kernel.assumptions, assumptions) +.. autofunction:: join_inames - return kernel.copy( - assumptions=old_assumptions.params() & new_assumptions.params()) +.. autofunction:: tag_inames -# }}} +.. autofunction:: duplicate_inames + +.. undocumented .. autofunction:: link_inames + +.. autofunction:: rename_iname + +.. autofunction:: remove_unused_inames + +.. autofunction:: set_loop_priority + +.. autofunction:: split_reduction_inward + +.. autofunction:: split_reduction_outward + +.. autofunction:: affine_map_inames + +.. autofunction:: realize_ilp + +.. 
autofunction:: find_unused_axis_tag + +""" # {{{ set loop priority @@ -1021,6 +1034,103 @@ def remove_unused_inames(knl, inames=None): # }}} +# {{{ split_reduction + +class _ReductionSplitter(RuleAwareIdentityMapper): + def __init__(self, rule_mapping_context, within, inames, direction): + super(_ReductionSplitter, self).__init__( + rule_mapping_context) + + self.within = within + self.inames = inames + self.direction = direction + + def map_reduction(self, expr, expn_state): + if set(expr.inames) & set(expn_state.arg_context): + # FIXME + raise NotImplementedError() + + if (self.inames <= set(expr.inames) + and self.within( + expn_state.kernel, + expn_state.instruction, + expn_state.stack)): + leftover_inames = set(expr.inames) - self.inames + + from loopy.symbolic import Reduction + if self.direction == "in": + return Reduction(expr.operation, tuple(leftover_inames), + Reduction(expr.operation, tuple(self.inames), + self.rec(expr.expr, expn_state))) + elif self.direction == "out": + return Reduction(expr.operation, tuple(self.inames), + Reduction(expr.operation, tuple(leftover_inames), + self.rec(expr.expr, expn_state))) + else: + assert False + else: + return super(_ReductionSplitter, self).map_reduction(expr, expn_state) + + +def _split_reduction(kernel, inames, direction, within=None): + if direction not in ["in", "out"]: + raise ValueError("invalid value for 'direction': %s" % direction) + + if isinstance(inames, str): + inames = inames.split(",") + inames = set(inames) + + from loopy.context_matching import parse_stack_match + within = parse_stack_match(within) + + rule_mapping_context = SubstitutionRuleMappingContext( + kernel.substitutions, kernel.get_var_name_generator()) + rsplit = _ReductionSplitter(rule_mapping_context, + within, inames, direction) + return rule_mapping_context.finish_kernel( + rsplit.map_kernel(kernel)) + + +def split_reduction_inward(kernel, inames, within=None): + """Takes a reduction of the form:: + + sum([i,j,k], ...) 
+ + and splits it into two nested reductions:: + + sum([j,k], sum([i], ...)) + + In this case, *inames* would have been ``"i"`` indicating that + the iname ``i`` should be made the iname governing the inner reduction. + + :arg inames: A list of inames, or a comma-separated string that can + be parsed into those + """ + + return _split_reduction(kernel, inames, "in", within) + + +def split_reduction_outward(kernel, inames, within=None): + """Takes a reduction of the form:: + + sum([i,j,k], ...) + + and splits it into two nested reductions:: + + sum([i], sum([j,k], ...)) + + In this case, *inames* would have been ``"i"`` indicating that + the iname ``i`` should be made the iname governing the outer reduction. + + :arg inames: A list of inames, or a comma-separated string that can + be parsed into those + """ + + return _split_reduction(kernel, inames, "out", within) + +# }}} + + # {{{ affine map inames def affine_map_inames(kernel, old_inames, new_inames, equations): diff --git a/loopy/transform/parameter.py b/loopy/transform/parameter.py index 1567263cd..f7600b212 100644 --- a/loopy/transform/parameter.py +++ b/loopy/transform/parameter.py @@ -28,6 +28,42 @@ from loopy.symbolic import (RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) import islpy as isl +__doc__ = """ + +.. currentmodule:: loopy + +.. autofunction:: fix_parameters + +.. autofunction:: assume +""" + + +# {{{ assume + +def assume(kernel, assumptions): + """Include an assumption about :ref:`domain-parameters` in the kernel, e.g. + `n mod 4 = 0`. + + :arg assumptions: a :class:`islpy.BasicSet` or a string representation of + the assumptions in :ref:`isl-syntax`. 
+ """ + if isinstance(assumptions, str): + assumptions_set_str = "[%s] -> { : %s}" \ + % (",".join(s for s in kernel.outer_params()), + assumptions) + assumptions = isl.BasicSet.read_from_str(kernel.domains[0].get_ctx(), + assumptions_set_str) + + if not isinstance(assumptions, isl.BasicSet): + raise TypeError("'assumptions' must be a BasicSet or a string") + + old_assumptions, new_assumptions = isl.align_two(kernel.assumptions, assumptions) + + return kernel.copy( + assumptions=old_assumptions.params() & new_assumptions.params()) + +# }}} + # {{{ fix_parameter @@ -99,6 +135,13 @@ def _fix_parameter(kernel, name, value): def fix_parameters(kernel, **value_dict): + """Fix the values of the arguments to specific constants. + + *value_dict* consists of *name*/*value* pairs, where *name* will be fixed + to be *value*. *name* may refer to :ref:`domain-parameters` or + :ref:`arguments`. + """ + for name, value in six.iteritems(value_dict): kernel = _fix_parameter(kernel, name, value) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 7623fb891..9ce1f9c54 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -289,7 +289,7 @@ class AssignmentToSubstChanger(RuleAwareIdentityMapper): def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, force_retain_argument=False): """Extract an assignment (to a temporary variable or an argument) - as a :ref:`substituiton-rule`. The temporary may be an array, in + as a :ref:`substitution-rule`. The temporary may be an array, in which case the array indices will become arguments to the substitution rule. -- GitLab