diff --git a/doc/index.rst b/doc/index.rst index 8f114eb72cdc530dd4109257c4981118c5046f06..052ee20596ba023c409035be7a01c2a4f3283726 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -47,6 +47,7 @@ Please check :ref:`installation` to get started. ref_transform ref_other misc + ref_internals Indices and tables ================== diff --git a/doc/ref_creation.rst b/doc/ref_creation.rst index 3f1035d27332f4a7e3233ef72a3d25a7190f48e8..05e0edb88245086cabea806e5aa108fa6688a9a8 100644 --- a/doc/ref_creation.rst +++ b/doc/ref_creation.rst @@ -1,6 +1,4 @@ -.. moduleauthor:: Andreas Kloeckner <inform@tiker.net> -.. module:: loopy - +.. currentmodule:: loopy .. _creating-kernels: Reference: Creating Kernels @@ -32,10 +30,4 @@ To Copy between Data Formats .. automodule:: loopy.version -Checks ------- -Before code generation phase starts a series of checks are performed. - -.. automodule:: loopy.check - .. vim: tw=75:spell:fdm=marker diff --git a/doc/ref_internals.rst b/doc/ref_internals.rst index 3c0b994011aad2ae9554fbd0fc2439fcda432fd6..3dc0a2bd7306e4b7e68d44e5956fe69e32c9c97f 100644 --- a/doc/ref_internals.rst +++ b/doc/ref_internals.rst @@ -1,7 +1,6 @@ Reference: Documentation for Internal API ========================================= - Targets ------- @@ -9,16 +8,13 @@ See also :ref:`targets`. .. automodule:: loopy.target.c - Symbolic -------- See also :ref:`expression-syntax`. - .. automodule:: loopy.symbolic - Types ----- @@ -28,30 +24,30 @@ the codegen pipeline user-provided types are converted to .. automodule:: loopy.types - Codegen ------- .. automodule:: loopy.codegen - Reduction Operation ------------------- .. automodule:: loopy.library.reduction - Iname Tags ---------- .. automodule:: loopy.kernel.data - Array ----- .. automodule:: loopy.kernel.array +Checks +------ + +.. 
automodule:: loopy.check Schedule -------- diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index a16e9100ff61b498a88dbf8c759b34ad3cc86ac6..d339e1b19caae740401c5b98ffbf8927d2477551 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -360,8 +360,6 @@ These are usually key-value pairs. The following attributes are recognized: Expressions ^^^^^^^^^^^ -.. automodule:: loopy.symbolic - Loopy's expressions are a slight superset of the expressions supported by :mod:`pymbolic`. @@ -472,7 +470,7 @@ Temporary Variables Temporary variables model OpenCL's ``private`` and ``local`` address spaces. Both have the lifetime of a kernel invocation. -.. autoclass:: temp_var_scope +.. autoclass:: AddressSpace .. autoclass:: TemporaryVariable :members: @@ -638,11 +636,8 @@ Do not create :class:`LoopKernel` objects directly. Instead, refer to :members: :undoc-members: -Implementation Details ----------------------- - -The Base Array -^^^^^^^^^^^^^^ +Implementation Details: The Base Array +-------------------------------------- All array-like data in :mod:`loopy` (such as :class:`ArrayArg` and :class:`TemporaryVariable`) derive from single, shared base array type, @@ -653,14 +648,4 @@ described next. .. autoclass:: ArrayBase -Types and pickling -^^^^^^^^^^^^^^^^^^ - -DTypes of variables in a :class:`loopy.LoopKernel` must be picklable, so in -the codegen pipeline user-provided types are converted to -:class:`loopy.types.LoopyTypes`. - -.. automodule:: loopy.types - - .. vim: tw=75:spell:fdm=marker diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 97014464db15ac41fcedfaa9c6124d7048311aca..05f928dea515a934dc365bcb5744dd887711c556 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -179,11 +179,11 @@ for good measure. >>> assert (out.get() == (2*x_vec_dev).get()).all() We can have loopy print the OpenCL kernel it generated -by passing :attr:`loopy.Options.write_cl`. +by passing :attr:`loopy.Options.write_code`. .. 
doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -227,7 +227,7 @@ inspect that code, too, using :attr:`loopy.Options.write_wrapper`: .. doctest:: - >>> knl = lp.set_options(knl, write_wrapper=True, write_cl=False) + >>> knl = lp.set_options(knl, write_wrapper=True, write_code=False) >>> evt, (out,) = knl(queue, a=x_vec_host) from __future__ import division ... @@ -247,11 +247,11 @@ inspect that code, too, using :attr:`loopy.Options.write_wrapper`: ... You can also pass options to the OpenCL implementation -by passing :attr:`loopy.Options.cl_build_options`. +by passing :attr:`loopy.Options.build_options`. .. doctest:: - >>> knl = lp.set_options(knl, cl_build_options=["-cl-mad-enable"]) + >>> knl = lp.set_options(knl, build_options=["-cl-mad-enable"]) Generating code @@ -260,12 +260,12 @@ Generating code Instead of using loopy to run the code it generates, you can also just use loopy as a code generator and take care of executing the generated kernels yourself. In this case, make sure loopy knows about all types, and then -call :func:`loopy.generate_code`: +call :func:`loopy.generate_code_v2`: .. doctest:: >>> typed_knl = lp.add_dtypes(knl, dict(a=np.float32)) - >>> code, _ = lp.generate_code(typed_knl) + >>> code = lp.generate_code_v2(typed_knl).device_code() >>> print(code) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -388,7 +388,7 @@ Let us take a look at the generated code for the above kernel: .. doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> knl = lp.prioritize_loops(knl, "i,j") >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) @@ -438,7 +438,7 @@ Now the intended code is generated and our test passes. .. 
doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -493,7 +493,7 @@ ambiguous. .. doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -531,7 +531,7 @@ is overwritten with the new kernel:: knl = lp.do_something(knl, arguments...) We've already seen an example of a transformation above: -For instance, :func:`prioritize_loops` fit the pattern. +For instance, :func:`loopy.prioritize_loops` fits the pattern. :func:`loopy.split_iname` is another fundamental (and useful) transformation. It turns one existing iname (recall that this is loopy's word for a 'loop @@ -551,7 +551,7 @@ Consider this example: ... "a[i] = 0", assumptions="n>=1") >>> knl = lp.split_iname(knl, "i", 16) >>> knl = lp.prioritize_loops(knl, "i_outer,i_inner") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -582,7 +582,7 @@ relation to loop nesting. For example, it's perfectly possible to request ... "a[i] = 0", assumptions="n>=1") >>> knl = lp.split_iname(knl, "i", 16) >>> knl = lp.prioritize_loops(knl, "i_inner,i_outer") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -607,7 +607,7 @@ commonly called 'loop tiling': >>> knl = lp.split_iname(knl, "i", 16) >>> knl = lp.split_iname(knl, "j", 16) >>> knl = lp.prioritize_loops(knl, "i_outer,j_outer,i_inner") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=a_mat_dev) #define lid(N) ((int) get_local_id(N)) ... 
@@ -649,7 +649,7 @@ loop's tag to ``"unr"``: >>> knl = lp.split_iname(knl, "i", 4) >>> knl = lp.tag_inames(knl, dict(i_inner="unr")) >>> knl = lp.prioritize_loops(knl, "i_outer,i_inner") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) #define gid(N) ((int) get_group_id(N)) @@ -724,7 +724,7 @@ Let's try this out on our vector fill kernel by creating workgroups of size ... "a[i] = 0", assumptions="n>=0") >>> knl = lp.split_iname(knl, "i", 128, ... outer_tag="g.0", inner_tag="l.0") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -770,7 +770,7 @@ assumption: >>> knl = lp.split_iname(knl, "i", 4) >>> knl = lp.tag_inames(knl, dict(i_inner="unr")) >>> knl = lp.prioritize_loops(knl, "i_outer,i_inner") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -789,7 +789,7 @@ assumption: While these conditionals enable the generated code to deal with arbitrary *n*, they come at a performance cost. Loopy allows generating separate code for the last iteration of the *i_outer* loop, by using the *slabs* keyword -argument to :func:`split_iname`. Since this last iteration of *i_outer* is +argument to :func:`loopy.split_iname`. 
Since this last iteration of *i_outer* is the only iteration for which ``i_inner + 4*i_outer`` can become larger than *n*, only the (now separate) code for that iteration contains conditionals, enabling some cost savings: @@ -798,7 +798,7 @@ enabling some cost savings: >>> knl = orig_knl >>> knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> knl = lp.prioritize_loops(knl, "i_outer,i_inner") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) @@ -894,7 +894,7 @@ memory, local to each work item. .. doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out1, out2) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -955,7 +955,7 @@ Consider the following example: ... """) >>> knl = lp.tag_inames(knl, dict(i_outer="g.0", i_inner="l.0")) >>> knl = lp.set_temporary_scope(knl, "a_temp", "local") - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> evt, (out,) = knl(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) ... @@ -1020,7 +1020,7 @@ transformation exists in :func:`loopy.add_prefetch`: ... out[16*i_outer + i_inner] = sum(k, a[16*i_outer + i_inner]) ... """) >>> knl = lp.tag_inames(knl, dict(i_outer="g.0", i_inner="l.0")) - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> knl_pf = lp.add_prefetch(knl, "a") >>> evt, (out,) = knl_pf(queue, a=x_vec_dev) #define lid(N) ((int) get_local_id(N)) @@ -1118,7 +1118,7 @@ work item: * *Local barriers* ensure consistency of memory accesses to items within *the same* work group. This synchronizes with all instructions in the work group. 
The type of memory (local or global) may be specified by the - :attr:`loopy.instruction.BarrierInstruction.mem_kind` + :attr:`loopy.BarrierInstruction.mem_kind` * *Global barriers* ensure consistency of memory accesses across *all* work groups, i.e. it synchronizes with every work item @@ -1366,7 +1366,7 @@ a loopy kernel by simply calling them, e.g.:: Additionally, all functions of one variable are currently recognized during code-generation however additional implementation may be required for custom functions. The full lists of available functions may be found in a the -:class:`TargetBase` implementation (e.g. :class:`CudaTarget`) +:class:`loopy.TargetBase` implementation (e.g. :class:`loopy.CudaTarget`) Custom user functions may be represented using the method described in :ref:`functions` @@ -1476,7 +1476,7 @@ When we ask to see the code, the issue becomes apparent: .. doctest:: - >>> knl = lp.set_options(knl, "write_cl") + >>> knl = lp.set_options(knl, "write_code") >>> from warnings import catch_warnings >>> with catch_warnings(): ... filterwarnings("always", category=lp.LoopyWarning) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index b4811dc9966921fa612aabef9a726d6b53fd4052..f775574e493a40bdd65aad76a66a127686cc2d17 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -35,6 +35,22 @@ from loopy.version import DATA_MODEL_VERSION import logging logger = logging.getLogger(__name__) +__doc__ = """ +.. currentmodule:: loopy.codegen + +.. autoclass:: ImplementedDataInfo + +.. autoclass:: PreambleInfo + +.. autoclass:: VectorizationInfo + +.. autoclass:: SeenFunction + +.. autoclass:: CodeGenerationState + +.. 
automodule:: loopy.codegen.result +""" + # {{{ implemented data info diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 4318ad71c1b16deeaac98f8408d5ca82f2de1714..980fe82eaf2ef614d1cd4f4b62d40c0f1998aa47 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -43,6 +43,19 @@ def process_preambles(preambles): for lines in dedup_preambles] +__doc__ = """ +.. currentmodule:: loopy.codegen.result + +.. autoclass:: GeneratedProgram + +.. autoclass:: CodeGenerationResult + +.. autofunction:: merge_codegen_results + +.. autofunction:: generate_host_or_device_program +""" + + # {{{ code generation result class GeneratedProgram(ImmutableRecord): diff --git a/loopy/compiled.py b/loopy/compiled.py index 613bca56fc1de23a66d45d8f990f91f9d3f9b949..baf57e2f96379aa939e949d52a5514cf36b4c0ac 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -30,6 +30,9 @@ from loopy.target.pyopencl_execution import ( # noqa # {{{ compatibility class CompiledKernel(PyOpenCLKernelExecutor): + """ + .. automethod:: __call__ + """ def __init__(self, context, kernel): from warnings import warn warn("CompiledKernel is deprecated. Use LoopKernel.__call__ directly.", diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 2d926aad4faa511aa2919630c9b0e96b7f253ad9..76785e79dd94a76193034d6afb014db26bf7e2ba 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -238,6 +238,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): .. attribute:: target A subclass of :class:`loopy.TargetBase`. + + .. automethod:: __call__ + .. automethod:: copy """ # {{{ constructor @@ -1439,6 +1442,9 @@ class LoopKernel(ImmutableRecordWithoutPickling): # {{{ direct execution def __call__(self, *args, **kwargs): + """ + Execute the :class:`LoopKernel`. 
+ """ key = self.target.get_kernel_executor_cache_key(*args, **kwargs) try: kex = self._kernel_executor_cache[key] diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 15b936fb9dce9a9fc5d82fa203253b910142ce9e..bbaf347ba6e0bbf6012ae91d2a3a9da7f834de66 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -38,6 +38,25 @@ from loopy.diagnostic import LoopyError from loopy.tools import is_integer +__doc__ = """ +.. currentmodule:: loopy.kernel.array + +.. autoclass:: ArrayDimImplementationTag + +.. autoclass:: _StrideArrayDimTagBase + +.. autoclass:: FixedStrideArrayDimTag + +.. autoclass:: ComputedStrideArrayDimTag + +.. autoclass:: SeparateArrayArrayDimTag + +.. autoclass:: VectorArrayDimTag + +.. autofunction:: parse_array_dim_tags +""" + + # {{{ array dimension tags class ArrayDimImplementationTag(ImmutableRecord): @@ -69,9 +88,8 @@ class _StrideArrayDimTagBase(ArrayDimImplementationTag): The lowest nesting level varies fastest when viewed in linear memory. - May be None on :class:`FixedStrideArrayDimTag`, in which - case no :class:`ComputedStrideArrayDimTag` instances may - occur. + May be None on :class:`FixedStrideArrayDimTag`, in which case no + :class:`ComputedStrideArrayDimTag` instances may occur. """ @@ -132,8 +150,8 @@ class ComputedStrideArrayDimTag(_StrideArrayDimTagBase): :attr:`ArrayBase.dtype` granularity to which to pad this dimension - This type of stride arg dim gets converted to :class:`FixedStrideArrayDimTag` - on input to :class:`ArrayBase` subclasses. + This type of stride arg dim gets converted to + :class:`FixedStrideArrayDimTag` on input to :class:`ArrayBase` subclasses. """ def __init__(self, layout_nesting_level, pad_to=None, target_axis=0, ): @@ -653,7 +671,7 @@ class ArrayBase(ImmutableRecord): or a string which can be parsed into the previous form. :arg dim_tags: A comma-separated list of tags as understood by - :func:`parse_array_dim_tag`. + :func:`loopy.kernel.array.parse_array_dim_tags`. 
:arg strides: May be one of the following: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index a67af6572ff023efe69f264a9771cc8a5ce4ffb7..82e9d36c225fb60d00a725371201d687d9a418e7 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -45,6 +45,28 @@ from loopy.kernel.instruction import ( # noqa CInstruction) from warnings import warn +__doc__ = """ +.. currentmodule:: loopy.kernel.data + +.. autofunction:: filter_iname_tags_by_type + +.. autoclass:: IndexTag + +.. autoclass:: ConcurrentTag + +.. autoclass:: UniqueTag + +.. autoclass:: AxisTag + +.. autoclass:: LocalIndexTag + +.. autoclass:: GroupIndexTag + +.. autoclass:: VectorizeTag + +.. autoclass:: UnrollTag +""" + class auto(object): # noqa """A generic placeholder object for something that should be automatically @@ -787,12 +809,12 @@ class CallMangleInfo(ImmutableRecord): .. attribute:: result_dtypes - A tuple of :class:`LoopyType` instances indicating what + A tuple of :class:`loopy.types.LoopyType` instances indicating what types of values the function returns. .. attribute:: arg_dtypes - A tuple of :class:`LoopyType` instances indicating what + A tuple of :class:`loopy.types.LoopyType` instances indicating what types of arguments the function actually receives. """ diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 53d05a28e7245e381be769af12d6066ffb486541..aacce544b35a31359cb535dfeacc46d6e7e2acda 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -30,6 +30,22 @@ from loopy.symbolic import FunctionIdentifier from loopy.diagnostic import LoopyError from loopy.types import NumpyType +__doc__ = """ +.. currentmodule:: loopy.library.reduction + +.. autoclass:: ReductionOperation + +.. autoclass:: ScalarReductionOperation + +.. autoclass:: SumReductionOperation + +.. autoclass:: ProductReductionOperation + +.. autoclass:: MaxReductionOperation + +.. 
autoclass:: MinReductionOperation +""" + class ReductionOperation(object): """Subclasses of this type have to be hashable, picklable, and @@ -414,7 +430,7 @@ _REDUCTION_OP_PARSERS = [ def register_reduction_parser(parser): - """Register a new :class:`ReductionOperation`. + """Register a new :class:`loopy.library.reduction.ReductionOperation`. :arg parser: A function that receives a string and returns a subclass of ReductionOperation. diff --git a/loopy/options.py b/loopy/options.py index ede2b3f59748072828f5fc4ed5eaaf412d17c0fb..8f62926960e7b21b240be8754a21800107621ef3 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -111,7 +111,7 @@ class Options(ImmutableRecord): .. attribute:: cl_exec_manage_array_events Within the PyOpenCL executor, respect and udpate - :attr:`pyopencl.array.Array.event`. + :attr:`pyopencl.array.Array.events`. Defaults to *True*. @@ -140,7 +140,7 @@ class Options(ImmutableRecord): .. attribute:: edit_code Invoke an editor (given by the environment variable - :envvar:`EDITOR`) on the generated kernel code, + ``EDITOR``) on the generated kernel code, allowing for tweaks before the code is passed on to the target for compilation. diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index b2b6553c5d9f7a92ec8f61a9b58ab4b54c7c64dc..4840d3999e680a096a61967ca32e484b7f95d490 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -39,6 +39,15 @@ import logging logger = logging.getLogger(__name__) +__doc__ = """ +.. currentmodule:: loopy.schedule + +.. autoclass:: ScheduleItem + +.. autoclass:: MinRecursionLimitForScheduling +""" + + # {{{ schedule items class ScheduleItem(ImmutableRecord): @@ -1775,8 +1784,8 @@ def generate_loop_schedules(kernel, debug_args={}): .. 
warning:: This function needs to be called inside (another layer) of a - :class:`MinRecursionLimitForScheduling` context manager, and the - context manager needs to end *after* the last reference to the + :class:`loopy.schedule.MinRecursionLimitForScheduling` context manager, + and the context manager needs to end *after* the last reference to the generators has gone out of scope. Otherwise, the high-recursion-limit generator chain may not be successfully garbage-collected and cause an internal error in the Python runtime. diff --git a/loopy/statistics.py b/loopy/statistics.py index 53ac44dc078debdf56a6227f5b3898bad992ff46..bde743ebd68b19994a8475bacb3dc1a80b7fbd8e 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -551,7 +551,7 @@ class MemAccess(Record): .. attribute:: dtype - A :class:`loopy.LoopyType` or :class:`numpy.dtype` that specifies the + A :class:`loopy.types.LoopyType` or :class:`numpy.dtype` that specifies the data type accessed. .. attribute:: lid_strides @@ -585,7 +585,7 @@ class MemAccess(Record): .. attribute:: variable_tag A :class:`str` that specifies the variable tag of a - :class:`pymbolic.primitives.TaggedVariable`. + :class:`loopy.symbolic.TaggedVariable`. .. attribute:: count_granularity @@ -1369,7 +1369,7 @@ def get_op_map(knl, numpy_types=True, count_redundant_work=False, :arg numpy_types: A :class:`bool` specifying whether the types in the returned mapping should be numpy types instead of - :class:`loopy.LoopyType`. + :class:`loopy.types.LoopyType`. :arg count_redundant_work: Based on usage of hardware axes or other specifics, a kernel may perform work redundantly. This :class:`bool` @@ -1531,7 +1531,7 @@ def get_mem_access_map(knl, numpy_types=True, count_redundant_work=False, :arg numpy_types: A :class:`bool` specifying whether the types in the returned mapping should be numpy types instead of - :class:`loopy.LoopyType`. + :class:`loopy.types.LoopyType`. 
:arg count_redundant_work: Based on usage of hardware axes or other specifics, a kernel may perform work redundantly. This :class:`bool` diff --git a/loopy/symbolic.py b/loopy/symbolic.py index be40777bc8d53fd20d13e13e02bfadd543fc9484..c8314642c8061679639fa4977f1efa2e33fe670b 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -404,7 +404,7 @@ class Literal(LoopyExpressionBase): .. note:: Only used in the output of - :mod:`loopy.target.c.expression.ExpressionToCExpressionMapper` (and + :mod:`loopy.target.c.codegen.expression.ExpressionToCExpressionMapper` (and similar mappers). Not for use in Loopy source representation. """ @@ -425,7 +425,7 @@ class ArrayLiteral(LoopyExpressionBase): .. note:: Only used in the output of - :mod:`loopy.target.c.expression.ExpressionToCExpressionMapper` (and + :mod:`loopy.target.c.codegen.expression.ExpressionToCExpressionMapper` (and similar mappers). Not for use in Loopy source representation. """ @@ -573,8 +573,7 @@ class TaggedVariable(LoopyExpressionBase, p.Variable): class Reduction(LoopyExpressionBase): """ - Represents a reduction operation on :attr:`exprs` - across :attr:`inames`. + Represents a reduction operation on :attr:`expr` across :attr:`inames`. .. attribute:: operation diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 73d2a6328af87cb51fb90d43efcde34d39aa8299..9389872f05212bc0d8dbeb4a5ff9f20fcad115a2 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -30,7 +30,6 @@ __doc__ = """ .. autoclass:: TargetBase .. autoclass:: ASTBuilderBase - .. autoclass:: CFamilyTarget .. autoclass:: CTarget .. autoclass:: ExecutableCTarget diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 909a8f6fb9f45e8175bd4e88c73d81fb18b81896..802cc7044bf73d51567e23a6eaac791982709d51 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -39,6 +39,16 @@ import pymbolic.primitives as p from pytools import memoize_method +__doc__ = """ +.. 
currentmodule:: loopy.target.c + +.. autoclass:: POD + +.. autoclass:: ScopingBlock + +.. automodule:: loopy.target.c.codegen.expression +""" + # {{{ dtype registry wrapper @@ -207,7 +217,7 @@ class POD(Declarator): class ScopingBlock(Block): """A block that is mandatory for scoping and may not be simplified away - by :func:`loopy.codegen.results.merge_codegen_results`. + by :func:`loopy.codegen.result.merge_codegen_results`. """ @@ -1046,7 +1056,7 @@ def generate_header(kernel, codegen_result=None): """ :arg kernel: a :class:`loopy.LoopKernel` :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult` - :returns: a list of AST nodes (which may have :func:`str` + :returns: a list of AST nodes (which may have :class:`str` called on them to produce a string) representing function declarations for the generated device functions. diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 6776fb3d2aa78e0dd138ec04fe0f3af044b7a543..bb9ab6355c2b15ae1435da510567d20643ac4792 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -47,9 +47,20 @@ from loopy.types import LoopyType from loopy.target.c import CExpression +__doc__ = """ +.. currentmodule:: loopy.target.c.codegen.expression + +.. autoclass:: ExpressionToCExpressionMapper +""" + + # {{{ Loopy expression to C expression mapper class ExpressionToCExpressionMapper(IdentityMapper): + """ + Mapper that converts a loopy-semantic expression to a C-semantic expression + with typecasts, appropriate arithmetic semantic mapping, etc. 
+ """ def __init__(self, codegen_state, fortran_abi=False, type_inf_mapper=None): self.kernel = codegen_state.kernel self.codegen_state = codegen_state diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 822d7df991e2a2e6ccc0a1bd3fa026fd1b8cc0f7..30ab11f878df7d9691bbf1202480d005a3e79395 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -325,7 +325,7 @@ class PyOpenCLKernelExecutor(KernelExecutorBase): def __call__(self, queue, **kwargs): """ :arg allocator: a callable passed a byte count and returning - a :class:`pyopencl.Buffer`. A :class:`pyopencl` allocator + a :class:`pyopencl.Buffer`. A :mod:`pyopencl` allocator maybe. :arg wait_for: A list of :class:`pyopencl.Event` instances for which to wait. diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 1f0161c06868da4a7c71ba1ebf9eab8ef02eeb3d..5356d49038a142945c781e58943eb86492d12b3f 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -389,7 +389,7 @@ def tag_array_axes(knl, ary_names, dim_tags): """ :arg dim_tags: a tuple of :class:`loopy.kernel.array.ArrayDimImplementationTag` or a string that - parses to one. See :func:`loopy.kernel.array.parse_dim_tags` for a + parses to one. See :func:`loopy.kernel.array.parse_array_dim_tags` for a description of the allowed string format. For example, *dim_tags* could be ``"N2,N0,N1"`` to determine @@ -398,7 +398,7 @@ def tag_array_axes(knl, ary_names, dim_tags): .. versionchanged:: 2016.2 - This function was called :func:`tag_data_axes` before version 2016.2. + This function was called ``tag_data_axes`` before version 2016.2. """ from loopy.kernel.tools import ArrayChanger @@ -434,7 +434,7 @@ def set_array_axis_names(kernel, ary_names, dim_names): """ .. versionchanged:: 2016.2 - This function was called :func:`set_array_dim_names` before version 2016.2. + This function was called ``set_array_dim_names`` before version 2016.2. 
""" from loopy.kernel.tools import ArrayChanger if isinstance(ary_names, str): @@ -669,7 +669,7 @@ def set_temporary_scope(kernel, temp_var_names, scope): :arg temp_var_names: a container with membership checking, or a comma-separated string of variables for which the scope is to be set. - :arg scope: One of the values from :class:`AddressSpace`, or one + :arg scope: One of the values from :class:`loopy.AddressSpace`, or one of the strings ``"private"``, ``"local"``, or ``"global"``. """ diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index de705660a036ca3f9791632abcfd92c5f4d4f990..2ae00d365f41144993af63d7073477d6d021aa2f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1572,9 +1572,9 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): :func:`loopy.match.parse_match`. :arg kind: may be "l" or "g", or the corresponding tag class name - :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + :returns: an :class:`loopy.kernel.data.GroupIndexTag` or + :class:`loopy.kernel.data.LocalIndexTag` that is not being used within + the instructions matched by *insn_match*. """ used_axes = set() @@ -1740,9 +1740,9 @@ def add_inames_to_insn(knl, inames, insn_match): :arg insn_match: An instruction match as understood by :func:`loopy.match.parse_match`. - :returns: an :class:`GroupIndexTag` or :class:`LocalIndexTag` - that is not being used within the instructions matched by - *insn_match*. + :returns: an :class:`loopy.kernel.data.GroupIndexTag` or + :class:`loopy.kernel.data.LocalIndexTag` that is not being used within + the instructions matched by *insn_match*. .. 
versionadded:: 2016.3 """ diff --git a/loopy/transform/padding.py b/loopy/transform/padding.py index e626d2680d37195a2b270ce5f0ce2f0cf72c079f..274f181fe1affe07f0161a0f927185878faebe04 100644 --- a/loopy/transform/padding.py +++ b/loopy/transform/padding.py @@ -385,7 +385,7 @@ def split_array_axis(kernel, array_names, axis_nr, count, order="C"): .. versionchanged:: 2016.2 There was a more complicated, dumber function called - :func:`loopy.split_array_dim` that had the role of this function in + ``loopy.split_array_dim`` that had the role of this function in versions prior to 2016.2. """