diff --git a/doc/source/tools.rst b/doc/source/tools.rst index 845d9ceae0d43714d33f509e099e3ac49e463d8b..d69a31340ef1cc4ee707984311f7eba43715a095 100644 --- a/doc/source/tools.rst +++ b/doc/source/tools.rst @@ -100,3 +100,8 @@ Testing PYOPENCL_TEST_PLATFORM_BLACKLIST=nvidia,intel PYOPENCL_TEST_DEVICE_BLACKLIST=nvidia:260,intel:i5 + +Device Characterization +----------------------- + +.. automodule:: pyopencl.characterize diff --git a/pyopencl/characterize.py b/pyopencl/characterize.py index 9a3d233af61bb8da554cd481bab226632aa83e82..d3f0344c89db60451ea3b5b3e18c5ee715285254 100644 --- a/pyopencl/characterize.py +++ b/pyopencl/characterize.py @@ -48,6 +48,10 @@ def reasonable_work_group_size_multiple(dev, ctx=None): def nv_compute_capability(dev): + """If *dev* is an Nvidia GPU :class:`pyopencl.Device`, return a tuple + *(major, minor)* indicating the device's compute capability. + """ + try: return (dev.compute_capability_major_nv, dev.compute_capability_minor_nv) @@ -61,6 +65,7 @@ def usable_local_mem_size(dev, nargs=None): """Return an estimate of the usable local memory size. :arg nargs: Number of 32-bit arguments passed. """ + usable_local_mem_size = dev.local_mem_size nv_compute_cap = nv_compute_capability(dev) @@ -165,6 +170,10 @@ def why_not_local_access_conflict_free(dev, itemsize, :param itemsize: size of accessed data in bytes :param array_shape: array dimensions, fastest-moving last (C order) + + :returns: a tuple *(multiplicity, explanation)*, where *multiplicity* + is the number of work items that will conflict on a bank when accessing + local memory. *explanation* is a string describing the conflict that was found. """ # FIXME: Treat 64-bit access on NV CC 2.x + correctly