diff --git a/doc/source/array.rst b/doc/source/array.rst index a7df8eebbd52ab6a314bf628140da71b90e82aa6..946a88ccada0bf487ae9fabf6ca6a7e2185f46d7 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -443,7 +443,7 @@ Generating Arrays of Random Numbers .. module:: pyopencl.clrandom -.. class:: RanluxGenerator(self, queue, num_work_items, max_work_items, luxury=2, seed=None) +.. class:: RanluxGenerator(self, queue, num_work_items=None, luxury=None, seed=None, max_work_items=None) :param queue: :class:`pyopencl.CommandQueue`, only used for initialization :param luxury: the "luxury value" of the generator, and should be 0-4, where 0 is fastest @@ -451,16 +451,20 @@ Generating Arrays of Random Numbers sets the p-value of RANLUXCL. :param num_work_items: is the number of generators to initialize, usually corresponding to the number of work-items in the NDRange RANLUXCL will be used with. + May be `None`, in which case a default value is used. :param max_work_items: should reflect the maximum number of work-items that will be used on any parallel instance of RANLUXCL. So for instance if we are launching 5120 work-items on GPU1 and 10240 work-items on GPU2, GPU1's RANLUXCLTab would be generated by calling ranluxcl_intialization with numWorkitems = 5120 while GPU2's RANLUXCLTab would use numWorkitems = 10240. However maxWorkitems must be at least 10240 for both GPU1 and GPU2, and it must be set to the same value - for both. + for both. (may be `None`) .. versionadded:: 2011.2 + .. versionchanged:: 2012.2 + Added default value for `num_work_items`. + .. attribute:: state A :class:`pyopencl.array.Array` containing the state of the generator. @@ -783,4 +787,3 @@ Fast Fourier Transforms Bogdan Opanchuk's `pyfft <http://pypi.python.org/pypi/pyfft>`_ package offers a variety of GPU-based FFT implementations. 
- diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index 47e1fc608a35f6bccd1154cc2992bd5b128b07e6..cc503e994157811467623cfa9f1e2a5e30e2dab6 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -10,12 +10,18 @@ import numpy as np class RanluxGenerator(object): - def __init__(self, queue, num_work_items, + def __init__(self, queue, num_work_items=None, luxury=None, seed=None, no_warmup=False, use_legacy_init=False, max_work_items=None): if luxury is None: luxury = 4 + if num_work_items is None: + if queue.device.type == cl.device_type.CPU: + num_work_items = 8 * queue.device.max_compute_units + else: + num_work_items = 64 * queue.device.max_compute_units + if seed is None: from time import time seed = int(time()*1e6) % 2<<30 @@ -253,12 +259,7 @@ class RanluxGenerator(object): @first_arg_dependent_memoize def _get_generator(queue, luxury=None): - if queue.device.type == cl.device_type.CPU: - num_work_items = 8 * queue.device.max_compute_units - else: - num_work_items = 64 * queue.device.max_compute_units - - gen = RanluxGenerator(queue, num_work_items, luxury=luxury) + gen = RanluxGenerator(queue, luxury=luxury) queue.finish() return gen