Skip to content
Snippets Groups Projects
Commit 8e2d29fb authored by Andreas Klöckner's avatar Andreas Klöckner
Browse files

Default more args of the RNG.

parent c27ccd26
No related branches found
No related tags found
No related merge requests found
...@@ -443,7 +443,7 @@ Generating Arrays of Random Numbers ...@@ -443,7 +443,7 @@ Generating Arrays of Random Numbers
.. module:: pyopencl.clrandom .. module:: pyopencl.clrandom
.. class:: RanluxGenerator(self, queue, num_work_items, max_work_items, luxury=2, seed=None) .. class:: RanluxGenerator(self, queue, num_work_items=None, luxury=2, seed=None, max_work_items=None)
:param queue: :class:`pyopencl.CommandQueue`, only used for initialization :param queue: :class:`pyopencl.CommandQueue`, only used for initialization
:param luxury: the "luxury value" of the generator, and should be 0-4, where 0 is fastest :param luxury: the "luxury value" of the generator, and should be 0-4, where 0 is fastest
...@@ -451,16 +451,20 @@ Generating Arrays of Random Numbers ...@@ -451,16 +451,20 @@ Generating Arrays of Random Numbers
sets the p-value of RANLUXCL. sets the p-value of RANLUXCL.
:param num_work_items: is the number of generators to initialize, usually corresponding :param num_work_items: is the number of generators to initialize, usually corresponding
to the number of work-items in the NDRange RANLUXCL will be used with. to the number of work-items in the NDRange RANLUXCL will be used with.
May be `None`, in which case a default value is used.
:param max_work_items: should reflect the maximum number of work-items that will be used :param max_work_items: should reflect the maximum number of work-items that will be used
on any parallel instance of RANLUXCL. So for instance if we are launching 5120 on any parallel instance of RANLUXCL. So for instance if we are launching 5120
work-items on GPU1 and 10240 work-items on GPU2, GPU1's RANLUXCLTab would be work-items on GPU1 and 10240 work-items on GPU2, GPU1's RANLUXCLTab would be
generated by calling ranluxcl_initialization with numWorkitems = 5120 while generated by calling ranluxcl_initialization with numWorkitems = 5120 while
GPU2's RANLUXCLTab would use numWorkitems = 10240. However maxWorkitems must GPU2's RANLUXCLTab would use numWorkitems = 10240. However maxWorkitems must
be at least 10240 for both GPU1 and GPU2, and it must be set to the same value be at least 10240 for both GPU1 and GPU2, and it must be set to the same value
for both. for both. (may be `None`)
.. versionadded:: 2011.2 .. versionadded:: 2011.2
.. versionchanged:: 2012.2
Added default value for `num_work_items`.
.. attribute:: state .. attribute:: state
A :class:`pyopencl.array.Array` containing the state of the generator. A :class:`pyopencl.array.Array` containing the state of the generator.
...@@ -783,4 +787,3 @@ Fast Fourier Transforms ...@@ -783,4 +787,3 @@ Fast Fourier Transforms
Bogdan Opanchuk's `pyfft <http://pypi.python.org/pypi/pyfft>`_ package offers a Bogdan Opanchuk's `pyfft <http://pypi.python.org/pypi/pyfft>`_ package offers a
variety of GPU-based FFT implementations. variety of GPU-based FFT implementations.
...@@ -10,12 +10,18 @@ import numpy as np ...@@ -10,12 +10,18 @@ import numpy as np
class RanluxGenerator(object): class RanluxGenerator(object):
def __init__(self, queue, num_work_items, def __init__(self, queue, num_work_items=None,
luxury=None, seed=None, no_warmup=False, luxury=None, seed=None, no_warmup=False,
use_legacy_init=False, max_work_items=None): use_legacy_init=False, max_work_items=None):
if luxury is None: if luxury is None:
luxury = 4 luxury = 4
if num_work_items is None:
if queue.device.type == cl.device_type.CPU:
num_work_items = 8 * queue.device.max_compute_units
else:
num_work_items = 64 * queue.device.max_compute_units
if seed is None: if seed is None:
from time import time from time import time
seed = int(time()*1e6) % 2<<30 seed = int(time()*1e6) % 2<<30
...@@ -253,12 +259,7 @@ class RanluxGenerator(object): ...@@ -253,12 +259,7 @@ class RanluxGenerator(object):
@first_arg_dependent_memoize @first_arg_dependent_memoize
def _get_generator(queue, luxury=None): def _get_generator(queue, luxury=None):
if queue.device.type == cl.device_type.CPU: gen = RanluxGenerator(queue, luxury=luxury)
num_work_items = 8 * queue.device.max_compute_units
else:
num_work_items = 64 * queue.device.max_compute_units
gen = RanluxGenerator(queue, num_work_items, luxury=luxury)
queue.finish() queue.finish()
return gen return gen
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment