diff --git a/doc/source/array.rst b/doc/source/array.rst index eddf91ca43931637d1b94bc33cec61667b0048d5..34efb1cff9485fe8bce7c9f9e7112f3e5f75c697 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -529,11 +529,18 @@ Quasirandom numbers are more expensive to generate. .. versionadded:: 2012.2 - .. method:: fill_poisson(data, lambda_value, stream=None) + .. method:: fill_poisson(data, lambda_value=None, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. @@ -621,8 +628,15 @@ Quasirandom numbers are more expensive to generate. .. method:: fill_poisson(data, lambda_value, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. @@ -735,8 +749,15 @@ Quasirandom numbers are more expensive to generate. .. method:: fill_poisson(data, lambda_value, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. @@ -826,8 +847,15 @@ Quasirandom numbers are more expensive to generate. .. method:: fill_poisson(data, lambda_value, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. @@ -914,8 +942,15 @@ Quasirandom numbers are more expensive to generate. .. method:: fill_poisson(data, lambda_value, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. @@ -1005,8 +1040,15 @@ Quasirandom numbers are more expensive to generate. .. method:: fill_poisson(data, lambda_value, stream=None) Fills in :class:`GPUArray` *data* with Poisson distributed - pseudorandom values with lambda *lambda_value*. *data* must - be of type 32-bit unsigned int. + pseudorandom values. + + If *lambda_value* is not None, it is used as lambda, + and *data* must be of type 32-bit unsigned int. + + If *lambda_value* is None, the lambda value is read + from each *data* array element (similarly to numpy.random.poisson), + and the array is overwritten by the pseudorandom values. + *data* must be of type 32-bit unsigned int, 32 or 64-bit float. CUDA 5.0 and above. diff --git a/pycuda/curandom.py b/pycuda/curandom.py index 78d660e7a2a2cd5e892957d1628a7d79e2e868d4..a5d3d38d0a241522251999037eef4d0f86a73ee2 100644 --- a/pycuda/curandom.py +++ b/pycuda/curandom.py @@ -296,6 +296,17 @@ __global__ void %(name)s(%(state_type)s *s, %(out_type)s *d, double lambda, cons } """ +gen_poisson_inplace_template = """ +__global__ void %(name)s(%(state_type)s *s, %(inout_type)s *d, const int n) +{ + const int tidx = blockIdx.x*blockDim.x+threadIdx.x; + const int delta = blockDim.x*gridDim.x; + for (int idx = tidx; idx < n; idx += delta) + d[idx] = (%(inout_type)s)(curand_poisson%(suffix)s(&s[tidx], double(d[idx]))); +} +""" + + random_source = """ // Uses C++ features (templates); do not surround with extern C #include @@ -373,6 +384,12 @@ class _RandomNumberGeneratorBase(object): ("poisson_int", "unsigned int", ""), ] + gen_poisson_inplace_info = [ + ("poisson_inplace_float", "float", ""), + ("poisson_inplace_double", "double", ""), + ("poisson_inplace_int", "unsigned int", ""), + ] + def __init__(self, state_type, vector_type, generator_bits, additional_source, scramble_type=None): if get_curand_version() < (3, 2, 0): @@ -409,6 +426,10 @@ class _RandomNumberGeneratorBase(object): (name, out_type, suffix) for name, out_type, suffix in self.gen_poisson_info if do_generate(out_type)] + my_poisson_inplace_generators = [ + (name, inout_type, suffix) + for name, inout_type, suffix in self.gen_poisson_inplace_info + if do_generate(inout_type)] generator_sources = [ gen_template % { @@ -429,6 +450,11 @@ class _RandomNumberGeneratorBase(object): "name": name, "out_type": out_type, "suffix": suffix, "state_type": state_type, } for name, out_type, suffix in my_poisson_generators]) + generator_sources.extend([ + gen_poisson_inplace_template % { + "name": name, "inout_type": inout_type, "suffix": suffix, + "state_type": state_type, } + for name, inout_type, suffix in my_poisson_inplace_generators]) source = (random_source + additional_source) % { "state_type": state_type, @@ -457,6 +483,10 @@ class _RandomNumberGeneratorBase(object): gen_func = module.get_function(name) gen_func.prepare("PPdi") self.generators[name] = gen_func + for name, inout_type, suffix in my_poisson_inplace_generators: + gen_func = module.get_function(name) + gen_func.prepare("PPi") + self.generators[name] = gen_func self.generator_bits = generator_bits self._prepare_skipahead() @@ -566,17 +596,32 @@ class _RandomNumberGeneratorBase(object): return result if get_curand_version() >= (5, 0, 0): - def fill_poisson(self, data, lambda_value, stream=None): - if data.dtype == np.uint32: - func_name = "poisson_int" + def fill_poisson(self, data, lambda_value=None, stream=None): + if lambda_value is None: + if data.dtype == np.float32: + func_name = "poisson_inplace_float" + elif data.dtype == np.float64: + func_name = "poisson_inplace_double" + elif data.dtype == np.uint32: + func_name = "poisson_inplace_int" + else: + raise NotImplementedError else: - raise NotImplementedError + if data.dtype == np.uint32: + func_name = "poisson_int" + else: + raise NotImplementedError func = self.generators[func_name] - func.prepared_async_call( + if lambda_value is None: + func.prepared_async_call( (self.block_count, 1), (self.generators_per_block, 1, 1), stream, - self.state, data.gpudata, lambda_value, data.size) + self.state, data.gpudata, data.size) + else: + func.prepared_async_call( + (self.block_count, 1), (self.generators_per_block, 1, 1), stream, + self.state, data.gpudata, lambda_value, data.size) def gen_poisson(self, shape, dtype, lambda_value, stream=None): result = array.empty(shape, dtype) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 98ce1813dc53f537941e0c00564e385745a2f1fd..b8f2d43e3addf57a723db48fdf52624a4eee209a 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -298,6 +298,16 @@ class TestGPUArray: gen.gen_uniform(10000, np.uint32) if get_curand_version() >= (5, 0, 0): gen.gen_poisson(10000, np.uint32, 13.0) + for dtype in dtypes + [np.uint32]: + a = gpuarray.empty(1000000, dtype=dtype) + v = 10 + a.fill(v) + gen.fill_poisson(a) + tmp = (a.get() == (v-1)).sum() / a.size + # Commented out for CI on the off chance it'd fail + # # Check Poisson statistics (need 1e6 values) + # # Compare with scipy.stats.poisson.pmf(v - 1, v) + # assert np.isclose(0.12511, tmp, atol=0.002) @mark_cuda_test def test_array_gt(self):