Newer
Older
#!/usr/bin/env python3
#
# Pi-by-MonteCarlo using PyCUDA/PyOpenCL
#
# performs an estimation of Pi using Monte Carlo method
# a large amount of iterations is divided and distributed to compute units
# a lot of options are provided to perform scalabilty tests
#
# use -h for complete set of options
#
# CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
# Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
#
# Thanks to Andreas Klockner for PyCUDA:
# http://mathema.tician.de/software/pycuda
# Thanks to Andreas Klockner for PyOpenCL:
# http://mathema.tician.de/software/pyopencl
# 2013-01-01 : problems with launch timeout
# http://stackoverflow.com/questions/497685/how-do-you-get-around-the-maximum-cuda-run-time
# Option "Interactive" "0" in /etc/X11/xorg.conf
# Common tools
import numpy
import sys
import getopt
import time
import itertools
from socket import gethostname
class PenStacle:
"""Pentacle of Statistics from data"""
Avg = 0
Med = 0
Std = 0
Min = 0
Max = 0
def __init__(self, Data):
self.Avg = numpy.average(Data)
self.Med = numpy.median(Data)
self.Std = numpy.std(Data)
self.Max = numpy.max(Data)
self.Min = numpy.min(Data)
print("%s %s %s %s %s" % (self.Avg, self.Med, self.Std, self.Min, self.Max))
DeviceStyle = ""
DeviceId = 0
AvgD = 0
MedD = 0
StdD = 0
MinD = 0
MaxD = 0
AvgR = 0
MedR = 0
StdR = 0
MinR = 0
MaxR = 0
def __init__(self, DeviceStyle, DeviceId, Iterations):
self.DeviceStyle = DeviceStyle
self.DeviceId = DeviceId
def Metrology(self, Data):
Duration = PenStacle(Data)
Rate = PenStacle(Iterations / Data)
print("Duration %s" % Duration)
print("Rate %s" % Rate)
def DictionariesAPI():
Marsaglia = {"CONG": 0, "SHR3": 1, "MWC": 2, "KISS": 3}
Computing = {"INT32": 0, "INT64": 1, "FP32": 2, "FP64": 3}
Test = {True: 1, False: 0}
return (Marsaglia, Computing, Test)
# find prime factors of a number
# Get for WWW :
# http://pythonism.wordpress.com/2008/05/17/looking-at-factorisation-in-python/
def PrimeFactors(x):
factorlist = numpy.array([]).astype("uint32")
loop = 2
while loop <= x:
if x % loop == 0:
x /= loop
factorlist = numpy.append(factorlist, [loop])
# Try to find the best thread number in Hybrid approach (Blocks&Threads)
# output is thread number
def BestThreadsNumber(jobs):
factors = PrimeFactors(jobs)
matrix = numpy.append([factors], [factors[::-1]], axis=0)
threads = 1
threads = threads * factor
if threads * threads > jobs or threads > 512:
# Predicted Amdahl Law (Reduced with s=1-p)
# Predicted Amdahl Law
def Amdahl(N, T1, s, p):
def Mylq(N, T1, s, c, p):
return T1 * (s + p / N) + c * N
def Mylq2(N, T1, s, c1, c2, p):
return T1 * (s + p / N) + c1 * N + c2 * N * N
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#define TCONG 0
#define TSHR3 1
#define TMWC 2
#define TKISS 3
#define TINT32 0
#define TINT64 1
#define TFP32 2
#define TFP64 3
#define IFTHEN 1
// Marsaglia RNG very simple implementation
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC (znew+wnew)
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG (jcong=69069*jcong+1234567)
#define KISS ((MWC^CONG)+SHR3)
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f
__device__ ulong MainLoop(ulong iterations,uint seed_w,uint seed_z,size_t work)
{
#if TRNG == TCONG
uint jcong=seed_z+work;
#elif TRNG == TSHR3
uint jsr=seed_w+work;
#elif TRNG == TMWC
uint z=seed_z+work;
uint w=seed_w+work;
#elif TRNG == TKISS
uint jcong=seed_z+work;
uint jsr=seed_w+work;
uint z=seed_z-work;
uint w=seed_w-work;
#endif
ulong total=0;
for (ulong i=0;i<iterations;i++) {
#if TYPE == TINT32
#define THEONE 1073741824
#if TRNG == TCONG
uint x=CONG>>17 ;
uint y=CONG>>17 ;
#elif TRNG == TSHR3
uint x=SHR3>>17 ;
uint y=SHR3>>17 ;
#elif TRNG == TMWC
uint x=MWC>>17 ;
uint y=MWC>>17 ;
#elif TRNG == TKISS
uint x=KISS>>17 ;
Loading
Loading full blame...