Andreas Klöckner / pyopencl / Commits / 82936bbf

Commit 82936bbf, authored 12 years ago by Andreas Klöckner

Merge pull request #8 from davethej/master

    Made some changes to the benchmark example... let me know what you think.

Parents: 0724c5b6, fa833504

1 changed file: examples/benchmark-all.py → examples/benchmark.py (45 additions, 20 deletions)
 # example provided by Roger Pau Monn'e

+from __future__ import print_function
 import pyopencl as cl
 import numpy
 import numpy.linalg as la
 import datetime
 from time import time

-a = numpy.random.rand(1000).astype(numpy.float32)
-b = numpy.random.rand(1000).astype(numpy.float32)
+data_points = 2**23 # ~8 million data points, ~32 MB data
+workers = 2**8 # 256 workers, play with this to see performance differences
+               # eg: 2**0 => 1 worker will be non-parallel execution on gpu
+               # data points must be a multiple of workers
+
+a = numpy.random.rand(data_points).astype(numpy.float32)
+b = numpy.random.rand(data_points).astype(numpy.float32)
 c_result = numpy.empty_like(a)

 # Speed in normal CPU usage
 time1 = time()
-for i in range(1000):
-    for j in range(1000):
-        c_result[i] = a[i] + b[i]
-        c_result[i] = c_result[i] * (a[i] + b[i])
-        c_result[i] = c_result[i] * (a[i] / 2.0)
+c_temp = (a + b) # adds each element in a to its corresponding element in b
+c_temp = c_temp * c_temp # element-wise multiplication
+c_result = c_temp * (a / 2.0) # element-wise half a and multiply
 time2 = time()
 print("Execution time of test without OpenCL: ", time2 - time1, "s")
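The CPU-side rewrite above replaces a nested million-iteration Python loop with three whole-array numpy operations. As a rough standalone illustration of why that matters (a sketch for readers following along, not code from this commit; the array size is arbitrary):

    import numpy
    from time import time

    n = 2**20  # smaller than the benchmark's 2**23 so the loop finishes quickly
    a = numpy.random.rand(n).astype(numpy.float32)
    b = numpy.random.rand(n).astype(numpy.float32)

    # element by element in Python: one interpreter round-trip per element
    t0 = time()
    c_loop = numpy.empty_like(a)
    for i in range(n):
        c_loop[i] = (a[i] + b[i]) * (a[i] + b[i]) * (a[i] / 2.0)
    t_loop = time() - t0

    # the same arithmetic as three whole-array operations
    t0 = time()
    c_vec = (a + b)
    c_vec = c_vec * c_vec
    c_vec = c_vec * (a / 2.0)
    t_vec = time() - t0

    print("loop: %.3fs  vectorized: %.3fs" % (t_loop, t_vec))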
@@ -34,6 +39,8 @@ for platform in cl.get_platforms():
         print("Device memory: ", device.global_mem_size//1024//1024, 'MB')
         print("Device max clock speed:", device.max_clock_frequency, 'MHz')
         print("Device compute units:", device.max_compute_units)
+        print("Device max work group size:", device.max_work_group_size)
+        print("Device max work item sizes:", device.max_work_item_sizes)

         # Simple speed test
         ctx = cl.Context([device])
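The two added prints query standard pyopencl device attributes. The enclosing platform/device loop is collapsed in this view; a self-contained sketch of that kind of query (assumed structure, not the commit's exact code):

    import pyopencl as cl

    for platform in cl.get_platforms():
        for device in platform.get_devices():
            print("Platform:", platform.name)
            print("Device:", device.name)
            print("Device max work group size:", device.max_work_group_size)
            print("Device max work item sizes:", device.max_work_item_sizes)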
@@ -49,18 +56,38 @@ for platform in cl.get_platforms():
         __kernel void sum(__global const float *a,
         __global const float *b, __global float *c)
         {
-            int loop;
             int gid = get_global_id(0);
-            for(loop=0; loop<1000;loop++)
-            {
-                c[gid] = a[gid] + b[gid];
-                c[gid] = c[gid] * (a[gid] + b[gid]);
-                c[gid] = c[gid] * (a[gid] / 2.0);
-            }
+            float a_temp;
+            float b_temp;
+            float c_temp;
+
+            a_temp = a[gid]; // my a element (by global ref)
+            b_temp = b[gid]; // my b element (by global ref)
+            c_temp = a_temp+b_temp; // sum of my elements
+            c_temp = c_temp * c_temp; // product of sums
+            c_temp = c_temp * (a_temp/2.0); // times 1/2 my a
+            c[gid] = c_temp; // store result in global memory
         }
         """).build()

-        exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
+        global_size = (data_points,)
+        local_size = (workers,)
+        preferred_multiple = cl.Kernel(prg, 'sum').get_work_group_info( \
+            cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, \
+            device)
+
+        print("Data points:", data_points)
+        print("Workers:", workers)
+        print("Preferred work group size multiple:", preferred_multiple)
+
+        if (workers % preferred_multiple):
+            print("Number of workers not a preferred multiple (%d*N)." \
+                % (preferred_multiple))
+            print("Performance may be reduced.")
+
+        exec_evt = prg.sum(queue, global_size, local_size, a_buf, b_buf, dest_buf)
         exec_evt.wait()
         elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)
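The launch now passes an explicit global and local size and warns when the worker count is not a multiple of the kernel's preferred work-group size. The buffer and queue setup sits in the collapsed context above; a minimal end-to-end sketch of how the pieces fit together (assumed setup code, not the commit's own) could look like:

    import numpy
    import pyopencl as cl

    data_points = 2**23
    workers = 2**8  # must divide data_points

    a = numpy.random.rand(data_points).astype(numpy.float32)
    b = numpy.random.rand(data_points).astype(numpy.float32)

    ctx = cl.create_some_context()
    # profiling must be enabled on the queue for exec_evt.profile to work
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    mf = cl.mem_flags
    a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
    b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
    dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, a.nbytes)

    prg = cl.Program(ctx, """
        __kernel void sum(__global const float *a,
                          __global const float *b, __global float *c)
        {
            int gid = get_global_id(0);
            c[gid] = (a[gid] + b[gid]) * (a[gid] + b[gid]) * (a[gid] / 2.0f);
        }
        """).build()

    exec_evt = prg.sum(queue, (data_points,), (workers,), a_buf, b_buf, dest_buf)
    exec_evt.wait()
    # profile timestamps are in nanoseconds
    print("GPU time:", 1e-9 * (exec_evt.profile.end - exec_evt.profile.start), "s")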
@@ -68,11 +95,9 @@ for platform in cl.get_platforms():
         c = numpy.empty_like(a)
         cl.enqueue_read_buffer(queue, dest_buf, c).wait()

-        error = 0
-        for i in range(1000):
-            if c[i] != c_result[i]:
-                error = 1
-        if error:
+        equal = numpy.all(c == c_result)
+
+        if not equal:
             print("Results don't match!!")
         else:
             print("Results OK")
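One aside, not part of the commit: numpy.all(c == c_result) demands bitwise-identical float32 results, which can fail when the GPU and CPU round intermediate values differently. numpy.allclose is the usual tolerance-based alternative:

    import numpy

    c = numpy.array([0.3], dtype=numpy.float32)               # e.g. GPU result
    c_result = numpy.array([0.3000001], dtype=numpy.float32)  # e.g. CPU result

    print(numpy.all(c == c_result))                # False: requires exact equality
    print(numpy.allclose(c, c_result, rtol=1e-5))  # True: tolerance-based comparison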