From 48231196f7ba9ffd80831641f74df302d5d52383 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 6 Jul 2011 23:08:26 -0400 Subject: [PATCH] =?UTF-8?q?Reduction=20fixes=20for=20RV770=20by=20Ricardo?= =?UTF-8?q?=20Am=C3=A9zquita.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyopencl/characterize.py | 11 +++++++++++ pyopencl/reduction.py | 21 ++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pyopencl/characterize.py b/pyopencl/characterize.py index 469f05ae..a3b4c608 100644 --- a/pyopencl/characterize.py +++ b/pyopencl/characterize.py @@ -11,6 +11,17 @@ def has_double_support(dev): +def has_amd_double_support(dev): + """"Fix to allow incomplete amd double support in low end boards""" + + for ext in dev.extensions.split(" "): + if ext == "cl_amd_fp64": + return True + return False + + + + def reasonable_work_group_size_multiple(dev, ctx=None): try: return dev.warp_size_nv diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index 31e29b93..ea5adcc2 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -37,7 +37,6 @@ import pyopencl as cl from pyopencl.tools import ( context_dependent_memoize, dtype_to_ctype) -from pytools import memoize_method import numpy as np import pyopencl._mymako as mako @@ -45,14 +44,18 @@ import pyopencl._mymako as mako KERNEL = """ + #define GROUP_SIZE ${group_size} #define READ_AND_MAP(i) (${map_expr}) #define REDUCE(a, b) (${reduce_expr}) % if double_support: #pragma OPENCL EXTENSION cl_khr_fp64: enable + % elif amd_double_support: + #pragma OPENCL EXTENSION cl_amd_fp64: enable % endif + typedef ${out_type} out_type; ${preamble} @@ -149,13 +152,16 @@ def get_reduction_source( # {{{ compute group size def get_dev_group_size(device): + # dirty fix for the RV770 boards + max_work_group_size=device.max_work_group_size + if "RV770" in device.name: + max_work_group_size=64 return min( - device.max_work_group_size, + max_work_group_size, (device.local_mem_size + out_type_size - 1) // out_type_size) - group_size = min( - get_dev_group_size(dev) for dev in devices) + group_size = min(get_dev_group_size(dev) for dev in devices) if max_group_size is not None: group_size = min(max_group_size, group_size) @@ -180,7 +186,7 @@ def get_reduction_source( from mako.template import Template from pytools import all - from pyopencl.characterize import has_double_support + from pyopencl.characterize import has_double_support, has_amd_double_support src = str(Template(KERNEL).render( out_type=out_type, arguments=arguments, @@ -192,7 +198,9 @@ def get_reduction_source( name=name, preamble=preamble, double_support=all( - has_double_support(dev) for dev in devices) + has_double_support(dev) for dev in devices), + amd_double_support=all( + has_amd_double_support(dev) for dev in devices) )) from pytools import Record @@ -326,7 +334,6 @@ class ReductionKernel: (group_count,), self.dtype_out, allocator=repr_vec.allocator) - #print group_count, seq_count, stage_inf.group_size stage_inf.kernel( use_queue, (group_count*stage_inf.group_size,), -- GitLab