From 733e8cdeeef3b479dc2cb7f87fcd3b74a12cc05c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 24 Aug 2011 10:50:32 +0200 Subject: [PATCH] Add get_simd_group_size() to characterize, use it in reduction. This makes sense because Nvidia hardware used through the Apple wrapper will not have warp size info attributes. --- pyopencl/characterize.py | 19 +++++++++++++++++++ pyopencl/reduction.py | 16 +++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/pyopencl/characterize.py b/pyopencl/characterize.py index b0a98ad0..0d9a1a46 100644 --- a/pyopencl/characterize.py +++ b/pyopencl/characterize.py @@ -242,3 +242,22 @@ def why_not_local_access_conflict_free(dev, itemsize, def get_fast_inaccurate_build_options(dev): return ["-cl-mad-enable", "-cl-fast-relaxed-math", "-cl-no-signed-zeros", "-cl-strict-aliasing"] + + + + +def get_simd_group_size(dev): + try: + return dev.warp_size_nv + except: + pass + + lc_vendor = dev.vendor.lower() + if "nvidia" in lc_vendor: + return 32 + + if ("amd" in lc_vendor or "ati" in lc_vendor) \ + and dev.type == cl.device_type.GPU: + return 32 + + return None diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index ea5adcc2..6949320b 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -171,14 +171,16 @@ def get_reduction_source( # {{{ compute synchronization-less group size def get_dev_no_sync_size(device): - try: - return device.warp_size_nv - except: - if "nvidia" in device.vendor.lower(): - from warnings import warn - warn("Reduction might be unnecessarily slow: " - "can't query warp size on Nvidia device") + from pyopencl.characterize import get_simd_group_size + result = get_simd_group_size(device) + + if result is None: + from warnings import warn + warn("Reduction might be unnecessarily slow: " + "can't query SIMD group size") return 1 + + return result no_sync_size = min(get_dev_no_sync_size(dev) for dev in devices) -- GitLab