diff --git a/cl_types.h b/cl_types.h index 2feb15c9899a576ed0913bff8f3a7cba771cc327..5df1601343b0d2ea5540fab54b1a4c8fabdeab6e 100644 --- a/cl_types.h +++ b/cl_types.h @@ -105,6 +105,12 @@ typedef struct _cl_buffer_region { /* cl_ext.h */ +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; + /* typedef cl_ulong cl_device_partition_property_ext; typedef cl_uint cl_image_pitch_info_qcom; diff --git a/doc/make_constants.py b/doc/make_constants.py index bd2acc577130209ab574ae6a7452c7efea4b1b19..51cdc8a0fb1a44f8098bf3417ad02cf69d4c522a 100644 --- a/doc/make_constants.py +++ b/doc/make_constants.py @@ -27,11 +27,16 @@ import pyopencl as cl fission = ("cl_ext_device_fission", "2011.1") nv_devattr = ("cl_nv_device_attribute_query", "0.92") gl_sharing = ("cl_khr_gl_sharing", "0.92") +cl_spir_devattr = ("cl_khr_spir", "2016.2") cl_11 = ("CL_1.1", "0.92") cl_12 = ("CL_1.2", "2011.2") cl_12_2015 = ("CL_1.2", "2015.2") cl_20 = ("CL_2.0", "2015.2") amd_devattr = ("cl_amd_device_attribute_query", "2013.2") +qcom_hp_devattr = ("cl_qcom_ext_host_ptr", "2016.2") +intel_me_devattr = ("cl_intel_advanced_motion_estimation", "2016.2") +intel_ss_devattr = ("cl_intel_simultaneous_sharing", "2016.2") +altera_temp_devattr = ("cl_altera_device_temperature", "2016.2") def get_extra_lines(tup): @@ -90,6 +95,7 @@ const_ext_lookup = { "NATIVE_VECTOR_WIDTH_DOUBLE": cl_11, "NATIVE_VECTOR_WIDTH_HALF": cl_11, "OPENCL_C_VERSION": cl_11, + "SPIR_VERSIONS": cl_spir_devattr, "COMPUTE_CAPABILITY_MAJOR_NV": nv_devattr, "COMPUTE_CAPABILITY_MINOR_NV": nv_devattr, "REGISTERS_PER_BLOCK_NV": nv_devattr, @@ -97,6 +103,9 @@ const_ext_lookup = { "GPU_OVERLAP_NV": nv_devattr, "KERNEL_EXEC_TIMEOUT_NV": nv_devattr, "INTEGRATED_MEMORY_NV": nv_devattr, + "ATTRIBUTE_ASYNC_ENGINE_COUNT_NV": nv_devattr, + "PCI_BUS_ID_NV": nv_devattr, + "PCI_BUS_SLOT_NV": nv_devattr, 
class DeviceTopologyAmd(object):
    """PCIe location of a device as reported by ``CL_DEVICE_TOPOLOGY_AMD``.

    Each instance owns a newly allocated ``cl_device_topology_amd`` union
    (kept in :attr:`ptr`) and exposes the ``bus``, ``device`` and
    ``function`` members of its ``pcie`` view as validated Python
    properties.

    :arg bus: PCIe bus number.
    :arg device: PCIe device number.
    :arg function: PCIe function number.
    :raises ValueError: if any of the three values does not fit into a
        ``cl_char`` (signed 8-bit integer).
    """

    # Hack around fmt.__dict__ check in test_wrapper.py
    __dict__ = {}
    __slots__ = ('ptr',)

    def __init__(self, bus=0, device=0, function=0):
        self.ptr = _ffi.new("cl_device_topology_amd*")
        # Go through the property setters so that every field is validated.
        self.bus = bus
        self.device = device
        self.function = function

    def _check_range(self, value, prop=None):
        """Raise :exc:`ValueError` unless *value* fits into a ``cl_char``.

        *prop*, if given, is the name of the field being assigned; it is
        only used to make the error message more specific.
        """
        # cl_char is a signed 8-bit integer, so -128 is a legal value
        # (e.g. a bus number of 0x80 read back from the driver).
        if (value < -128) or (value > 127):
            if prop is None:
                raise ValueError(
                        "Value %s not in range [-128, 127]." % (value,))
            raise ValueError(
                    "Value %s for '%s' not in range [-128, 127]."
                    % (value, prop))

    # NOTE(review): presumably _cffi_property('pcie') turns this into an
    # accessor for ``self.ptr.pcie`` -- confirm against its definition.
    @_cffi_property('pcie')
    def _pcie(self):
        return self.ptr

    @property
    def bus(self):
        return self._pcie.bus

    @bus.setter
    def bus(self, value):
        self._check_range(value, 'bus')
        self._pcie.bus = value

    @property
    def device(self):
        return self._pcie.device

    @device.setter
    def device(self, value):
        self._check_range(value, 'device')
        self._pcie.device = value

    @property
    def function(self):
        return self._pcie.function

    @function.setter
    def function(self, value):
        self._check_range(value, 'function')
        self._pcie.function = value
#ifdef CL_DEVICE_TOPOLOGY_AMD
// Debug/trace printer for the PCIe view of an AMD device topology record.
// Writes the bus, device, function and type fields of `topol.pcie` to `stm`
// and returns `stm` so that calls can be chained.
static PYOPENCL_INLINE std::ostream&
operator<<(std::ostream &stm, const cl_device_topology_amd &topol)
{
    // bus/device/function are cl_char.  Streaming a character type emits the
    // raw byte instead of its numeric value, so promote to int first.
    stm << "pcie.bus: " << static_cast<int>(topol.pcie.bus)
        << ",\npcie.device: " << static_cast<int>(topol.pcie.device)
        << ",\npcie.function: " << static_cast<int>(topol.pcie.function)
        << ",\npcie.type: " << topol.pcie.type;
    return stm;
}
#endif
+ */ +#ifndef CL_VERSION_2_0 +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A + +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +typedef cl_bitfield cl_device_svm_capabilities; +#endif + +#ifndef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif + +/* + * Extensions + */ + +/* cl_khr_icd */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + + +/* cl_khr_fp64 */ +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 + +/* cl_khr_fp16 */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* cl_khr_terminate_context */ +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F + +/* cl_nv_device_attribute_query */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define 
CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 + +/* cl_ext_atomic_counters_{32,64} */ +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 + +/* cl_amd_device_attribute_query */ +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C + +#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; +#endif + +/* cl_amd_offline_devices */ +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F + +/* cl_ext_device_fission */ +#define cl_ext_device_fission 1 + +typedef cl_ulong cl_device_partition_property_ext; + +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */ +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define 
#ifdef CL_DEVICE_TOPOLOGY_AMD
// Query a cl_device_topology_amd record through clGetDeviceInfo and wrap it
// in a generic_info tagged with the type string "cl_device_topology_amd*".
// `args` are passed to clGetDeviceInfo ahead of the output buffer arguments.
template<typename... ArgTypes>
PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info
get_device_topology_amd(ArgTypes&&... args)
{
    const char *api_name = "clGetDeviceInfo";
    const char *type_name = "cl_device_topology_amd*";

    cl_device_topology_amd topology;
    call_guarded(clGetDeviceInfo, api_name, args...,
                 size_arg(topology), nullptr);

    generic_info result;
    result.type = type_name;
    // NOTE(review): cl_memdup presumably returns a heap copy of `topology`
    // that outlives this frame -- confirm against its definition.
    result.value = cl_memdup(&topology);
    result.opaque_class = CLASS_NONE;
    result.dontfree = 0;
    return result;
}

#define pyopencl_get_device_topology_amd(...) get_device_topology_amd(__VA_ARGS__)

#endif
CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: +#endif +#ifdef CL_DEVICE_PAGE_SIZE_QCOM + case CL_DEVICE_PAGE_SIZE_QCOM: +#endif +#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: +#endif + return DEV_GET_INT_INF(cl_uint); +#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: + return pyopencl_get_array_info(cl_uint, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); +#endif +#ifdef CL_DEVICE_SPIR_VERSIONS + case CL_DEVICE_SPIR_VERSIONS: + return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); +#endif +#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA + case CL_DEVICE_CORE_TEMPERATURE_ALTERA: + return DEV_GET_INT_INF(cl_int); +#endif default: throw clerror("Device.get_info", CL_INVALID_VALUE); diff --git a/src/c_wrapper/pyopencl_ext.h b/src/c_wrapper/pyopencl_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..4b5e7871e57d7c26a89830e5bc5bec4bb1c8667c --- /dev/null +++ b/src/c_wrapper/pyopencl_ext.h @@ -0,0 +1,34 @@ +#ifndef _PYOPENCL_EXT_H +#define _PYOPENCL_EXT_H + +#ifdef PYOPENCL_USE_SHIPPED_EXT + +#include "clinfo_ext.h" + +#else + +#ifdef __APPLE__ + +#include <OpenCL/opencl.h> + +#else + +#include <CL/cl.h> +#include <CL/cl_ext.h> + +#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; +#endif + +#endif + +#endif + +#endif + diff --git a/src/c_wrapper/wrap_cl.h b/src/c_wrapper/wrap_cl.h index 98e26963d4c620f8ccdaf2ac12fd2f4a28575901..dbd4115b9e2ccb6f0ac1ff59ff7979112a0710f7 100644 --- a/src/c_wrapper/wrap_cl.h +++ b/src/c_wrapper/wrap_cl.h @@ -9,12 +9,13 @@ #include <stdint.h> +#include "pyopencl_ext.h" + #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #ifdef __APPLE__ // {{{ Mac -#include <OpenCL/opencl.h> #define 
PYOPENCL_HAVE_EVENT_SET_CALLBACK @@ -32,9 +33,6 @@ // {{{ elsewhere -#include <CL/cl.h> -#include <CL/cl_ext.h> - #if defined(_WIN32) // {{{ Windows diff --git a/src/c_wrapper/wrap_constants.cpp b/src/c_wrapper/wrap_constants.cpp index 1a0245505acbc9397af494b8dda17dbd71277b1f..bfa882a63dac7b00c1a79393fc28d0b7660509ce 100644 --- a/src/c_wrapper/wrap_constants.cpp +++ b/src/c_wrapper/wrap_constants.cpp @@ -201,6 +201,16 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value)) ADD_ATTR("device_info", DEVICE_, GPU_OVERLAP_NV); ADD_ATTR("device_info", DEVICE_, KERNEL_EXEC_TIMEOUT_NV); ADD_ATTR("device_info", DEVICE_, INTEGRATED_MEMORY_NV); + // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h +#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV + ADD_ATTR("device_info", DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV); +#endif +#ifdef CL_DEVICE_PCI_BUS_ID_NV + ADD_ATTR("device_info", DEVICE_, PCI_BUS_ID_NV); +#endif +#ifdef CL_DEVICE_PCI_SLOT_ID_NV + ADD_ATTR("device_info", DEVICE_, PCI_SLOT_ID_NV); +#endif #endif #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_OFFSET_AMD); @@ -242,6 +252,19 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value)) ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_BANKS_AMD); #endif +#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD + ADD_ATTR("device_info", DEVICE_, THREAD_TRACE_SUPPORTED_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MAJOR_AMD + ADD_ATTR("device_info", DEVICE_, GFXIP_MAJOR_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MINOR_AMD + ADD_ATTR("device_info", DEVICE_, GFXIP_MINOR_AMD); +#endif +#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD + ADD_ATTR("device_info", DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD); +#endif + #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT ADD_ATTR("device_info", DEVICE_, MAX_ATOMIC_COUNTERS_EXT); #endif @@ -280,7 +303,36 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value)) ADD_ATTR("device_info", DEVICE_, 
def test_device_topology_amd_constructor():
    """Check that DeviceTopologyAmd stores its constructor arguments."""
    # The constructor needs no cl_amd_device_attribute_query support,
    # so this test can run against any OpenCL implementation.
    topology = cl.DeviceTopologyAmd(3, 4, 5)

    assert topology.bus == 3
    assert topology.device == 4
    assert topology.function == 5

    # The wrapper must not carry any per-instance attribute dictionary.
    assert not topology.__dict__