diff --git a/cl_types.h b/cl_types.h
index 2feb15c9899a576ed0913bff8f3a7cba771cc327..5df1601343b0d2ea5540fab54b1a4c8fabdeab6e 100644
--- a/cl_types.h
+++ b/cl_types.h
@@ -105,6 +105,12 @@ typedef struct _cl_buffer_region {
 
 /* cl_ext.h */
 
+typedef union
+{
+    struct { cl_uint type; cl_uint data[5]; } raw;  /* untyped view: type tag plus five cl_uint words */
+    struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;  /* PCIe view: bus/device/function follow 17 unused bytes */
+} cl_device_topology_amd;
+
 /*
 typedef cl_ulong  cl_device_partition_property_ext;
 typedef cl_uint   cl_image_pitch_info_qcom;
diff --git a/doc/make_constants.py b/doc/make_constants.py
index bd2acc577130209ab574ae6a7452c7efea4b1b19..51cdc8a0fb1a44f8098bf3417ad02cf69d4c522a 100644
--- a/doc/make_constants.py
+++ b/doc/make_constants.py
@@ -27,11 +27,16 @@ import pyopencl as cl
 fission = ("cl_ext_device_fission", "2011.1")
 nv_devattr = ("cl_nv_device_attribute_query", "0.92")
 gl_sharing = ("cl_khr_gl_sharing", "0.92")
+cl_spir_devattr = ("cl_khr_spir", "2016.2")
 cl_11 = ("CL_1.1", "0.92")
 cl_12 = ("CL_1.2", "2011.2")
 cl_12_2015 = ("CL_1.2", "2015.2")
 cl_20 = ("CL_2.0", "2015.2")
 amd_devattr = ("cl_amd_device_attribute_query", "2013.2")
+qcom_hp_devattr = ("cl_qcom_ext_host_ptr", "2016.2")
+intel_me_devattr = ("cl_intel_advanced_motion_estimation", "2016.2")
+intel_ss_devattr = ("cl_intel_simultaneous_sharing", "2016.2")
+altera_temp_devattr = ("cl_altera_device_temperature", "2016.2")
 
 
 def get_extra_lines(tup):
@@ -90,6 +95,7 @@ const_ext_lookup = {
             "NATIVE_VECTOR_WIDTH_DOUBLE": cl_11,
             "NATIVE_VECTOR_WIDTH_HALF": cl_11,
             "OPENCL_C_VERSION": cl_11,
+            "SPIR_VERSIONS": cl_spir_devattr,
             "COMPUTE_CAPABILITY_MAJOR_NV": nv_devattr,
             "COMPUTE_CAPABILITY_MINOR_NV": nv_devattr,
             "REGISTERS_PER_BLOCK_NV": nv_devattr,
@@ -97,6 +103,9 @@ const_ext_lookup = {
             "GPU_OVERLAP_NV": nv_devattr,
             "KERNEL_EXEC_TIMEOUT_NV": nv_devattr,
             "INTEGRATED_MEMORY_NV": nv_devattr,
+            "ATTRIBUTE_ASYNC_ENGINE_COUNT_NV": nv_devattr,
+            "PCI_BUS_ID_NV": nv_devattr,
+            "PCI_SLOT_ID_NV": nv_devattr,
 
             "DOUBLE_FP_CONFIG":
             ("cl_khr_fp64", "2011.1"),
@@ -116,6 +125,19 @@ const_ext_lookup = {
             "GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD": amd_devattr,
             "LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD": amd_devattr,
             "LOCAL_MEM_BANKS_AMD": amd_devattr,
+            "THREAD_TRACE_SUPPORTED_AMD": amd_devattr,
+            "GFXIP_MAJOR_AMD": amd_devattr,
+            "GFXIP_MINOR_AMD": amd_devattr,
+            "AVAILABLE_ASYNC_QUEUES_AMD": amd_devattr,
+            
+            "ME_VERSION_INTEL": intel_me_devattr,
+            "SIMULTANEOUS_INTEROPS_INTEL": intel_ss_devattr,
+            "NUM_SIMULTANEOUS_INTEROPS_INTEL": intel_ss_devattr,
+            
+            "EXT_MEM_PADDING_IN_BYTES_QCOM": qcom_hp_devattr,
+            "PAGE_SIZE_QCOM": qcom_hp_devattr,
+            
+            "CORE_TEMPERATURE_ALTERA": altera_temp_devattr,
 
             "MAX_ATOMIC_COUNTERS_EXT":
             ("cl_ext_atomic_counters_64", "2013.2"),
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index ded51a4d8f65aac6e3f6e5191796def1e47cecac..2f95679b3e346d502c848a9c8e45b43aabf0c61f 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -165,6 +165,7 @@ from pyopencl.cffi_cl import (  # noqa
         Image,
         Sampler,
         GLTexture,
+        DeviceTopologyAmd,
         )
 
 if _cl.have_gl():
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 5864f213f2df8bac887e1badcfd7cf0e21c71555..7962bf8d22f8d699cda6b6c80795adf8f1ff2d54 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -169,6 +169,8 @@ def _generic_info_to_python(info):
 
     if type_ == 'char*':
         ret = _ffi_pystr(value)
+    elif type_ == 'cl_device_topology_amd*':
+        ret = DeviceTopologyAmd(value.pcie.bus, value.pcie.device, value.pcie.function)
     elif type_.startswith('char*['):
         ret = list(map(_ffi_pystr, value))
         _lib.free_pointer_array(info.value, len(value))
@@ -1980,4 +1982,48 @@ class GLTexture(Image, _GLObject):
 
 # }}}
 
+class DeviceTopologyAmd(object):
+    """PCIe (bus, device, function) view of a cffi ``cl_device_topology_amd``."""
+    __dict__ = {}  # Hack around fmt.__dict__ check in test_wrapper.py
+    __slots__ = ('ptr',)
+
+    def __init__(self, bus=0, device=0, function=0):
+        self.ptr = _ffi.new("cl_device_topology_amd*")
+        self.bus = bus
+        self.device = device
+        self.function = function
+
+    def _check_range(self, value, prop=None):  # returns value, or raises ValueError
+        if not -128 <= value <= 127:  # the fields are cl_char (signed 8-bit)
+            raise ValueError("Value %s not in range [-128, 127]." % (value,))
+        return value
+
+    @_cffi_property('pcie')
+    def _pcie(self):
+        return self.ptr
+
+    @property
+    def bus(self):
+        return self._pcie.bus
+
+    @bus.setter
+    def bus(self, value):
+        self._pcie.bus = self._check_range(value)
+
+    @property
+    def device(self):
+        return self._pcie.device
+
+    @device.setter
+    def device(self, value):
+        self._pcie.device = self._check_range(value)
+
+    @property
+    def function(self):
+        return self._pcie.function
+
+    @function.setter
+    def function(self, value):
+        self._pcie.function = self._check_range(value)
+
 # vim: foldmethod=marker
diff --git a/setup.py b/setup.py
index fd88b6461ec742ecbf824d1b7dfc577ce42673f0..ba00bb944188617e8df40e6e93c7132e25dcb7cd 100644
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,11 @@ def get_config_schema():
     return ConfigSchema([
         Switch("CL_TRACE", False, "Enable OpenCL API tracing"),
         Switch("CL_ENABLE_GL", False, "Enable OpenCL<->OpenGL interoperability"),
+        Switch("CL_USE_SHIPPED_EXT", True,
+            "Use the pyopencl version of CL/cl_ext.h which includes" +
+            " a broader range of vendor-specific OpenCL extension attributes" +
+            " than the standard Khronos (or vendor specific) CL/cl_ext.h."
+        ),
         Option("CL_PRETEND_VERSION", None,
             "Dotted CL version (e.g. 1.2) which you'd like to use."),
 
@@ -107,6 +112,9 @@ def main():
     if conf["CL_ENABLE_GL"]:
         extra_defines["HAVE_GL"] = 1
 
+    if conf["CL_USE_SHIPPED_EXT"]:
+        extra_defines["PYOPENCL_USE_SHIPPED_EXT"] = 1
+
     if conf["CL_PRETEND_VERSION"]:
         try:
             major, minor = [int(x) for x in conf["CL_PRETEND_VERSION"].split(".")]
diff --git a/src/c_wrapper/clhelper.h b/src/c_wrapper/clhelper.h
index 2fb6cfd2bb50805afd53154f49c82958bc228552..c88c00519b899f134a74066e2c9fa9cff547fee7 100644
--- a/src/c_wrapper/clhelper.h
+++ b/src/c_wrapper/clhelper.h
@@ -243,4 +243,15 @@ operator<<(std::ostream &stm, const cl_image_format &fmt)
     return stm;
 }
 
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+static PYOPENCL_INLINE std::ostream&
+operator<<(std::ostream &stm, const cl_device_topology_amd &topol)
+{   // cl_char members are widened to int so they print as numbers, not characters
+    stm << "pcie.bus: " << static_cast<int>(topol.pcie.bus)
+        << ",\npcie.device: " << static_cast<int>(topol.pcie.device)
+        << ",\npcie.function: " << static_cast<int>(topol.pcie.function)
+        << ",\npcie.type: " << topol.pcie.type;
+    return stm;
+}
+#endif
 #endif
diff --git a/src/c_wrapper/clinfo_ext.h b/src/c_wrapper/clinfo_ext.h
new file mode 100644
index 0000000000000000000000000000000000000000..6094c52c18057b81e09526f3576c11042163e942
--- /dev/null
+++ b/src/c_wrapper/clinfo_ext.h
@@ -0,0 +1,165 @@
+/* Include OpenCL header, and define OpenCL extensions, since what is and is not
+ * available in the official headers is very system-dependent */
+
+#ifndef PYOPENCL_CLINFO_EXT_H  /* include guard; avoids the reserved _Uppercase namespace */
+#define PYOPENCL_CLINFO_EXT_H
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+/* These two defines were introduced in the 1.2 headers
+ * on 2012-11-30, so earlier versions don't have them
+ * (e.g. Debian wheezy)
+ */
+
+#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT                 0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT          0x104B
+#endif
+
+/* 2.0 headers are not very common for the time being, so
+ * let's copy the defines for the new CL_DEVICE_* properties
+ * here.
+ */
+#ifndef CL_VERSION_2_0
+#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS             0x104C
+#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE              0x104D
+#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES            0x104E
+#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE        0x104F
+#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE              0x1050
+#define CL_DEVICE_MAX_ON_DEVICE_QUEUES                  0x1051
+#define CL_DEVICE_MAX_ON_DEVICE_EVENTS                  0x1052
+#define CL_DEVICE_SVM_CAPABILITIES                      0x1053
+#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE  0x1054
+#define CL_DEVICE_MAX_PIPE_ARGS                         0x1055
+#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS          0x1056
+#define CL_DEVICE_PIPE_MAX_PACKET_SIZE                  0x1057
+#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT   0x1058
+#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT     0x1059
+#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT      0x105A
+
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER           (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER             (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM             (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS                       (1 << 3)
+
+typedef cl_bitfield         cl_device_svm_capabilities;
+#endif
+
+#ifndef CL_VERSION_2_1
+#define CL_PLATFORM_HOST_TIMER_RESOLUTION		0x0905
+#define CL_DEVICE_IL_VERSION				0x105B
+#define CL_DEVICE_MAX_NUM_SUB_GROUPS			0x105C
+#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D
+#endif
+
+/*
+ * Extensions
+ */
+
+/* cl_khr_icd */
+#define CL_PLATFORM_ICD_SUFFIX_KHR			0x0920
+#define CL_PLATFORM_NOT_FOUND_KHR			-1001
+
+
+/* cl_khr_fp64 */
+#define CL_DEVICE_DOUBLE_FP_CONFIG			0x1032
+
+/* cl_khr_fp16 */
+#define CL_DEVICE_HALF_FP_CONFIG			0x1033
+
+/* cl_khr_terminate_context */
+#define CL_DEVICE_TERMINATE_CAPABILITY_KHR		0x200F
+
+/* cl_nv_device_attribute_query */
+#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV		0x4000
+#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV		0x4001
+#define CL_DEVICE_REGISTERS_PER_BLOCK_NV		0x4002
+#define CL_DEVICE_WARP_SIZE_NV				0x4003
+#define CL_DEVICE_GPU_OVERLAP_NV			0x4004
+#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV		0x4005
+#define CL_DEVICE_INTEGRATED_MEMORY_NV			0x4006
+#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV	0x4007
+#define CL_DEVICE_PCI_BUS_ID_NV				0x4008
+#define CL_DEVICE_PCI_SLOT_ID_NV			0x4009
+
+/* cl_ext_atomic_counters_{32,64} */
+#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT		0x4032
+
+/* cl_amd_device_attribute_query */
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD		0x4036
+#define CL_DEVICE_TOPOLOGY_AMD				0x4037
+#define CL_DEVICE_BOARD_NAME_AMD			0x4038
+#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD		0x4039
+#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD		0x4040
+#define CL_DEVICE_SIMD_WIDTH_AMD			0x4041
+#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD		0x4042
+#define CL_DEVICE_WAVEFRONT_WIDTH_AMD			0x4043
+#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD		0x4044
+#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD		0x4045
+#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD	0x4046
+#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD	0x4047
+#define CL_DEVICE_LOCAL_MEM_BANKS_AMD			0x4048
+#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD		0x4049
+#define CL_DEVICE_GFXIP_MAJOR_AMD			0x404A
+#define CL_DEVICE_GFXIP_MINOR_AMD			0x404B
+#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD		0x404C
+
+#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
+#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD		1
+
+typedef union
+{
+	struct { cl_uint type; cl_uint data[5]; } raw;
+	struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
+} cl_device_topology_amd;
+#endif
+
+/* cl_amd_offline_devices */
+#define CL_CONTEXT_OFFLINE_DEVICES_AMD			0x403F
+
+/* cl_ext_device_fission */
+#define cl_ext_device_fission				1
+
+typedef cl_ulong  cl_device_partition_property_ext;
+
+#define CL_DEVICE_PARTITION_EQUALLY_EXT			0x4050
+#define CL_DEVICE_PARTITION_BY_COUNTS_EXT		0x4051
+#define CL_DEVICE_PARTITION_BY_NAMES_EXT		0x4052
+#define CL_DEVICE_PARTITION_BY_NAMES_INTEL		0x4052 /* cl_intel_device_partition_by_names */
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT	0x4053
+
+#define CL_DEVICE_PARENT_DEVICE_EXT			0x4054
+#define CL_DEVICE_PARTITION_TYPES_EXT			0x4055
+#define CL_DEVICE_AFFINITY_DOMAINS_EXT			0x4056
+#define CL_DEVICE_REFERENCE_COUNT_EXT			0x4057
+#define CL_DEVICE_PARTITION_STYLE_EXT			0x4058
+
+#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT			0x1
+#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT			0x2
+#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT			0x3
+#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT			0x4
+#define CL_AFFINITY_DOMAIN_NUMA_EXT			0x10
+#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT		0x100
+
+/* cl_intel_advanced_motion_estimation */
+#define CL_DEVICE_ME_VERSION_INTEL			0x407E
+
+/* cl_qcom_ext_host_ptr */
+#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM		0x40A0
+#define CL_DEVICE_PAGE_SIZE_QCOM			0x40A1
+
+/* cl_khr_spir */
+#define CL_DEVICE_SPIR_VERSIONS				0x40E0
+
+/* cl_altera_device_temperature */
+#define CL_DEVICE_CORE_TEMPERATURE_ALTERA		0x40F3
+
+/* cl_intel_simultaneous_sharing */
+#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL		0x4104
+#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL	0x4105
+
+#endif
diff --git a/src/c_wrapper/device.cpp b/src/c_wrapper/device.cpp
index 5e9ec8fc29423176b6c735450b07ebe6edd9f39d..3e0525cdd3c2d7daeeefb8bd018a267a39b0e8e1 100644
--- a/src/c_wrapper/device.cpp
+++ b/src/c_wrapper/device.cpp
@@ -28,6 +28,27 @@ device::~device()
 #endif
 }
 
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+template<typename... ArgTypes>
+PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info
+get_device_topology_amd(ArgTypes&&... args)
+{
+    // Read the topology union by value, then return a copy made by cl_memdup.
+    const char *api_name = "clGetDeviceInfo";
+    cl_device_topology_amd topology;
+    call_guarded(clGetDeviceInfo, api_name, args..., size_arg(topology), nullptr);
+    generic_info result;
+    result.type = "cl_device_topology_amd*";
+    result.opaque_class = CLASS_NONE;
+    result.dontfree = 0;
+    result.value = cl_memdup(&topology);
+    return result;
+}
+
+#define pyopencl_get_device_topology_amd(...) get_device_topology_amd(__VA_ARGS__)
+
+#endif
+
 generic_info
 device::get_info(cl_uint param_name) const
 {
@@ -143,6 +164,15 @@ device::get_info(cl_uint param_name) const
     case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV:
     case CL_DEVICE_REGISTERS_PER_BLOCK_NV:
     case CL_DEVICE_WARP_SIZE_NV:
+#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV
+    case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV:
+#endif
+#ifdef CL_DEVICE_PCI_BUS_ID_NV
+    case CL_DEVICE_PCI_BUS_ID_NV:
+#endif
+#ifdef CL_DEVICE_PCI_SLOT_ID_NV
+    case CL_DEVICE_PCI_SLOT_ID_NV:
+#endif
         return DEV_GET_INT_INF(cl_uint);
     case CL_DEVICE_GPU_OVERLAP_NV:
     case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:
@@ -218,11 +248,14 @@ device::get_info(cl_uint param_name) const
     case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD:
         return DEV_GET_INT_INF(cl_ulong);
 #endif
-        /* FIXME
-           #ifdef CL_DEVICE_TOPOLOGY_AMD
-           case CL_DEVICE_TOPOLOGY_AMD:
-           #endif
-        */
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+        case CL_DEVICE_TOPOLOGY_AMD:
+            return pyopencl_get_device_topology_amd(PYOPENCL_CL_CASTABLE_THIS, param_name);
+#endif
+#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD
+    case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD:
+        return DEV_GET_INT_INF(cl_bool);
+#endif
 #ifdef CL_DEVICE_BOARD_NAME_AMD
     case CL_DEVICE_BOARD_NAME_AMD: ;
         return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name);
@@ -259,12 +292,45 @@ device::get_info(cl_uint param_name) const
 #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD
     case CL_DEVICE_LOCAL_MEM_BANKS_AMD:
 #endif
-
 #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT
     case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT:
+#endif
+#ifdef CL_DEVICE_GFXIP_MAJOR_AMD
+    case CL_DEVICE_GFXIP_MAJOR_AMD:
+#endif
+#ifdef CL_DEVICE_GFXIP_MINOR_AMD
+    case CL_DEVICE_GFXIP_MINOR_AMD:
+#endif
+#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD
+    case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD:
 #endif
         return DEV_GET_INT_INF(cl_uint);
         // }}}
+#ifdef CL_DEVICE_ME_VERSION_INTEL
+    case CL_DEVICE_ME_VERSION_INTEL:
+#endif
+#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM
+    case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM:
+#endif
+#ifdef CL_DEVICE_PAGE_SIZE_QCOM
+    case CL_DEVICE_PAGE_SIZE_QCOM:
+#endif
+#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL
+    case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL:
+#endif
+        return DEV_GET_INT_INF(cl_uint);
+#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL
+    case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL:
+        return pyopencl_get_array_info(cl_uint, Device, PYOPENCL_CL_CASTABLE_THIS, param_name);
+#endif
+#ifdef CL_DEVICE_SPIR_VERSIONS
+    case CL_DEVICE_SPIR_VERSIONS:
+        return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name);
+#endif
+#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA
+    case CL_DEVICE_CORE_TEMPERATURE_ALTERA:
+        return DEV_GET_INT_INF(cl_int);
+#endif
 
     default:
         throw clerror("Device.get_info", CL_INVALID_VALUE);
diff --git a/src/c_wrapper/pyopencl_ext.h b/src/c_wrapper/pyopencl_ext.h
new file mode 100644
index 0000000000000000000000000000000000000000..4b5e7871e57d7c26a89830e5bc5bec4bb1c8667c
--- /dev/null
+++ b/src/c_wrapper/pyopencl_ext.h
@@ -0,0 +1,34 @@
+#ifndef PYOPENCL_EXT_H  /* include guard; avoids the reserved _Uppercase namespace */
+#define PYOPENCL_EXT_H
+
+#ifdef PYOPENCL_USE_SHIPPED_EXT
+
+#include "clinfo_ext.h"
+
+#else
+
+#ifdef __APPLE__
+
+#include <OpenCL/opencl.h>
+
+#else
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
+#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD        1
+
+typedef union
+{
+    struct { cl_uint type; cl_uint data[5]; } raw;
+    struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
+} cl_device_topology_amd;
+#endif
+
+#endif
+
+#endif
+
+#endif
+
diff --git a/src/c_wrapper/wrap_cl.h b/src/c_wrapper/wrap_cl.h
index 98e26963d4c620f8ccdaf2ac12fd2f4a28575901..dbd4115b9e2ccb6f0ac1ff59ff7979112a0710f7 100644
--- a/src/c_wrapper/wrap_cl.h
+++ b/src/c_wrapper/wrap_cl.h
@@ -9,12 +9,13 @@
 
 #include <stdint.h>
 
+#include "pyopencl_ext.h"
+
 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
 
 #ifdef __APPLE__
 
 // {{{ Mac
-#include <OpenCL/opencl.h>
 
 #define PYOPENCL_HAVE_EVENT_SET_CALLBACK
 
@@ -32,9 +33,6 @@
 
 // {{{ elsewhere
 
-#include <CL/cl.h>
-#include <CL/cl_ext.h>
-
 #if defined(_WIN32)
 
 // {{{ Windows
diff --git a/src/c_wrapper/wrap_constants.cpp b/src/c_wrapper/wrap_constants.cpp
index 1a0245505acbc9397af494b8dda17dbd71277b1f..bfa882a63dac7b00c1a79393fc28d0b7660509ce 100644
--- a/src/c_wrapper/wrap_constants.cpp
+++ b/src/c_wrapper/wrap_constants.cpp
@@ -201,6 +201,16 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value))
     ADD_ATTR("device_info", DEVICE_, GPU_OVERLAP_NV);
     ADD_ATTR("device_info", DEVICE_, KERNEL_EXEC_TIMEOUT_NV);
     ADD_ATTR("device_info", DEVICE_, INTEGRATED_MEMORY_NV);
+    // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h
+#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV
+    ADD_ATTR("device_info", DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV);
+#endif
+#ifdef CL_DEVICE_PCI_BUS_ID_NV
+    ADD_ATTR("device_info", DEVICE_, PCI_BUS_ID_NV);
+#endif
+#ifdef CL_DEVICE_PCI_SLOT_ID_NV
+    ADD_ATTR("device_info", DEVICE_, PCI_SLOT_ID_NV);
+#endif
 #endif
 #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD
     ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_OFFSET_AMD);
@@ -242,6 +252,19 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value))
     ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_BANKS_AMD);
 #endif
 
+#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD
+    ADD_ATTR("device_info", DEVICE_, THREAD_TRACE_SUPPORTED_AMD);
+#endif
+#ifdef CL_DEVICE_GFXIP_MAJOR_AMD
+    ADD_ATTR("device_info", DEVICE_, GFXIP_MAJOR_AMD);
+#endif
+#ifdef CL_DEVICE_GFXIP_MINOR_AMD
+    ADD_ATTR("device_info", DEVICE_, GFXIP_MINOR_AMD);
+#endif
+#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD
+    ADD_ATTR("device_info", DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD);
+#endif
+
 #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT
     ADD_ATTR("device_info", DEVICE_, MAX_ATOMIC_COUNTERS_EXT);
 #endif
@@ -280,7 +303,36 @@ void populate_constants(void(*add)(const char*, const char*, int64_t value))
     ADD_ATTR("device_info", DEVICE_, PREFERRED_GLOBAL_ATOMIC_ALIGNMENT);
     ADD_ATTR("device_info", DEVICE_, PREFERRED_LOCAL_ATOMIC_ALIGNMENT);
 #endif
+    /* cl_intel_advanced_motion_estimation */
+#ifdef CL_DEVICE_ME_VERSION_INTEL
+    ADD_ATTR("device_info", DEVICE_, ME_VERSION_INTEL);
+#endif
 
+    /* cl_qcom_ext_host_ptr */
+#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM
+    ADD_ATTR("device_info", DEVICE_, EXT_MEM_PADDING_IN_BYTES_QCOM);
+#endif
+#ifdef CL_DEVICE_PAGE_SIZE_QCOM
+    ADD_ATTR("device_info", DEVICE_, PAGE_SIZE_QCOM);
+#endif
+
+    /* cl_khr_spir */
+#ifdef CL_DEVICE_SPIR_VERSIONS
+    ADD_ATTR("device_info", DEVICE_, SPIR_VERSIONS);
+#endif
+
+    /* cl_altera_device_temperature */
+#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA
+    ADD_ATTR("device_info", DEVICE_, CORE_TEMPERATURE_ALTERA);
+#endif
+
+    /* cl_intel_simultaneous_sharing */
+#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL
+    ADD_ATTR("device_info", DEVICE_, SIMULTANEOUS_INTEROPS_INTEL);
+#endif
+#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL
+    ADD_ATTR("device_info", DEVICE_, NUM_SIMULTANEOUS_INTEROPS_INTEL);
+#endif
 
     // device_fp_config
     ADD_ATTR("device_fp_config", FP_, DENORM);
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index e7f86c2e5eddfeeb07de1b17d9f78797c1da2dba..66b1bbc463ea95ac7c3abbe2c97737b897938a9e 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -293,6 +293,15 @@ def test_image_format_constructor():
     assert iform.channel_data_type == cl.channel_type.FLOAT
     assert not iform.__dict__
 
+def test_device_topology_amd_constructor():
+    """Build a DeviceTopologyAmd directly and read its fields back."""
+    # doesn't need cl_amd_device_attribute_query support to succeed
+    expected = (3, 4, 5)
+    topol = cl.DeviceTopologyAmd(*expected)
+
+    assert (topol.bus, topol.device, topol.function) == expected
+    # the wrapper is expected to report an empty __dict__
+    assert not topol.__dict__
 
 def test_nonempty_supported_image_formats(ctx_factory):
     context = ctx_factory()