diff --git a/pycuda/driver.py b/pycuda/driver.py
index fd042a758436fcbbd3a658f47ad71442a5fa62ef..916d0980e5c6f686b0a9c58a01ccfa9e3b1f66aa 100644
--- a/pycuda/driver.py
+++ b/pycuda/driver.py
@@ -188,7 +188,7 @@ def _add_functionality():
             if isinstance(arg, np.number):
                 arg_data.append(arg)
                 format += arg.dtype.char
-            elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
+            elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation, TextureObject, SurfaceObject)):
                 arg_data.append(int(arg))
                 format += "P"
             elif isinstance(arg, ArgumentHandler):
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index da60bd5756508e599cb2c54d643e4c0f60d17036..d04111c720e304a1c9a8dc9334624eb9ca456454 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -37,6 +37,7 @@
 #include <boost/thread/thread.hpp>
 #include <boost/thread/tss.hpp>
 #include <boost/version.hpp>
+#include <cstring>
 
 #if (BOOST_VERSION/100) < 1035
 #warning *****************************************************************
@@ -1190,6 +1191,279 @@ namespace pycuda
 
   // }}}
 
+  // {{{ bindless textures
+
+#if CUDAPP_CUDA_VERSION >= 6000
+
+  template <class T, class R>
+  void tuple_to_array(T *dst, py::tuple src_py, unsigned n)
+  {
+    pycuda_size_t src_length = py::len(src_py);
+    if (src_length != n)
+      throw pycuda::error(
+          "setter:", CUDA_ERROR_INVALID_VALUE,
+          "incorrect number of elements in tuple");
+
+    for (unsigned i = 0; i < src_length; ++i)
+    {
+      R value = py::extract<R>(src_py[i]);
+      dst[i] = (T) value;
+    }
+  }
+
+  template <class T, class R>
+  py::tuple array_to_tuple_3(T *src)
+  {
+    return py::make_tuple((R) src[0], (R) src[1], (R) src[2]);
+  }
+
+  template <class T, class R>
+  py::tuple array_to_tuple_4(T *src)
+  {
+    return py::make_tuple((R) src[0], (R) src[1], (R) src[2], (R) src[3]);
+  }
+
+  // {{{ texture descriptor
+
+  struct WRAP_CUDA_TEXTURE_DESC : public CUDA_TEXTURE_DESC
+  {
+    WRAP_CUDA_TEXTURE_DESC()
+    {
+      // This class is a standard-layout type. CUDA_TEXTURE_DESC needs to
+      // be memset to 0. When it was a member of this class instead of a
+      // base, wrapping of its member fields failed. So, unorthodox as
+      // this might be, deriving from it and calling memset(this, ...)
+      // works and is permissible because of standard layout.
+      // This can be verified with:
+      //   #include <type_traits>
+      //   printf("traits: %d\n",
+      //       std::is_standard_layout<WRAP_CUDA_TEXTURE_DESC>::value);
+      // Do not add *any* member variables or virtual functions to this
+      // class -- that would make it non-standard-layout.
+      memset(this, 0, sizeof(CUDA_TEXTURE_DESC));
+    }
+
+    py::tuple get_address_mode()
+    {
+      return array_to_tuple_3<CUaddress_mode, unsigned>(addressMode);
+    }
+
+    void set_address_mode(py::tuple src_py)
+    {
+      tuple_to_array<CUaddress_mode, unsigned>(addressMode, src_py, 3);
+    }
+
+    py::tuple get_border_color()
+    {
+      return array_to_tuple_4<float, float>(borderColor);
+    }
+
+    void set_border_color(py::tuple src_py)
+    {
+      tuple_to_array<float, float>(borderColor, src_py, 4);
+    }
+  };
+
+  // }}}
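[Usage sketch, not part of the patch: how the descriptor wrapper above is meant to be driven from Python once the `TextureDescriptor` binding further down is registered. `drv.filter_mode` and the `TRSF_*` constants already exist in PyCUDA; the integer address-mode values mirror the driver's `CU_TR_ADDRESS_MODE_*` enum (0=WRAP, 1=CLAMP, 2=MIRROR, 3=BORDER).]

```python
import pycuda.autoinit  # noqa: F401 -- creates a context
import pycuda.driver as drv

tdesc = drv.TextureDescriptor()            # zero-initialized CUDA_TEXTURE_DESC
tdesc.address_mode = (1, 1, 1)             # CU_TR_ADDRESS_MODE_CLAMP per axis
tdesc.filter_mode = drv.filter_mode.LINEAR      # maps onto CUfilter_mode
tdesc.flags = drv.TRSF_NORMALIZED_COORDINATES   # CU_TRSF_* bit flags
tdesc.border_color = (0.0, 0.0, 0.0, 1.0)  # only consulted in BORDER mode
```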
+  // {{{ surface object
+
+  class surface_object : public boost::noncopyable
+  {
+    private:
+      boost::shared_ptr<array> m_array;
+      CUsurfObject m_cusurf;
+
+    public:
+      surface_object(boost::shared_ptr<array> ary)
+        : m_array(ary), m_cusurf(0)
+      {
+        CUDA_RESOURCE_DESC resdesc;
+        memset(&resdesc, 0, sizeof(resdesc));
+
+        resdesc.resType = CU_RESOURCE_TYPE_ARRAY;
+        resdesc.res.array.hArray = m_array->handle();
+        CUDAPP_CALL_GUARDED(cuSurfObjectCreate, (&m_cusurf, &resdesc));
+      }
+
+      CUsurfObject to_int()
+      {
+        // CUsurfObject is a typedef for unsigned long long
+        return m_cusurf;
+      }
+
+      ~surface_object()
+      {
+        CUDAPP_CALL_GUARDED_CLEANUP(cuSurfObjectDestroy, (m_cusurf));
+      }
+  };
+
+  // }}}
+
+  // {{{ texture object
+
+  class texture_object : public boost::noncopyable
+  {
+    private:
+      // either a numpy array or a CUDA array
+      py::object m_array;
+      CUtexObject m_cutex;
+
+    public:
+      texture_object(py::object ary, const CUDA_TEXTURE_DESC &texdesc)
+        : m_array(ary), m_cutex(0)
+      {
+        CUDA_RESOURCE_DESC resdesc;
+        memset(&resdesc, 0, sizeof(resdesc));
+
+        py::extract<const array &> extract_array(ary);
+        if (extract_array.check())
+        {
+          const array &pycuda_array = extract_array();
+          resdesc.resType = CU_RESOURCE_TYPE_ARRAY;
+          resdesc.res.array.hArray = pycuda_array.handle();
+          CUDAPP_CALL_GUARDED(cuTexObjectCreate,
+              (&m_cutex, &resdesc, &texdesc, NULL));
+          return;
+        }
+
+        if (!PyArray_Check(m_array.ptr()))
+          throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE,
+              "ary argument is not a numpy array");
+
+        PyArrayObject *np_a = (PyArrayObject *) m_array.ptr();
+        void *data = PyArray_DATA(np_a);
+        unsigned memtype;
+        CUDAPP_CALL_GUARDED(cuPointerGetAttribute,
+            (&memtype, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr) data));
+        if (memtype != CU_MEMORYTYPE_DEVICE)
+          throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE,
+              "ary is not device memory");
+
+        int ndim = PyArray_NDIM(np_a);
+        int item_type = PyArray_TYPE(np_a);
+        int item_size = PyArray_ITEMSIZE(np_a);
+        npy_intp *shape_v = PyArray_SHAPE(np_a);
+        npy_intp *stride_v = PyArray_STRIDES(np_a);
+
+        // This used to be a check for !PyArray_ISCONTIGUOUS(ary.ptr()).
+        // However, we use views into padded arrays to create the
+        // equivalent of pitch2d memory from managed memory, and all views
+        // count as non-contiguous. So we instead check that the stride of
+        // the last dimension equals the item size. Also, as of Jan 2019,
+        // textures do not allow double precision.
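[Usage sketch, not part of the patch: the pitch2D branch below is what the "views into padded arrays" comment refers to. Shapes are (height, width, channels), so the last axis is the channel count; the 128-byte row alignment is illustrative only -- the actual requirement is the device's CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT.]

```python
import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as drv

h, w, align = 480, 500, 128                       # align: illustrative
row_items = ((w*4 + align - 1)//align*align)//4   # float32 items per padded row
padded = drv.managed_empty((h, row_items, 1), np.float32,
                           mem_flags=drv.mem_attach_flags.GLOBAL)
view = padded[:, :w, :]  # non-contiguous view, unit stride on the last axis
# the view's stride_v[0] (padded row size in bytes) becomes pitchInBytes
tex = drv.TextureObject(view, drv.TextureDescriptor())
```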
+        if ((stride_v[ndim-1] != item_size) || (item_size > (int) sizeof(float)))
+          throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE,
+              "ary argument is not contiguous or not texture-compatible");
+
+        int nchan, ndim_effective;
+        if (ndim == 1)
+        {
+          nchan = 1;
+          ndim_effective = 1;
+        }
+        else
+        {
+          // the last axis is interpreted as the channel count
+          nchan = shape_v[ndim - 1];
+          ndim_effective = ndim - 1;
+        }
+
+        if ((nchan < 1) || (nchan > 4))
+          throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE,
+              "number of texture channels should be in {1,2,3,4}");
+        if (!((ndim_effective == 1) || (ndim_effective == 2)))
+          throw pycuda::error(
+              "texture_object", CUDA_ERROR_INVALID_VALUE,
+              "textures are limited to 1 or 2 dimensions of texels");
+
+        if (ndim_effective == 1)
+        {
+          resdesc.resType = CU_RESOURCE_TYPE_LINEAR;
+          resdesc.res.linear.format = get_cuarray_format(item_type);
+          resdesc.res.linear.devPtr = (CUdeviceptr) data;
+          resdesc.res.linear.numChannels = nchan;
+          resdesc.res.linear.sizeInBytes = PyArray_NBYTES(np_a);
+        }
+        else
+        {
+          resdesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+          resdesc.res.pitch2D.format = get_cuarray_format(item_type);
+          resdesc.res.pitch2D.devPtr = (CUdeviceptr) data;
+          resdesc.res.pitch2D.height = shape_v[0];
+          if (nchan == 3)
+          {
+            // CUDA does not support 3-channel images, even though RGB/BGR
+            // is a common case. For convolution kernels we can still use
+            // the 3-channel image without copying, but we need to treat
+            // it as a 3x-wide image with 1 channel.
+            resdesc.res.pitch2D.numChannels = 1;
+            resdesc.res.pitch2D.width = shape_v[1] * 3;
+          }
+          else
+          {
+            resdesc.res.pitch2D.numChannels = nchan;
+            resdesc.res.pitch2D.width = shape_v[1];
+          }
+          resdesc.res.pitch2D.pitchInBytes = stride_v[0];
+        }
+
+        CUDAPP_CALL_GUARDED(cuTexObjectCreate,
+            (&m_cutex, &resdesc, &texdesc, NULL));
+      }
+
+      CUarray_format get_cuarray_format(int type)
+      {
+        switch (type)
+        {
+          case NPY_INT8:
+            return CU_AD_FORMAT_SIGNED_INT8;
+          case NPY_UINT8:
+            return CU_AD_FORMAT_UNSIGNED_INT8;
+          case NPY_INT16:
+            return CU_AD_FORMAT_SIGNED_INT16;
+          case NPY_UINT16:
+            return CU_AD_FORMAT_UNSIGNED_INT16;
+          case NPY_INT32:
+            return CU_AD_FORMAT_SIGNED_INT32;
+          case NPY_UINT32:
+            return CU_AD_FORMAT_UNSIGNED_INT32;
+          case NPY_FLOAT16:
+            return CU_AD_FORMAT_HALF;
+          case NPY_FLOAT32:
+            return CU_AD_FORMAT_FLOAT;
+          default:
+            throw pycuda::error("texture_object", CUDA_ERROR_INVALID_VALUE,
+                "unsupported numpy type");
+        }
+      }
+
+      CUtexObject to_int()
+      {
+        return m_cutex;
+      }
+
+      ~texture_object()
+      {
+        CUDAPP_CALL_GUARDED_CLEANUP(cuTexObjectDestroy, (m_cutex));
+      }
+  };
+
+  // }}}
+
+#endif // CUDAPP_CUDA_VERSION >= 6000
+
+  // }}}
+
   // {{{ surface reference
 #if CUDAPP_CUDA_VERSION >= 3010
   class module;
@@ -1199,7 +1473,6 @@ namespace pycuda
   {
     private:
       CUsurfref m_surfref;
 
-      // life support for array and module
       boost::shared_ptr<array> m_array;
       boost::shared_ptr<module> m_module;
@@ -2194,9 +2467,8 @@ namespace pycuda
   }
 #endif
   // }}}
-}
-
+}
 
 #endif
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index dfa3d1cda929f86a186fb6788bd0e438128a0f6f..39467d940208246bd29caf56c4eb2442bf5bdd5d 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -260,7 +260,7 @@ namespace
     py_buffer_wrapper buf_wrapper;
     buf_wrapper.get(dest.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);
 
-    CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoH, 
+    CUDAPP_CALL_GUARDED_THREADED(cuMemcpyAtoH,
         (buf_wrapper.m_buf.buf, ary.handle(), index, buf_wrapper.m_buf.len));
   }
 
@@ -1348,7 +1348,7 @@ BOOST_PYTHON_MODULE(_driver)
     wrp
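[Usage sketch, not part of the patch: combined with the driver.py change at the top, which marshals TextureObject/SurfaceObject as "P" arguments, the bindings above support a round trip like the following. `copy_tex` is a made-up kernel, not part of this patch.]

```python
import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as drv
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void copy_tex(cudaTextureObject_t tex, float *out)
{
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    out[i] = tex1Dfetch<float>(tex, i);
}
""")

n = 256
src = drv.managed_empty((n, 1), np.float32,  # (n, 1): one channel
                        mem_flags=drv.mem_attach_flags.GLOBAL)
src[:] = np.random.randn(n, 1)
out = drv.managed_empty(n, np.float32, mem_flags=drv.mem_attach_flags.GLOBAL)

tex = drv.TextureObject(src, drv.TextureDescriptor())
mod.get_function("copy_tex")(tex, out, block=(n, 1, 1))
drv.Context.synchronize()
assert np.allclose(out, src.ravel())
```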
       .DEF_SIMPLE_METHOD(get_device_pointer)
-      .def("attach", &cl::attach, 
+      .def("attach", &cl::attach,
           (py::arg("mem_flags"), py::arg("stream")=py::object()))
       ;
   }
@@ -1633,6 +1633,41 @@ BOOST_PYTHON_MODULE(_driver)
   }
   // }}}
 
+#if CUDAPP_CUDA_VERSION >= 6000
+  // {{{ texture descriptor
+
+  {
+    typedef WRAP_CUDA_TEXTURE_DESC cl;
+    py::class_<cl>("TextureDescriptor")
+      .add_property("address_mode", &cl::get_address_mode, &cl::set_address_mode)
+      .def_readwrite("filter_mode", &cl::filterMode)
+      .def_readwrite("flags", &cl::flags)
+      .add_property("border_color", &cl::get_border_color, &cl::set_border_color)
+      ;
+  }
+
+  // }}}
+
+  // {{{ texture object
+
+  {
+    typedef texture_object cl;
+    py::class_<cl, boost::noncopyable>("TextureObject",
+        py::init<py::object, const CUDA_TEXTURE_DESC &>())
+      .def("__int__", &cl::to_int)
+      ;
+  }
+
+  // }}}
+
+  // {{{ surface object
+
+  {
+    typedef surface_object cl;
+    py::class_<cl, boost::noncopyable>("SurfaceObject",
+        py::init<boost::shared_ptr<array> >())
+      .def("__int__", &cl::to_int)
+      ;
+  }
+
+  // }}}
+#endif
+
   // {{{ surface reference
 #if CUDAPP_CUDA_VERSION >= 3010
   {
@@ -1669,6 +1704,7 @@ BOOST_PYTHON_MODULE(_driver)
 #ifdef HAVE_CURAND
   pycuda_expose_curand();
 #endif
+
 }
 
 // vim: foldmethod=marker
diff --git a/test/test_driver.py b/test/test_driver.py
index b440eff09e9791ea7977afd1cba550f0cebe173f..8425e7e38f90789fdae09d76a6a1dfbb9ea78ef0 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -947,6 +947,86 @@ class TestDriver:
         test_kernel = mod.get_function('test_kernel')
         test_kernel(grid=(2, 1), block=(1, 1, 1))
 
+    @mark_cuda_test
+    def test_bindless_textures(self):
+        if drv.get_version() < (6, 0, 0):
+            pytest.skip("bindless textures not supported on CUDA < 6")
+
+        mod = SourceModule("""
+        __global__ void gpu_add(float *a, float *b, float *res)
+        {
+            int idx = threadIdx.x + threadIdx.y*16;
+            res[idx] = a[idx] + b[idx];
+        }
+
+        // the texture object is an ordinary kernel argument
+        __global__ void tex_kernel1(
+            cudaTextureObject_t tex, cudaSurfaceObject_t surf)
+        {
+            int i = blockIdx.x*blockDim.x + threadIdx.x;
+            float x = tex1Dfetch<float>(tex, i);
+            // do some work using x ...
+        }
+        """)
+
+        print('TextureObject exists. Members:', dir(drv.TextureObject))
+
+        tdesc = drv.TextureDescriptor()
+        print('TextureDescriptor exists. Members:', dir(tdesc))
+
+        print('Setting address mode')
+        print('before', tdesc.address_mode)
+        tdesc.address_mode = (1, 2, 3)
+        print('after', tdesc.address_mode)
+
+        adesc = drv.ArrayDescriptor()
+        adesc.width = 1024
+        adesc.height = 768
+        adesc.format = drv.array_format.UNSIGNED_INT8
+        adesc.num_channels = 4
+        sarray = drv.Array(adesc)
+        print('sarray made')
+        surf1 = drv.SurfaceObject(sarray)
+        print('surf1 made')
+
+        a_um_a = drv.managed_empty(
+            (16, 16, 4), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL)
+        a_um_a[:] = np.random.randn(*a_um_a.shape)
+
+        import sys
+        print('pre ref_count', sys.getrefcount(a_um_a))
+        tex2 = drv.TextureObject(a_um_a, tdesc)
+        print('post ref_count', sys.getrefcount(a_um_a))
+        print('tex2 made')
+
+        print('{0:016x}'.format(id(tex2)), '{0:016x}'.format(int(tex2)))
+
+        func = mod.get_function("tex_kernel1")
+        func(tex2, surf1, block=(16, 16, 1))
+        drv.Context.synchronize()
+
+        # Unified-memory version of hello world.
+        a_um_a = drv.managed_empty(
+            (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL)
+        a_um_a[:] = np.random.randn(*a_um_a.shape)
+
+        b_um_a = drv.managed_empty(
+            (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL)
+        b_um_a[:] = np.random.randn(*b_um_a.shape)
+
+        res_um_a = drv.managed_empty(
+            (16, 16), np.float32, mem_flags=drv.mem_attach_flags.GLOBAL)
+
+        func = mod.get_function("gpu_add")
+        func(a_um_a, b_um_a, res_um_a, block=(16, 16, 1))
+        drv.Context.synchronize()
+
+        print(res_um_a)
+
+        assert np.allclose(res_um_a, a_um_a + b_um_a)
+
 
 def test_import_pyopencl_before_pycuda():
     try:
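[Usage sketch, not part of the patch: the nchan == 3 branch in texture_object presents an RGB image of shape (h, w, 3) to CUDA as an (h, 3*w) single-channel texture, so channel c of texel (x, y) lives at x-coordinate 3*x + c. The `gray` kernel below is illustrative only.]

```python
import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as drv
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void gray(cudaTextureObject_t rgb, float *out, int w)
{
    int x = blockIdx.x*blockDim.x + threadIdx.x;
    int y = blockIdx.y*blockDim.y + threadIdx.y;
    // point filtering: +0.5f lands in the middle of the addressed texel
    float r = tex2D<float>(rgb, 3*x + 0.5f, y + 0.5f);
    float g = tex2D<float>(rgb, 3*x + 1.5f, y + 0.5f);
    float b = tex2D<float>(rgb, 3*x + 2.5f, y + 0.5f);
    out[y*w + x] = 0.299f*r + 0.587f*g + 0.114f*b;
}
""")

h, w = 16, 16
img = drv.managed_empty((h, w, 3), np.float32,
                        mem_flags=drv.mem_attach_flags.GLOBAL)
img[:] = np.random.rand(h, w, 3)
out = drv.managed_empty((h, w), np.float32,
                        mem_flags=drv.mem_attach_flags.GLOBAL)

tdesc = drv.TextureDescriptor()
tdesc.address_mode = (1, 1, 1)  # clamp; wrap requires normalized coordinates
tex = drv.TextureObject(img, tdesc)
mod.get_function("gray")(tex, out, np.int32(w), block=(w, h, 1))
drv.Context.synchronize()
assert np.allclose(out, img @ np.float32([0.299, 0.587, 0.114]), atol=1e-5)
```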