diff --git a/bpl-subset b/bpl-subset
index 381e9410e2b380ec44920ca10b4252506ab507d0..3702fb119804dddde5eaf4a254822b891a947104 160000
--- a/bpl-subset
+++ b/bpl-subset
@@ -1 +1 @@
-Subproject commit 381e9410e2b380ec44920ca10b4252506ab507d0
+Subproject commit 3702fb119804dddde5eaf4a254822b891a947104
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 100d21cc7a75325d93295bdc8a89d0bf3e439b01..6df39063e9919a4518f4a3f41e149265ec21db25 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -1066,7 +1066,7 @@ class GPUArray:
             shape=tuple(new_shape),
             dtype=self.dtype,
             allocator=self.allocator,
-            base=self.base or self,
+            base=self if self.base is None else self.base,
             gpudata=self.gpudata,
             strides=tuple(new_strides),
         )
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index 50562e53429418e3ce366b7bcf18bf7b6db25269..b5e683c4a3bf0f6903e864930163a5eb5465830d 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -1539,6 +1539,13 @@ class TestGPUArray:
         assert np.max(reference) == 0
         assert np.allclose(result[2**32:], np.arange(1, 12+1))
 
+    def test_noncontig_transpose(self):
+        # Regression test for https://github.com/inducer/pycuda/issues/385
+        d = gpuarray.zeros((1000, 15, 2048), "f")
+        d.transpose(axes=(1, 0, 2))  # full array: always worked
+        d2 = d[:, 7:9, :]  # slice along axis 1: non C-contiguous
+        d2.transpose(axes=(1, 0, 2))  # crashed before the fix; must not raise
+
 
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.