diff --git a/doc/source/array.rst b/doc/source/array.rst
index fc05a353563e607830ac7732fc47d964b1e6e8e2..35c8166d6da8ed8fedfcc20cd8cde73aebc6598e 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -90,7 +90,7 @@ The :class:`GPUArray` Array Class
         of :attr:`size` instead of its current value. The change was made in order
         to match :mod:`numpy`.
 
-    .. method :: reshape(shape)
+    .. method :: reshape(shape, order="C")
 
         Returns an array containing the same data with a new shape.
 
@@ -273,15 +273,34 @@ Constructing :class:`GPUArray` Instances
     Same as :func:`empty`, but the :class:`GPUArray` is zero-initialized before
     being returned.
 
-.. function:: empty_like(other_ary)
+.. function:: empty_like(other_ary, dtype=None, order="K")
 
     Make a new, uninitialized :class:`GPUArray` having the same properties
-    as *other_ary*.
+    as *other_ary*.  The *dtype* and *order* arguments allow these aspects to
+    be set independently of their values in *other_ary*.  For *order*, "A"
+    means retain Fortran-ordering if the input is Fortran-contiguous, otherwise
+    use "C" ordering.  The default *order*, "K", tries to match the strides
+    of *other_ary* as closely as possible.
 
-.. function:: zeros_like(other_ary)
+.. function:: zeros_like(other_ary, dtype=None, order="K")
 
     Make a new, zero-initialized :class:`GPUArray` having the same properties
-    as *other_ary*.
+    as *other_ary*.  The *dtype* and *order* arguments allow these aspects to
+    be set independently of their values in *other_ary*.  For *order*, "A"
+    means retain Fortran-ordering if the input is Fortran-contiguous, otherwise
+    use "C" ordering.  The default *order*, "K", tries to match the strides
+    of *other_ary* as closely as possible.
+
+.. function:: ones_like(other_ary, dtype=None, order="K")
+
+    Make a new, ones-initialized :class:`GPUArray` having the same properties
+    as *other_ary*.  The *dtype* and *order* arguments allow these aspects to
+    be set independently of their values in *other_ary*.  For *order*, "A"
+    means retain Fortran-ordering if the input is Fortran-contiguous, otherwise
+    use "C" ordering.  The default *order*, "K", tries to match the strides
+    of *other_ary* as closely as possible.
+
+    .. versionadded:: 2017.2
 
 .. function:: arange(start, stop, step, dtype=None, stream=None)
 
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index d8e73dc3ca3a38048232531d6e1b9cdec842f5b3..572eeaa9c47ccf8cb182c6a238711f5e272929dd 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -1,6 +1,18 @@
 Changes
 =======
 
+Version 2017.2
+--------------
+
+* :func:`zeros_like` and :func:`empty_like` now have *dtype* and *order*
+  arguments as in numpy.  Previously these routines always returned a
+  C-order array.  The new default behavior follows the numpy default, which is
+  to match the order and strides of the input as closely as possible.
+* A :func:`ones_like` gpuarray function was added.
+* :attr:`GPUArray.imag`, :attr:`GPUArray.real`, and :meth:`GPUArray.conj`
+  now all return Fortran-ordered arrays when the :class:`GPUArray` is
+  Fortran-ordered.
+
 Version 2016.2
 --------------
 .. note::
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index eda944d86d2f73200be48646b1c3f498623d39a8..3f6fd32dfd5bd9377286e5336d09d68d82bec4da 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -202,6 +202,7 @@ class GPUArray(object):
         self.strides = strides
         self.mem_size = self.size = s
         self.nbytes = self.dtype.itemsize * self.size
+        self.itemsize = self.dtype.itemsize
 
         self.allocator = allocator
         if gpudata is None:
@@ -1011,44 +1012,57 @@ def zeros(shape, dtype, allocator=drv.mem_alloc, order="C"):
     return result
 
 
-def empty_like(other_ary, dtype=None, order='K'):
-    if order == 'K':
-        if other_ary.flags.f_contiguous:
+def _array_like_helper(other_ary, dtype, order):
+    """Set order, strides, dtype as in numpy's zeros_like."""
+    strides = None
+    if order == "A":
+        if other_ary.flags.f_contiguous and not other_ary.flags.c_contiguous:
             order = "F"
         else:
             order = "C"
+    elif order == "K":
+        if other_ary.flags.c_contiguous or (other_ary.ndim <= 1):
+            order = "C"
+        elif other_ary.flags.f_contiguous:
+            order = "F"
+        else:
+            # array_like routines only return positive strides
+            strides = [np.abs(s) for s in other_ary.strides]
+            if dtype is not None and dtype != other_ary.dtype:
+                # scale strides by itemsize when dtype is not the same
+                itemsize = other_ary.nbytes // other_ary.size
+                itemsize_ratio = np.dtype(dtype).itemsize / itemsize
+                strides = [int(s*itemsize_ratio) for s in strides]
+    elif order not in ["C", "F"]:
+        raise ValueError("Unsupported order: %r" % order)
     if dtype is None:
         dtype = other_ary.dtype
+    return dtype, order, strides
+
+
+def empty_like(other_ary, dtype=None, order="K"):
+    dtype, order, strides = _array_like_helper(other_ary, dtype, order)
     result = GPUArray(
-            other_ary.shape, dtype, other_ary.allocator, order=order)
+            other_ary.shape, dtype, other_ary.allocator, order=order,
+            strides=strides)
     return result
 
 
-def zeros_like(other_ary, dtype=None, order='K'):
-    if order == 'K':
-        if other_ary.flags.f_contiguous:
-            order = "F"
-        else:
-            order = "C"
-    if dtype is None:
-        dtype = other_ary.dtype
+def zeros_like(other_ary, dtype=None, order="K"):
+    dtype, order, strides = _array_like_helper(other_ary, dtype, order)
     result = GPUArray(
-            other_ary.shape, dtype, other_ary.allocator, order=order)
+            other_ary.shape, dtype, other_ary.allocator, order=order,
+            strides=strides)
     zero = np.zeros((), result.dtype)
     result.fill(zero)
     return result
 
 
-def ones_like(other_ary, dtype=None, order='K'):
-    if order == 'K':
-        if other_ary.flags.f_contiguous:
-            order = "F"
-        else:
-            order = "C"
-    if dtype is None:
-        dtype = other_ary.dtype
+def ones_like(other_ary, dtype=None, order="K"):
+    dtype, order, strides = _array_like_helper(other_ary, dtype, order)
     result = GPUArray(
-            other_ary.shape, dtype, other_ary.allocator, order=order)
+            other_ary.shape, dtype, other_ary.allocator, order=order,
+            strides=strides)
     one = np.ones((), result.dtype)
     result.fill(one)
     return result
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index 39326eedb85a9e31eaa748519f59e1c5a3468630..0a3218e1e5b849a9df165bd64dad3b95b709bd2f 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -1085,6 +1085,56 @@ class TestGPUArray:
         assert np.allclose(a_gpu.get(), a)
         assert np.allclose(a_gpu[1:3,1:3,1:3].get(), a[1:3,1:3,1:3])
 
+    @mark_cuda_test
+    def test_zeros_like_etc(self):
+        shape = (16, 16)
+        a = np.random.randn(*shape).astype(np.float32)
+        z = gpuarray.to_gpu(a)
+        zf = gpuarray.to_gpu(np.asfortranarray(a))
+        a_noncontig = np.arange(3*4*5).reshape(3, 4, 5).swapaxes(1, 2)
+        z_noncontig = gpuarray.to_gpu(a_noncontig)
+        for func in [gpuarray.empty_like,
+                     gpuarray.zeros_like,
+                     gpuarray.ones_like]:
+            for arr in [z, zf, z_noncontig]:
+
+                contig = arr.flags.c_contiguous or arr.flags.f_contiguous
+
+                # Output matches order of input.
+                # Non-contiguous becomes C-contiguous
+                new_z = func(arr, order="A")
+                if contig:
+                    assert new_z.flags.c_contiguous == arr.flags.c_contiguous
+                    assert new_z.flags.f_contiguous == arr.flags.f_contiguous
+                else:
+                    assert new_z.flags.c_contiguous is True
+                    assert new_z.flags.f_contiguous is False
+                assert new_z.dtype == arr.dtype
+                assert new_z.shape == arr.shape
+
+                # Force C-ordered output
+                new_z = func(arr, order="C")
+                assert new_z.flags.c_contiguous is True
+                assert new_z.flags.f_contiguous is False
+                assert new_z.dtype == arr.dtype
+                assert new_z.shape == arr.shape
+
+                # Force Fortran-ordered output
+                new_z = func(arr, order="F")
+                assert new_z.flags.c_contiguous is False
+                assert new_z.flags.f_contiguous is True
+                assert new_z.dtype == arr.dtype
+                assert new_z.shape == arr.shape
+
+                # Change the dtype, but otherwise match order & strides
+                # order = "K" so non-contiguous array remains non-contiguous
+                new_z = func(arr, dtype=np.complex64, order="K")
+                assert new_z.flags.c_contiguous == arr.flags.c_contiguous
+                assert new_z.flags.f_contiguous == arr.flags.f_contiguous
+                assert new_z.dtype == np.complex64
+                assert new_z.shape == arr.shape
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pycuda.autoinit  # noqa