diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py
index 9de9cf1ea7b419700911584bab9c64961231f338..7181cfc9eea385c8eafa8c33dc87e1940a452582 100644
--- a/examples/matrix-multiply.py
+++ b/examples/matrix-multiply.py
@@ -187,7 +187,7 @@ push_time = time()-t1
 
 # warmup ----------------------------------------------------------------------
 for i in range(5):
-    event = kernel(queue, h_c.shape, (block_size, block_size), 
+    event = kernel(queue, h_c.shape[::-1], (block_size, block_size), 
             d_c_buf, d_a_buf, d_b_buf)
     event.wait()
 
@@ -198,7 +198,7 @@ t1 = time()
 
 count = 20
 for i in range(count):
-    event = kernel(queue, h_c.shape, (block_size, block_size),
+    event = kernel(queue, h_c.shape[::-1], (block_size, block_size),
             d_c_buf, d_a_buf, d_b_buf)
 
 event.wait()