diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py
index 49ccbf055534dc65fd4d342b97a2d267a3b3bf32..4d7047df47aac397baa2e65b85999519477177c1 100644
--- a/loopy/codegen/__init__.py
+++ b/loopy/codegen/__init__.py
@@ -263,11 +263,12 @@ def generate_code(kernel, with_annotation=False,
 
     # }}}
 
+    from pyopencl.tools import dtype_to_ctype
     mod.extend([
         LiteralLines(r"""
-        #define lid(N) ((int) get_local_id(N))
-        #define gid(N) ((int) get_group_id(N))
-        """),
+        #define lid(N) ((%(idx_ctype)s) get_local_id(N))
+        #define gid(N) ((%(idx_ctype)s) get_group_id(N))
+        """ % dict(idx_ctype=dtype_to_ctype(kernel.index_dtype))),
         Line()])
 
     # {{{ build lmem array declarators for temporary variables
diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py
index c9a9b866070c544c0a7260fe5dd8f2dc8d5f8ac7..5bc5c3586a48bcf7706cead4b23dcee569cac207 100644
--- a/loopy/codegen/bounds.py
+++ b/loopy/codegen/bounds.py
@@ -146,7 +146,8 @@ def wrap_in_bounds_checks(ccm, domain, check_inames, implemented_domain, stmt):
 
     return stmt, new_implemented_domain
 
-def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
+def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt,
+        index_dtype):
     # FIXME add admissible vars
     if isinstance(constraint_bset, isl.Set):
         constraint_bset, = constraint_bset.get_basic_sets()
@@ -191,7 +192,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
         from loopy.codegen import gen_code_block
         from cgen import Initializer, POD, Const, Line
         return gen_code_block([
-            Initializer(Const(POD(np.int32, iname)),
+            Initializer(Const(POD(index_dtype, iname)),
                 ccm(equality_expr, 'i')),
             Line(),
             stmt,
diff --git a/loopy/codegen/expression.py b/loopy/codegen/expression.py
index 82741a249716790eb2ef17ff1f55bdfb9d6a3b84..964d7efc1f6d681f618d0fc7edfc3314cdef4a56 100644
--- a/loopy/codegen/expression.py
+++ b/loopy/codegen/expression.py
@@ -87,7 +87,7 @@ class TypeInferenceMapper(CombineMapper):
             return tv.dtype
 
         if expr.name in self.kernel.all_inames():
-            return np.dtype(np.int16) # don't force single-precision upcast
+            return self.kernel.index_dtype
 
         for mangler in self.kernel.symbol_manglers:
             result = mangler(expr.name)
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 36a24d5c50382443701b9a83c0f7b9ede13afea9..7b4e63c959f070ad047b8d89410696e5f11356bb 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -259,7 +259,8 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
             from cgen import Comment
             result.append(Comment(cmt))
         result.append(
-                wrap_in_for_from_constraints(ccm, iname, slab, inner))
+                wrap_in_for_from_constraints(ccm, iname, slab, inner,
+                    kernel.index_dtype))
 
     return gen_code_block(result)
 
diff --git a/loopy/kernel.py b/loopy/kernel.py
index acbc018111b31de858b166b99dd952ffb052be5a..5e2a060e64c7f2ea0f78d76b1aea697036b84168 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -649,7 +649,8 @@ class LoopKernel(Record):
             applied_iname_rewrites=[],
             cache_manager=None,
             iname_to_tag_requests=None,
-            lowest_priority_inames=[], breakable_inames=set()):
+            lowest_priority_inames=[], breakable_inames=set(),
+            index_dtype=np.int32):
         """
         :arg domain: a :class:`islpy.BasicSet`, or a string parseable to a basic set by the isl.
             Example: "{[i,j]: 0<=i < 10 and 0<= j < 9}"
@@ -895,7 +896,8 @@ class LoopKernel(Record):
                 breakable_inames=breakable_inames,
                 applied_iname_rewrites=applied_iname_rewrites,
                 function_manglers=function_manglers,
-                symbol_manglers=symbol_manglers)
+                symbol_manglers=symbol_manglers,
+                index_dtype=np.dtype(index_dtype))
 
     # {{{ function mangling