diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index 948c419c7989174d0824c342981f88b6fa4b8e6b..0b509fad8ec2d3ae6a21d5a228e0fd578cb0ab4c 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -79,17 +79,7 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state):
         return func(kernel, sched_index, codegen_state)
 
     elif isinstance(sched_item, Barrier):
-        from loopy.codegen import GeneratedInstruction
-        from cgen import Statement as S  # noqa
-
-        if sched_item.comment:
-            comment = " /* %s */" % sched_item.comment
-        else:
-            comment = ""
-
-        return GeneratedInstruction(
-                ast=S("barrier(CLK_LOCAL_MEM_FENCE)%s" % comment),
-                implemented_domain=None)
+        return kernel.target.emit_barrier(sched_item.kind, sched_item.comment)
 
     elif isinstance(sched_item, RunInstruction):
         insn = kernel.id_to_insn[sched_item.insn_id]
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 936366030a239822d77c0265f698edcbd56ef695..eb5c00d299eff02aad714e324fca92d8d3bdbffe 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -244,12 +244,10 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state,
     tag = kernel.iname_to_tag.get(iname)
 
     assert isinstance(tag, UniqueTag)
-    from pymbolic import var
-
     if isinstance(tag, LocalIndexTag):
-        hw_axis_expr = var("lid")(tag.axis)
+        hw_axis_expr = kernel.target.get_local_axis_expr(tag.axis)
     elif isinstance(tag, GroupIndexTag):
-        hw_axis_expr = var("gid")(tag.axis)
+        hw_axis_expr = kernel.target.get_global_axis_expr(tag.axis)
     else:
         raise RuntimeError("unexpected hw tag type")
 
diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py
index a777f458452fe012c8f22226d4a756a03b3e8403..ccc2e378f834a7d77ce9ab431be6349fb594109c 100644
--- a/loopy/kernel/data.py
+++ b/loopy/kernel/data.py
@@ -221,17 +221,8 @@ class GlobalArg(ArrayBase, KernelArgument):
     max_target_axes = 1
 
     def get_arg_decl(self, target, name_suffix, shape, dtype, is_written):
-        from loopy.codegen import POD  # uses the correct complex type
-        from cgen import RestrictPointer, Const
-        from cgen.opencl import CLGlobal
-
-        arg_decl = RestrictPointer(
-                POD(target, dtype, self.name + name_suffix))
-
-        if not is_written:
-            arg_decl = Const(arg_decl)
-
-        return CLGlobal(arg_decl)
+        name = self.name + name_suffix
+        return target.get_global_arg_decl(name, shape, dtype, is_written)
 
 
 class ConstantArg(ArrayBase, KernelArgument):
@@ -239,17 +230,8 @@ class ConstantArg(ArrayBase, KernelArgument):
     max_target_axes = 1
 
     def get_arg_decl(self, target, name_suffix, shape, dtype, is_written):
-        from loopy.codegen import POD  # uses the correct complex type
-        from cgen import RestrictPointer, Const
-        from cgen.opencl import CLConstant
-
-        arg_decl = RestrictPointer(
-                POD(dtype, self.name + name_suffix))
-
-        if not is_written:
-            arg_decl = Const(arg_decl)
-
-        return CLConstant(arg_decl)
+        name = self.name + name_suffix
+        return target.get_constant_arg_decl(name, shape, dtype, is_written)
 
 
 class ImageArg(ArrayBase, KernelArgument):
@@ -261,13 +243,8 @@ class ImageArg(ArrayBase, KernelArgument):
         return len(self.dim_tags)
 
     def get_arg_decl(self, target, name_suffix, shape, dtype, is_written):
-        if is_written:
-            mode = "w"
-        else:
-            mode = "r"
-
-        from cgen.opencl import CLImage
-        return CLImage(self.num_target_axes(), mode, self.name+name_suffix)
+        name = self.name + name_suffix
+        return target.get_image_arg_decl(name, shape, dtype, is_written)
 
 
 class ValueArg(KernelArgument):
diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py
index d36c39044cb4448d59e5dae78f5cca13afb06d11..4c54570aa8d153c73954124a38e79980e84688bf 100644
--- a/loopy/target/__init__.py
+++ b/loopy/target/__init__.py
@@ -52,11 +52,7 @@ class TargetBase(object):
 
     # }}}
 
-    def preprocess(self, kernel):
-        return kernel
-
-    def pre_codegen_check(self, kernel):
-        pass
+    # {{{ library
 
     def function_manglers(self):
         return []
@@ -67,10 +63,24 @@ class TargetBase(object):
     def preamble_generators(self):
         return []
 
-    def get_or_register_dtype(self, names, dtype=None):
-        raise NotImplementedError()
+    # }}}
+
+    # {{{ top-level codegen
+
+    def preprocess(self, kernel):
+        return kernel
+
+    def pre_codegen_check(self, kernel):
+        pass
+
+    def generate_code(self, kernel, codegen_state, impl_arg_info):
+        pass
+
+    # }}}
 
-    def dtype_to_typename(self, dtype):
+    # {{{ types
+
+    def get_dtype_registry(self):
         raise NotImplementedError()
 
     def is_vector_dtype(self, dtype):
@@ -82,3 +92,39 @@ class TargetBase(object):
     def alignment_requirement(self, type_decl):
         import struct
         return struct.calcsize(type_decl.struct_format())
+
+    # }}}
+
+    # {{{ code generation guts
+
+    def get_global_axis_expr(self, axis):
+        """Return the expression for global (group) hardware axis *axis*."""
+        raise NotImplementedError()
+
+    def get_local_axis_expr(self, axis):
+        """Return the expression for local hardware axis *axis*."""
+        raise NotImplementedError()
+
+    def emit_barrier(self, kind, comment):
+        """
+        :arg kind: ``"local"`` or ``"global"``
+        :arg comment: text to attach to the barrier; may be empty.
+        :return: a :class:`loopy.codegen.GeneratedInstruction`.
+        """
+        raise NotImplementedError()
+
+    def get_global_arg_decl(self, name, shape, dtype, is_written):
+        """Return the declarator for the global array argument *name*."""
+        raise NotImplementedError()
+
+    def get_constant_arg_decl(self, name, shape, dtype, is_written):
+        """Return the declarator for the constant array argument *name*."""
+        raise NotImplementedError()
+
+    def get_image_arg_decl(self, name, shape, dtype, is_written):
+        """Return the declarator for the image argument *name*."""
+        raise NotImplementedError()
+
+    # }}}
+
+# vim: foldmethod=marker
diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py
index 628e5d9ac14714f58dd6b68e3f8b605880b1f19b..e98c2af0055ac6c9535765c52bf57fdc951084c5 100644
--- a/loopy/target/c/__init__.py
+++ b/loopy/target/c/__init__.py
@@ -197,4 +197,18 @@ class CTarget(TargetBase):
 
         return body, gen_code.implemented_domains
 
+    def get_global_arg_decl(self, name, shape, dtype, is_written):
+        """Return a restrict pointer declarator, const unless written."""
+        from loopy.codegen import POD  # uses the correct complex type
+        from cgen import RestrictPointer, Const
+
+        pointer_decl = RestrictPointer(POD(self, dtype, name))
+
+        if is_written:
+            return pointer_decl
+
+        return Const(pointer_decl)
+
     # }}}
+
+# vim: foldmethod=marker
diff --git a/loopy/target/ispc/__init__.py b/loopy/target/ispc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd07db13ff405c2650db47c5a46026d847fe812
--- /dev/null
+++ b/loopy/target/ispc/__init__.py
@@ -0,0 +1,85 @@
+"""Target for Intel ISPC."""
+
+from __future__ import division, absolute_import
+
+__copyright__ = "Copyright (C) 2015 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+
+import numpy as np  # noqa
+from loopy.target.c import CTarget
+from loopy.diagnostic import LoopyError
+
+from pymbolic import var
+
+
+class ISPCTarget(CTarget):
+    """A :class:`CTarget` subclass generating code for Intel ISPC."""
+
+    # {{{ code generation guts
+
+    def get_global_axis_expr(self, axis):
+        return var("taskIndex%d" % axis)
+
+    def get_local_axis_expr(self, axis):
+        if axis != 0:
+            raise LoopyError("ISPC only supports one local axis")
+        return var("programIndex")
+
+    def emit_barrier(self, kind, comment):
+        """
+        :arg kind: ``"local"`` or ``"global"``
+        :arg comment: text to attach to the barrier; may be empty.
+        :return: a :class:`loopy.codegen.GeneratedInstruction`.
+        """
+        from loopy.codegen import GeneratedInstruction
+        from cgen import Comment, Statement
+
+        if kind == "local":
+            msg = "local barrier: %s" % comment if comment else "local barrier"
+            ast = Comment(msg)
+        elif kind == "global":
+            ast = Statement("sync; /* %s */" % comment if comment else "sync")
+        else:
+            raise LoopyError("unknown barrier kind")
+        return GeneratedInstruction(ast=ast, implemented_domain=None)
+
+    def get_global_arg_decl(self, name, shape, dtype, is_written):
+        from loopy.codegen import POD  # uses the correct complex type
+        from cgen import Const
+        from cgen.ispc import ISPCUniformPointer
+
+        arg_decl = ISPCUniformPointer(POD(self, dtype, name))
+        if not is_written:
+            arg_decl = Const(arg_decl)
+        return arg_decl
+
+    # }}}
+
+# TODO: Fix argument wrapping (value,
+# TODO: Fix local variable wrapping
+# TODO: Fix local variable alloc
+# TODO: Top-level foreach
+# TODO: Generate launch code
+# TODO: Vector types
+
+# vim: foldmethod=marker
diff --git a/loopy/target/opencl/__init__.py b/loopy/target/opencl/__init__.py
index d038c329a9eff73a95458fde44078c26f3dbbc56..4a39e52453ab24beb5f627127435c0124d435803 100644
--- a/loopy/target/opencl/__init__.py
+++ b/loopy/target/opencl/__init__.py
@@ -28,6 +28,9 @@ import numpy as np
 
 from loopy.target.c import CTarget
 from pytools import memoize_method
+from loopy.diagnostic import LoopyError
+
+from pymbolic import var
 
 
 # {{{ vector types
@@ -192,6 +195,8 @@ def opencl_preamble_generator(kernel, seen_dtypes, seen_functions):
 # {{{ target
 
 class OpenCLTarget(CTarget):
+    # {{{ library
+
     def function_manglers(self):
         return (
                 super(OpenCLTarget, self).function_manglers() + [
@@ -212,6 +217,8 @@ class OpenCLTarget(CTarget):
                     reduction_preamble_generator
                     ])
 
+    # }}}
+
     @memoize_method
     def get_dtype_registry(self):
         from loopy.target.c.compyte.dtypes import (DTypeRegistry,
@@ -232,6 +239,10 @@ class OpenCLTarget(CTarget):
     def vector_dtype(self, base, count):
         return vec.types[base, count]
 
+    # }}}
+
+    # {{{ top-level codegen
+
     def wrap_function_declaration(self, kernel, fdecl):
         from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
         return CLRequiredWorkGroupSize(
@@ -269,6 +280,64 @@ class OpenCLTarget(CTarget):
 
         return body, implemented_domains
 
+    # }}}
+
+    # {{{ code generation guts
+
+    def get_global_axis_expr(self, axis):
+        return var("gid")(axis)  # pymbolic call expression: gid(axis)
+
+    def get_local_axis_expr(self, axis):
+        return var("lid")(axis)  # pymbolic call expression: lid(axis)
+
+    def emit_barrier(self, kind, comment):
+        """
+        :arg kind: ``"local"`` or ``"global"``
+        :return: a :class:`loopy.codegen.GeneratedInstruction`.
+        """
+        if kind == "local":
+            # Falsy comments (None, "") must not be formatted into the code.
+            comment = " /* %s */" % comment if comment else ""
+
+            from loopy.codegen import GeneratedInstruction
+            from cgen import Statement
+            return GeneratedInstruction(
+                    ast=Statement("barrier(CLK_LOCAL_MEM_FENCE)%s" % comment),
+                    implemented_domain=None)
+        elif kind == "global":
+            raise LoopyError("OpenCL does not have global barriers")
+        else:
+            raise LoopyError("unknown barrier kind")
+
+    def get_global_arg_decl(self, name, shape, dtype, is_written):
+        """Wrap the C target's declarator in cgen's ``CLGlobal``."""
+        from cgen.opencl import CLGlobal
+        base = super(OpenCLTarget, self).get_global_arg_decl
+        return CLGlobal(base(name, shape, dtype, is_written))
+
+    def get_image_arg_decl(self, name, shape, dtype, is_written):
+        """Return a cgen ``CLImage`` declarator for the image argument *name*."""
+        from cgen.opencl import CLImage
+
+        # num_target_axes() lives on ImageArg, not on the target, so the image
+        # dimension is taken from *shape* here -- assumes one shape entry per
+        # image axis; TODO confirm against ImageArg.num_target_axes().
+        return CLImage(len(shape), "w" if is_written else "r", name)
+
+    def get_constant_arg_decl(self, name, shape, dtype, is_written):
+        """Return the ``CLConstant``-wrapped declarator for argument *name*."""
+        from loopy.codegen import POD  # uses the correct complex type
+        from cgen import RestrictPointer, Const
+        from cgen.opencl import CLConstant
+
+        # POD takes the target as its first argument.
+        arg_decl = RestrictPointer(POD(self, dtype, name))
+        if not is_written:
+            arg_decl = Const(arg_decl)
+        return CLConstant(arg_decl)
+
+    # }}}
+
 # }}}
 
 # vim: foldmethod=marker
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 7dc080e2d1d0c0a81151fe558f63661768c9ef8b..81c0dd5535cc2d6a4337cddc5ba2c8b6a394ebc3 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2313,6 +2313,30 @@ def test_collect_common_factors(ctx_factory):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=13))
 
 
+def test_ispc_backend():
+    """Smoke test: ISPC code generation for a split, prefetched kernel runs."""
+    from loopy.target.ispc import ISPCTarget
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            "out[i] = 2*a[i]",
+            [
+                # Tests that comma'd arguments interoperate with
+                # argument guessing.
+                lp.GlobalArg("out,a", np.float32, shape=lp.auto),
+                "..."
+                ],
+            target=ISPCTarget())
+
+    knl = lp.split_iname(knl, "i", 128, inner_tag="l.0")
+    knl = lp.split_iname(knl, "i_outer", 4, outer_tag="g.0", inner_tag="ilp")
+    knl = lp.add_prefetch(knl, "a", ["i_inner", "i_outer_inner"])
+
+    scheduled = lp.get_one_scheduled_kernel(lp.preprocess_kernel(knl))
+    code = lp.generate_code(scheduled)[0]
+    print(code)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])