diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index d206faad5bd84e3a1c7e7c061673f3d5d1144c84..789c00d33b7bb41816e6901e24046d4b0eefb27d 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -1,8 +1,6 @@
 """Loop nest build top-level control/hoisting."""
 
-from __future__ import division
-from __future__ import absolute_import
-import six
+from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
 
@@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-
+import six
 from loopy.codegen.result import merge_codegen_results, wrap_in_if
 import islpy as isl
 from loopy.schedule import (
         EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel,
         gather_schedule_block, generate_sub_sched_items)
+from loopy.diagnostic import LoopyError
 
 
 def get_admissible_conditional_inames_for(codegen_state, sched_index):
@@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index):
         return func(codegen_state, sched_index)
 
     elif isinstance(sched_item, Barrier):
+        # {{{ emit barrier code
+
+        from loopy.codegen.result import CodeGenerationResult
+
         if codegen_state.is_generating_device_code:
-            return codegen_state.ast_builder.emit_barrier(
+            barrier_ast = codegen_state.ast_builder.emit_barrier(
                     sched_item.kind, sched_item.comment)
-        from loopy.codegen.result import CodeGenerationResult
-        return CodeGenerationResult(
-                host_program=None,
-                device_programs=[],
-                implemented_domains={},
-                implemented_data_info=codegen_state.implemented_data_info)
+            if sched_item.originating_insn_id:
+                return CodeGenerationResult.new(
+                        codegen_state,
+                        sched_item.originating_insn_id,
+                        barrier_ast,
+                        codegen_state.implemented_domain)
+            else:
+                return barrier_ast
+        else:
+            # host code: emit nothing for known kinds, reject unknown ones
+            if sched_item.kind in ["global", "local"]:
+                # host code is assumed globally and locally synchronous
+                return CodeGenerationResult(
+                        host_program=None,
+                        device_programs=[],
+                        implemented_domains={},
+                        implemented_data_info=codegen_state.implemented_data_info)
+
+            else:
+                raise LoopyError("do not know how to emit code for barrier kind '%s'"
+                        " in host code" % sched_item.kind)
+
+        # }}}
 
     elif isinstance(sched_item, RunInstruction):
         insn = kernel.id_to_insn[sched_item.insn_id]
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 3bffc854603e9d02d0c1b68c05da609aacda6e55..89cb5f26a4940656cca1ab09841311148e113275 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options):
     if special_insn_kind == "gbarrier":
         cls = BarrierInstruction
         kwargs["kind"] = "global"
+    elif special_insn_kind == "lbarrier":
+        cls = BarrierInstruction
+        kwargs["kind"] = "local"
     elif special_insn_kind == "nop":
         cls = NoOpInstruction
     else:
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 08268ca9f27623a6d17a195d3c04acb55e5ec68a..d5c388af60a39987c09092fc93325f067a8f4cf7 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction):
 
     .. attribute:: kind
 
-        A string, currently only ``"global"``.
+        A string, ``"global"`` or ``"local"``.
 
     The textual syntax in a :mod:`loopy` kernel is::
 
         ... gbarrier
+        ... lbarrier
     """
 
     fields = _DataObliviousInstruction.fields | set(["kind"])
@@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction):
             priority=None,
             boostable=None, boostable_into=None,
             predicates=None, tags=None, kind="global"):
-        assert kind == "global"
 
         if predicates:
             raise LoopyError("conditional barriers are not supported")
diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py
index f27ae49e80bc19989cf6e4e002c03ebb7a637b78..4281e50bd006a3cddf5a3cae0ffffe3d78abcfac 100644
--- a/loopy/schedule/__init__.py
+++ b/loopy/schedule/__init__.py
@@ -732,13 +732,15 @@ def generate_loop_schedules_internal(
 
     # }}}
 
-    # {{{ see if there are pending local barriers in the preschedule
+    # {{{ see if there are pending barriers in the preschedule
 
-    # Local barriers do not have associated instructions, so they need to
-    # be handled separately from instructions.
+    # Barriers that do not have an originating instruction are handled here.
+    # (These are automatically inserted by insert_barriers().) Barriers with
+    # originating instructions are handled as part of normal instruction
+    # scheduling below.
     if (
             isinstance(next_preschedule_item, Barrier)
-            and next_preschedule_item.kind == "local"):
+            and next_preschedule_item.originating_insn_id is None):
         for result in generate_loop_schedules_internal(
                     sched_state.copy(
                         schedule=sched_state.schedule + (next_preschedule_item,),
@@ -814,10 +816,7 @@ def generate_loop_schedules_internal(
         if insn_id in sched_state.prescheduled_insn_ids:
             if isinstance(next_preschedule_item, RunInstruction):
                 next_preschedule_insn_id = next_preschedule_item.insn_id
-            elif (
-                    isinstance(next_preschedule_item, Barrier)
-                    and next_preschedule_item.kind == "global"):
-                assert hasattr(next_preschedule_item, "originating_insn_id")
+            elif isinstance(next_preschedule_item, Barrier):
                 assert next_preschedule_item.originating_insn_id is not None
                 next_preschedule_insn_id = next_preschedule_item.originating_insn_id
             else:
@@ -1606,7 +1605,10 @@ def append_barrier_or_raise_error(schedule, dep, verify_only):
         comment = "for %s (%s)" % (
                 dep.variable, dep.dep_descr.format(
                     tgt=dep.target.id, src=dep.source.id))
-        schedule.append(Barrier(comment=comment, kind=dep.var_kind))
+        schedule.append(Barrier(
+            comment=comment,
+            kind=dep.var_kind,
+            originating_insn_id=None))
 
 
 def insert_barriers(kernel, schedule, kind, verify_only, level=0):
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 78633abbd41408ae700aa8516e8a9c6f70f018a9..3593019ad2ca7e41f7db4c95616184e1e8972125 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction():
     print(k.stringify(with_dependencies=True))
 
 
+def test_literal_local_barrier(ctx_factory):
+    """Pass a kernel whose loop body is a lone literal ``lbarrier`` to auto-test."""
+    ctx = ctx_factory()
+    insns = """
+            for i
+                ... lbarrier
+            end
+            """
+
+    unfixed_knl = lp.make_kernel("{ [i]: 0<=i<n }", insns, seq_dependencies=True)
+    fixed_knl = lp.fix_parameters(unfixed_knl, n=128)
+
+    # The same kernel object is handed over as both the reference and the
+    # kernel under test.
+    lp.auto_test_vs_ref(
+            fixed_knl, ctx, fixed_knl, parameters={"n": 5})
+
+
 def test_kernel_splitting(ctx_factory):
     ctx = ctx_factory()
 
@@ -1317,6 +1335,28 @@ def test_save_of_local_array(ctx_factory, debug=False):
     save_and_reload_temporaries_test(queue, knl, np.arange(8), debug)
 
 
+def test_save_of_local_array_with_explicit_local_barrier(ctx_factory, debug=False):
+    """Save/reload check for local array ``t`` with literal l- and g-barriers."""
+    queue = cl.CommandQueue(ctx_factory())
+    insns = """
+        for i, j
+            <>t[2*j] = j
+            ... lbarrier
+            t[2*j+1] = t[2*j]
+            ... gbarrier
+            out[i] = t[2*i]
+        end
+        """
+
+    base_knl = lp.make_kernel("{ [i,j]: 0<=i,j<8 }", insns, seq_dependencies=True)
+    local_t_knl = lp.set_temporary_scope(base_knl, "t", "local")
+    tagged_knl = lp.tag_inames(local_t_knl, {"i": "g.0", "j": "l.0"})
+
+    # Tags: i -> "g.0", j -> "l.0". np.arange(8) is presumably the expected
+    # contents of ``out`` -- confirm against save_and_reload_temporaries_test.
+    save_and_reload_temporaries_test(queue, tagged_knl, np.arange(8), debug)
+
+
 def test_save_local_multidim_array(ctx_factory, debug=False):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)