diff --git a/loopy/__init__.py b/loopy/__init__.py
index fa146961d8bbde486c042dee5540060d9845b76e..4367374cff4f342a2a6689ed47781c1f8e5e880d 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -523,11 +523,13 @@ class _InameDuplicator(ExpandingIdentityMapper):
             return var(new_name)
 
     def map_instruction(self, insn):
+        if not self.within(((insn.id, None),)):
+            return insn
+
         new_fid = frozenset(
                 self.old_to_new.get(iname, iname)
                 for iname in insn.forced_iname_deps)
-        return insn.copy(
-                forced_iname_deps=new_fid)
+        return insn.copy(forced_iname_deps=new_fid)
 
 
 def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
@@ -567,9 +569,11 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
             new_iname = name_gen(new_iname)
 
         else:
+            if name_gen.is_name_conflicting(new_iname):
+                raise ValueError("new iname '%s' conflicts with existing names"
+                        % new_iname)
+
             name_gen.add_name(new_iname)
-            raise ValueError("new iname '%s' conflicts with existing names"
-                    % new_iname)
 
         new_inames[i] = new_iname
 
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 6a68b04514346d2218ef63a85c7622ab7b5ddcfb..6705c13aea45098500b61e37e7cb066ee3a7cfd3 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -152,6 +152,12 @@ SUBST_RE = re.compile(
 
 
 def parse_insn(insn):
+    """
+    :return: a tuple ``(insn, inames_to_dup)``, where insn is a
+        :class:`ExpressionInstruction` or a :class:`SubstitutionRule`
+        and *inames_to_dup* is None or a list of tuples `(old, new)`.
+    """
+
     insn_match = INSN_RE.match(insn)
     subst_match = SUBST_RE.match(insn)
     if insn_match is not None and subst_match is not None:
@@ -183,6 +189,7 @@ def parse_insn(insn):
         insn_deps = None
         insn_deps_is_final = False
         insn_id = None
+        inames_to_dup = []
         priority = 0
         forced_iname_deps_is_final = False
         forced_iname_deps = frozenset()
@@ -208,6 +215,14 @@ def parse_insn(insn):
                     insn_id = UniqueName(opt_value)
                 elif opt_key == "priority":
                     priority = int(opt_value)
+                elif opt_key == "dup":
+                    for value in opt_value.split(":"):
+                        arrow_idx = value.find("->")
+                        if arrow_idx >= 0:
+                            inames_to_dup.append(
+                                    (value[:arrow_idx], value[arrow_idx+2:]))
+                        else:
+                            inames_to_dup.append((value, None))
 
                 elif opt_key == "dep":
                     if opt_value.startswith("*"):
@@ -254,7 +269,7 @@ def parse_insn(insn):
                     assignee=lhs, expression=rhs,
                     temp_var_type=temp_var_type,
                     priority=priority,
-                    predicates=predicates)
+                    predicates=predicates), inames_to_dup
 
     elif subst_match is not None:
         from pymbolic.primitives import Variable, Call
@@ -280,7 +295,7 @@ def parse_insn(insn):
         return SubstitutionRule(
                 name=subst_name,
                 arguments=tuple(arg_names),
-                expression=rhs)
+                expression=rhs), []
 
 
 def parse_if_necessary(insn, defines):
@@ -1068,15 +1083,23 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
 
     parsed_instructions = []
     kwargs["substitutions"] = substitutions = {}
+    inames_to_dup = []
 
     if isinstance(instructions, str):
         instructions = [instructions]
+
     for insn in instructions:
-        for new_insn in parse_if_necessary(insn, defines):
+        for new_insn, insn_inames_to_dup in parse_if_necessary(insn, defines):
             if isinstance(new_insn, InstructionBase):
                 parsed_instructions.append(new_insn)
+
+                # Need to maintain 1-to-1 correspondence to instructions
+                inames_to_dup.append(insn_inames_to_dup)
+
             elif isinstance(new_insn, SubstitutionRule):
                 substitutions[new_insn.name] = new_insn
+
+                assert not insn_inames_to_dup
             else:
                 raise RuntimeError("unexpected type in instruction parsing")
 
@@ -1112,6 +1135,12 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
             options=options,
             **kwargs)
 
+    from loopy import duplicate_inames
+    for insn, insn_inames_to_dup in zip(knl.instructions, inames_to_dup):
+        for old_iname, new_iname in insn_inames_to_dup:
+            knl = duplicate_inames(knl, old_iname,
+                    within=insn.id, new_inames=new_iname)
+
     check_for_nonexistent_iname_deps(knl)
 
     knl = tag_reduction_inames_as_sequential(knl)