diff --git a/loopy/__init__.py b/loopy/__init__.py
index ae0a6d4ae4a14fde1ba9520d9febba4e28a3fcff..dd8d2147cb64de31cb25e246c259740834c33cff 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -310,6 +310,95 @@ def split_dimension(kernel, iname, inner_length,
 
 
 
+def join_dimensions(kernel, inames, new_iname=None, tag=AutoFitLocalIndexTag()):
+    """
+    :arg inames: fastest varying last
+    """
+
+    # now fastest varying first
+    inames = inames[::-1]
+
+    if new_iname is None:
+        new_iname = kernel.make_unique_var_name("_and_".join(inames))
+
+    new_domain = kernel.domain
+    new_dim_idx = new_domain.dim(dim_type.set)
+    new_domain = new_domain.add_dims(dim_type.set, 1)
+    new_domain = new_domain.set_dim_name(dim_type.set, new_dim_idx, new_iname)
+
+    joint_aff = zero = isl.Aff.zero_on_domain(kernel.space)
+    subst_dict = {}
+    base_divisor = 1
+
+    from pymbolic import var
+
+    for i, iname in enumerate(inames):
+        iname_dt, iname_idx = zero.get_space().get_var_dict()[iname]
+        iname_aff = zero.add_coefficient(iname_dt, iname_idx, 1)
+
+        joint_aff = joint_aff + base_divisor*iname_aff
+
+        bounds = kernel.get_iname_bounds(iname)
+
+        from loopy.isl_helpers import (
+                static_max_of_pw_aff, static_value_of_pw_aff)
+        from loopy.symbolic import pw_aff_to_expr
+
+        length = int(pw_aff_to_expr(
+            static_max_of_pw_aff(bounds.size, constants_only=True)))
+        lower_bound_aff = static_value_of_pw_aff(
+                bounds.lower_bound_pw_aff.coalesce(),
+                constants_only=False)
+
+        my_val = var(new_iname) // base_divisor
+        if i+1 < len(inames):
+            my_val %= length
+        my_val += pw_aff_to_expr(lower_bound_aff)
+        subst_dict[iname] = my_val
+
+        base_divisor *= length
+
+    from loopy.isl_helpers import iname_rel_aff
+    new_domain = new_domain.add_constraint(
+            isl.Constraint.equality_from_aff(
+                iname_rel_aff(new_domain.get_space(), new_iname, "==", joint_aff)))
+
+    for i, iname in enumerate(inames):
+        iname_to_dim = new_domain.get_space().get_var_dict()
+        iname_dt, iname_idx = iname_to_dim[iname]
+        new_domain = new_domain.eliminate(iname_dt, iname_idx, 1)
+        new_domain = new_domain.remove_dims(iname_dt, iname_idx, 1)
+
+    from loopy.symbolic import SubstitutionMapper
+    from pymbolic.mapper.substitutor import make_subst_func
+    subst_map = SubstitutionMapper(make_subst_func(subst_dict))
+
+    def subst_forced_iname_deps(fid):
+        result = set()
+        for iname in fid:
+            if iname in inames:
+                result.add(new_iname)
+            else:
+                result.add(iname)
+
+        return result
+
+    new_insns = [
+            insn.copy(
+                assignee=subst_map(insn.assignee),
+                expression=subst_map(insn.expression),
+                forced_iname_deps=subst_forced_iname_deps(insn.forced_iname_deps))
+            for insn in kernel.instructions]
+
+    result = kernel.copy(
+            instructions=new_insns,
+            domain=new_domain)
+
+    return tag_dimensions(result, {new_iname: tag})
+
+
+
+
 def tag_dimensions(kernel, iname_to_tag):
     from loopy.kernel import parse_tag