diff --git a/doc/references.rst b/doc/references.rst
index 48de48eeff40eff7a01a2abd2da3032f4e4c2003..e56859ee9b77554770d99761f0ed7d394019c9c2 100644
--- a/doc/references.rst
+++ b/doc/references.rst
@@ -15,3 +15,8 @@ References
     Nodal Discontinuous Galerkin Methods: Algorithms, Analysis, and Applications, \
     Springer \
     `(doi) <https://doi.org/10.1007/978-0-387-72067-8>`__
+
+.. [Chan_2016] J. Chan, R. J. Hewett, and T. Warburton (2016), \
+    Weight-Adjusted Discontinuous Galerkin Methods: Curvilinear Meshes, \
+    SIAM Journal on Scientific Computing \
+    `(doi) <https://doi.org/10.1137/16M1089198>`__
diff --git a/grudge/op.py b/grudge/op.py
index c784537a9ac10b8e4ee06b106135f6dca7e2df54..152e466dae698a5edf77670013ef11f04ff40fb8 100644
--- a/grudge/op.py
+++ b/grudge/op.py
@@ -795,7 +795,7 @@ def inverse_mass(dcoll: DiscretizationCollection, vec):
     scaling factor (see :func:`grudge.geometry.area_element`).
 
     For non-affine :math:`E`, :math:`J^e` is not constant. In this case, a
-    weight-adjusted approximation is used instead:
+    weight-adjusted approximation is used instead, following [Chan_2016]_:
 
     .. math::