From 33e534f7f56946f2981f2507bfa151abb7c9aa88 Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 21 Feb 2020 17:01:35 -0600
Subject: [PATCH 1/6] Initial commit - first pass at a generator for a
 CVODE-like implicit integrator

---
 leap/cvode/__init__.py | 1090 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1090 insertions(+)
 create mode 100644 leap/cvode/__init__.py

diff --git a/leap/cvode/__init__.py b/leap/cvode/__init__.py
new file mode 100644
index 0000000..4c37633
--- /dev/null
+++ b/leap/cvode/__init__.py
@@ -0,0 +1,1090 @@
+"""CVODE-like implicit solver."""
+
+from __future__ import division
+
+__copyright__ = """
+Copyright (C) 2007 Andreas Kloeckner
+Copyright (C) 2014, 2015 Matt Wala
+Copyright (C) 2020 Cory Mikida
+"""
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+from leap import MethodBuilder
+from pymbolic import var
+
+
+__doc__ = """
+.. autoclass:: CVODEMethodBuilder
+"""
+
+
+# {{{ CVODE-like adaptive implicit integration
+
+def emit_complete_step(cb, name_gen, zn, lvec, acor,
+                       q, qwait, tau, h, tq, t, saved_tq5):
+
+    # This bit of code increments tau and applies
+    # the correction acor to zn, and also does some
+    # things to prep for potential order increase.
+
+    for i in range(q, 1, -1):
+        cb(tau[i], tau[i-1])
+
+    from pymbolic.primitives import Comparison, LogicalAnd
+    with cb.if_(LogicalAnd((Comparison(q, "==", 1), Comparison(t, "!=", 0)))):
+        cb(tau[2], tau[1])
+
+    cb(tau[1], h)
+
+    for i in range(0, q+1):
+        cb(zn[i], zn[i] + lvec[i] * acor)
+
+    cb(qwait, qwait - 1)
+    # FIXME: maxorder
+    with cb.if_(LogicalAnd((Comparison(qwait, "==", 1), Comparison(q, "!=", 5)))):
+        cb(zn[5], acor)
+        cb(saved_tq5, tq[5])
+
+    return zn, saved_tq5, qwait, tau
+
+
+def emit_prepare_step(cb, name_gen, tq, acor, zn, h, q,
+                      n, tau, saved_tq5, dsm, ewt, qwait,
+                      etamax, eta, qprime):
+
+    # See if we want to change order for the next step
+
+    # if etamax is 1, defer step size or order changes
+    from pymbolic.primitives import Max, Min
+    with cb.if_(etamax, '==', 1):
+        cb(qwait, Max((qwait, 2)))
+        cb(qprime, q)
+        cb(eta, 1)
+        cb(etamax, 10.0)
+        cb(acor, acor * (1 / tq[2]))
+
+    with cb.else_():
+        # etaq is the ratio of new to old h at the current order
+        etaq = var(name_gen("etaq"))
+        cb(etaq, 1.0 / ((6.0 * dsm) ** (1.0 / (q + 1)) + 0.000001))
+
+        hmax_inv = var(name_gen("hmax_inv"))
+        cb(hmax_inv, 0) # default in CVODE
+
+        # Consider an order change
+        with cb.if_(qwait, '==', 0):
+            cb(qwait, 2)
+            qprime, eta, zn = emit_order_changer(cb, name_gen, h, q, zn, acor,
+                                        ewt, n, saved_tq5, tq,
+                                        tau, dsm, etaq, etamax, eta, qprime)
+        with cb.else_():
+            cb(qprime, q)
+            cb(eta, etaq)
+            # Check threshold
+            with cb.if_(eta, '<', 1.5):
+                cb(eta, 1)
+            with cb.else_():
+                cb(eta, Min((eta, etamax)))
+                #cb(eta, eta / Max(1.0, abs(h)*hmax_inv*eta))
+                cb(eta, eta / Max((1.0, ((h**2)**0.5)*hmax_inv*eta)))
+
+        cb(etamax, 10.0)
+        cb(acor, acor * (1 / tq[2]))
+
+    return eta, qprime, acor, qwait, zn, etamax
+
+
+def emit_bdf_setup(cb, name_gen, q, h, qwait, tau, lvec, tq):
+
+    # This function is responsible for setting the coefficients
+    # in the BDF method.
+
+    alpha_0 = var(name_gen("alpha_0"))
+    alpha_0hat = var(name_gen("alpha_0hat"))
+    xi_inv = var(name_gen("xi_inv"))
+    xistar_inv = var(name_gen("xistar_inv"))
+    hsum = var(name_gen("hsum"))
+    cb(lvec[0], 1)
+    cb(lvec[1], 1)
+    cb(alpha_0, -1)
+    cb(alpha_0hat, -1)
+    cb(xi_inv, 1)
+    cb(xistar_inv, 1)
+    cb(hsum, h)
+    with cb.if_(q, '>', 1):
+        for j in range(2, q):
+            cb(hsum, hsum + tau[j-1])
+            cb(xi_inv, h / hsum)
+            cb(alpha_0, alpha_0 - 1 / j)
+            for i in range(j, 0, -1):
+                cb(lvec[i], lvec[i] + lvec[i-1] * xi_inv)
+        # j = q
+        cb(alpha_0, alpha_0 - 1 / q)
+        cb(xistar_inv, -lvec[1] - alpha_0)
+        cb(hsum, hsum + tau[q-1])
+        cb(xi_inv,  h / hsum)
+        cb(alpha_0hat, -lvec[1] - xi_inv)
+        for i in range(q, 0, -1):
+            cb(lvec[i], lvec[i] + lvec[i-1]*xistar_inv)
+
+    A1 = var(name_gen("A1"))
+    A2 = var(name_gen("A2"))
+    A3 = var(name_gen("A3"))
+    A4 = var(name_gen("A4"))
+    A5 = var(name_gen("A5"))
+    A6 = var(name_gen("A6"))
+    C = var(name_gen("C"))
+    CPrime = var(name_gen("CPrime"))
+    CPrimePrime = var(name_gen("CPrimePrime"))
+    cb(A1, 1 - alpha_0hat + alpha_0)
+    cb(A2, 1 + q * A1)
+    # TEST QUANTITY ARRAY
+    #cb(tq[2], abs(alpha_0 * (A2 / A1)))  # epsilon, the upper bound on local error
+    cb(tq[2], ((alpha_0 * (A2 / A1))**2)**0.5)  # epsilon, upper bound on local error
+    #cb(tq[5], abs((A2) / (l[q] * xi_inv/xistar_inv)))
+    cb(tq[5], (((A2) / (lvec[q] * xi_inv/xistar_inv))**2)**0.5)
+    with cb.if_(qwait, '==', 1):
+        cb(C, xistar_inv / lvec[q])
+        cb(A3, alpha_0 + 1.0 / q)
+        cb(A4, alpha_0hat + xi_inv)
+        cb(CPrime, A3 / (1.0 - A4 + A3))
+        #cb(tq[1], abs(CPrime / C))
+        cb(tq[1], ((CPrime / C)**2)**0.5)
+        cb(hsum, hsum + tau[q])
+        cb(xi_inv, h / hsum)
+        cb(A5, alpha_0 - (1.0 / (q+1)))
+        cb(A6, alpha_0hat - xi_inv)
+        cb(CPrimePrime, A2 / (1.0 - A6 + A5))
+        #cb(tq[3], abs(CPrimePrime * xi_inv * (q+2) * A5))
+        cb(tq[3], ((CPrimePrime * xi_inv * (q+2) * A5)**2)**0.5)
+
+    cb(tq[4], 0.1 * tq[2])  # for convergence check
+
+    return lvec, tq
+
+
+def emit_predict(cb, name_gen, zn, q):
+
+    # This function is responsible for the initial prediction of
+    # yn (before the correction loop is hit). Its use entirely involves
+    # the Nordsieck history array zn.
+
+    for i in range(1, q+1):
+        for j in range(q, i-1, -1):
+            cb(zn[j-1], zn[j] + zn[j-1])
+
+    return zn
+
+
+def emit_flag_handler(cb, name_gen, nflag, zn, q, eta,
+                      h, setup_flag, hscale, etamax, jac_counter):
+
+    # Handles the output flag of the Newton solver and decides
+    # how to proceed.
+
+    # If Newton failed to converge, we need to reset the step size,
+    # restore the Nordsieck array to its previous value, and try again
+    kflag = var(name_gen("kflag"))
+    with cb.if_(nflag, '==', False):
+        zn = emit_restore(cb, name_gen, zn, q)
+        cb(eta, 0.25)
+        zn, hscale = emit_rescale(cb, name_gen, zn, q, eta, h, hscale)
+        cb(h, eta*h)
+        cb(kflag, False)
+        cb(setup_flag, True)
+        # this will trigger recalculation of the Jacobian
+        cb(jac_counter, 51)
+        cb(etamax, 1)
+    with cb.else_():
+        cb(kflag, True)
+
+    return zn, eta, h, kflag, setup_flag, hscale, etamax, jac_counter
+
+
+def emit_error_check(cb, name_gen, h, q, tq, n, ewt, acor, etamax, err, hprime):
+
+    # Provided we've found that Newton has converged properly,
+    # we need to check the local error, and if it's too high,
+    # we need to decrease the timestep and try again.
+
+    delta_n = var(name_gen("delta_n"))
+    delta_n = emit_wrms(cb, name_gen, acor, ewt, n)
+    cb(err, delta_n / tq[2])
+
+    with cb.if_(err, '>=', 1):
+
+        cb(etamax, 1)
+        # Reset the step size based on local error behavior.
+        hprime = emit_step_changer(cb, name_gen, h, q, err)
+
+    return hprime, err, etamax
+
+
+def emit_interp(cb, name_gen, zn, t_current, t_targ, h, q, n, ym):
+
+    # This function is responsible for interpolating
+    # when we overshoot the desired solution point.
+
+    s = var(name_gen("s"))
+    c = var(name_gen("c"))
+
+    cb(s, (t_targ - t_current) / h)
+    for i in range(q, -1, -1):
+        cb(c, 1)
+        with cb.if_(i, '==', q):
+            cb(ym, zn[q])
+        with cb.else_():
+            cb(ym, s * ym + zn[i])
+
+    return ym
+
+
+def emit_restore(cb, name_gen, zn, q):
+
+    # If a check fails, restore the Nordsieck array
+    for i in range(1, q+1):
+        for j in range(q, i-1, -1):
+            cb(zn[j-1], zn[j-1] - zn[j])
+
+    return zn
+
+
+def emit_rescale(cb, name_gen, zn, q, eta, h, hscale):
+
+    # Rescale the Nordsieck history array in the case of a
+    # step size change.
+
+    factor = var(name_gen("factor"))
+    cb(factor, eta)
+    for i in range(1, q+1):
+        cb(zn[i], factor * zn[i])
+        cb(factor, factor * eta)
+
+    cb(hscale, h * eta)
+
+    return zn, hscale
+
+
+def emit_convergence_check(cb, name_gen, delm, delp, tq, R, it, acor, ewt, n):
+
+    # Check the convergence rate - this is our stopping criteria for the
+    # Newton solver
+
+    from pymbolic.primitives import Max
+    with cb.if_(it, '>', 1):
+        cb(R, Max((0.3*R, delm/delp)))
+
+    # If it's the first iteration, r = 1.
+    check = var(name_gen("check"))
+    cb(check, (R*delm) / tq[4])
+
+    converged = var(name_gen("converged"))
+    with cb.if_(check, '<', 1):
+        cb(converged, True)
+    with cb.else_():
+        cb(converged, False)
+
+    acnrm = var(name_gen("acnrm"))
+    with cb.if_(it, '==', 1):
+        cb(acnrm, delm)
+    with cb.else_():
+        wnorm = var(name_gen("wnorm"))
+        wnorm = emit_wrms(cb, name_gen, acor, ewt, n)
+        cb(acnrm, wnorm)
+
+    return converged, R, acnrm
+
+
+def emit_order_changer(cb, name_gen, h, q, zn, acor, ewt, n,
+                       saved_tq5, tq, tau, dsm, etaq, etamax, eta, qprime):
+
+    # This function resets the step size when we want to change the order
+    # to order + 1 or order - 1
+
+    eta_minus = var(name_gen("eta_minus"))
+    eta_plus = var(name_gen("eta_plus"))
+    cb(eta_minus, 0)
+    cb(eta_plus, 0)
+    with cb.if_(q, '>', 1):
+        eta_minus = emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n)
+    with cb.if_(q, '<', 5):
+        eta_plus = emit_eta_plus(cb, name_gen, zn, acor,
+                                 tau, h, ewt, tq, saved_tq5, q, n)
+
+    # Pick one of these candidates
+    etam = var(name_gen("etam"))
+    from pymbolic.primitives import Max, Min
+    cb(etam, Max((eta_minus, Max((etaq, eta_plus)))))
+
+    with cb.if_(etam, '==', etaq):
+        cb(eta, etaq)
+        cb(qprime, q)
+    with cb.if_(etam, '==', eta_minus):
+        cb(eta, eta_minus)
+        cb(qprime, q - 1)
+    with cb.if_(etam, '==', eta_plus):
+        cb(eta, eta_plus)
+        cb(qprime, q + 1)
+        # CHECK THRESHOLD
+        with cb.if_(etam, '>', 1.5):
+            cb(zn[5], acor)
+
+    hmax_inv = var(name_gen("hmax_inv"))
+    cb(hmax_inv, 0)  # default in CVODE
+
+    # CHECK THRESHOLD
+    with cb.if_(etam, '<', 1.5):
+        cb(eta, 1)
+        cb(qprime, q)
+    with cb.else_():
+        cb(eta, Min((eta, etamax)))
+        #cb(eta, eta / Max(1.0, abs(h)*hmax_inv*eta))
+        cb(eta, eta / Max((1.0, ((h)**2)**0.5*hmax_inv*eta)))
+
+    return qprime, eta, zn
+
+
+def emit_step_changer(cb, name_gen, h, q, err):
+
+    hprime = var(name_gen("hprime"))
+    # This function resets the step size based on local error behavior.
+    cb(hprime, h * (1.0 / ((6*err) ** (1/(q + 1)) + 0.000001)))
+
+    return hprime
+
+
+def emit_order_increase(cb, name_gen, zn, tau, q, hscale, lvec):
+
+    xi = var(name_gen("xi"))
+    alpha1 = var(name_gen("alpha1"))
+    prod = var(name_gen("prod"))
+    xiold = var(name_gen("xiold"))
+    alpha0 = var(name_gen("alpha0"))
+    hsum = var(name_gen("hsum"))
+    cb(lvec[2], 1)
+    cb(alpha1, 1)
+    cb(prod, 1)
+    cb(xiold, 1)
+    cb(alpha0, -1)
+    cb(hsum, hscale)
+    with cb.if_(q, '>', 1):
+        for j in range(1, q):
+            cb(hsum, hsum + tau[j+1])
+            cb(xi, hsum / hscale)
+            cb(prod, prod * xi)
+            cb(alpha0, alpha0 - 1 / (j + 1))
+            cb(alpha1, alpha1 + 1 / xi)
+            for i in range(j+2, 1, -1):
+                cb(lvec[i], lvec[i] * xiold + lvec[i-1])
+            cb(xiold, xi)
+
+    A1 = var(name_gen("A1"))
+    cb(A1, (-alpha0 - alpha1) / prod)
+    cb(zn[q+1], A1 * zn[5])
+    for j in range(2, q+1):
+        cb(zn[j], lvec[j]*zn[q+1] + zn[j])
+
+    return zn, lvec
+
+
+def emit_order_decrease(cb, name_gen, zn, tau, q, hscale, lvec):
+
+    xi = var(name_gen("xi"))
+    cb(lvec[2], 1)
+    hsum = var(name_gen("hsum"))
+    for j in range(1, q-1):
+        cb(hsum, hsum + tau[j])
+        cb(xi, hsum / hscale)
+        for i in range(j+1, 1, -1):
+            cb(lvec[i], lvec[i] * xi + lvec[i-1])
+
+    for j in range(2, q):
+        cb(zn[j], -lvec[j] * zn[q] + zn[j])
+
+    return zn, lvec
+
+
+def emit_order_adjust(cb, name_gen, zn, tau, q, delq, hscale, lvec):
+
+    with cb.if_(delq, '==', 1):
+        zn, lvec = emit_order_increase(cb, name_gen, zn, tau, q, hscale, lvec)
+    with cb.else_():
+        zn, lvec = emit_order_decrease(cb, name_gen, zn, tau, q, hscale, lvec)
+
+    return zn, lvec
+
+
+def emit_eta_plus(cb, name_gen, zn, acor, tau, h, ewt, tq, saved_tq5, q, n):
+
+    cquot = var(name_gen("cquot"))
+    cb(cquot, (tq[5] / saved_tq5) * (h / tau[2])**(q+1))
+    array = var("<builtin>array")
+    tempv = var(name_gen("tempv"))
+    cb(tempv, array(n))
+    cb(tempv, -cquot * zn[5] + acor)
+    dup = var(name_gen("dup"))
+    wnorm = var(name_gen("wnorm"))
+    wnorm = emit_wrms(cb, name_gen, tempv, ewt, n)
+    cb(dup, wnorm / tq[3])
+    eta_plus = var(name_gen("eta_plus"))
+    cb(eta_plus, 1.0 / ((10 * dup) ** (1 / (q+2)) + 0.000001))
+
+    return eta_plus
+
+
+def emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n):
+
+    ddn = var(name_gen("ddn"))
+    wnorm = var(name_gen("wnorm"))
+
+    wnorm = emit_wrms(cb, name_gen, zn, ewt, n)
+
+    cb(ddn, wnorm / tq[1])
+    eta_minus = var(name_gen("eta_minus"))
+    cb(eta_minus, 1.0 / ((6 * ddn) ** (1 / q) + 0.000001))
+
+    return eta_minus
+
+
+def emit_err_weights(cb, name_gen, y, atol, rtol, ewt, n):
+
+    for i in range(n):
+        #cb(ewt[i], 1.0 / (rtol*abs(y[i]) + atol))
+        cb(ewt[i], 1.0 / (rtol*((y[i])**2)**0.5 + atol))
+
+    return ewt
+
+
+def emit_wrms(cb, name_gen, y, ewt, n):
+
+    wnorm = var(name_gen("wnorm"))
+    cb(wnorm, 0)
+    for i in range(n):
+        cb(wnorm, wnorm + (y[i]*ewt[i])**2)
+
+    cb(wnorm, wnorm * (1 / n))
+    cb(wnorm, wnorm ** (1/2))
+
+    return wnorm
+
+
+# }}}
+
+
+# {{{ cvode-like method
+
+class CVODEMethodBuilder(MethodBuilder):
+    """
+    User-supplied context:
+        <state> + component_id: The value that is integrated
+        <func> + component_id: The right hand side
+
+    .. automethod:: __init__
+    .. automethod:: generate
+    """
+
+    def __init__(self, component_id, multistep_method='bdf',
+                 nls='newton', atol=1e-12, rtol=1e-6):
+        """
+        :arg multistep_method: Accepts 'bdf' or 'am',
+            where BDF (for stiff problems) uses the FLC-form of BDF,
+            and AM uses Adams-Moulton.
+        :arg nls: accepts 'newton' or 'functional'
+        :arg atol: absolute tolerance
+        :arg rtol: relative tolerance
+        """
+
+        # check inputs
+        if multistep_method != 'bdf' and multistep_method != 'am':
+            raise ValueError("multistep_method must be set to either bdf or am")
+        if nls != 'newton' and nls != 'functional':
+            raise ValueError("nls must be set to either newton or functional")
+
+        super(CVODEMethodBuilder, self).__init__()
+        self.multistep_method = multistep_method
+        self.nls = nls
+        self.component_id = component_id
+        self.atol = atol
+        self.rtol = rtol
+
+        # set max order based on whether using bdf or am
+        if multistep_method == 'bdf':
+            self.max_order = 5
+        else:
+            self.max_order = 6
+
+        # Declare variables
+        self.step = var('<p>step')
+        self.function = var('<func>' + component_id)
+
+        self.state = var('<state>' + component_id)
+        self.t = var('<t>')
+        self.dt = var('<dt>')
+
+        # CVODE's persistent state - data that needs to exist between phases
+        self.t_current = var('t_current')
+        self.t_targ = var('t_targ')
+        self.setup_counter = var('setup_counter')
+        self.jac_counter = var('jac_counter')
+        self.setup_flag = var('setup_flag')
+        self.nflag = var('nflag')
+        self.gamma_p = var('gamma_p')
+        self.q = var('q')
+        self.qprime = var('qprime')
+        self.qwait = var('qwait')
+        self.R = var('R')
+        self.saved_tq5 = var('saved_tq5')
+        self.hscale = var('hscale')
+        self.etamax = var('etamax')
+        self.t_int = var('t_int')
+        self.h = var("h")
+        self.hprime = var("hprime")
+        self.y_int = var('y_int')
+        self.tau = var('tau')
+        self.jest = var('jest')
+        self.n = var('n')
+        self.zn = \
+            [var('<p>zn_' + str(i)) for i in range(0, self.max_order)]
+        self.tq = \
+            [var('tq_' + str(i)) for i in range(0, self.max_order)]
+        self.lvec = \
+            [var('l_' + str(i)) for i in range(0, self.max_order)]
+        self.sign = var('sign')
+        self.hg = var('hg')
+        self.hlb = var('hlb')
+        self.hub = var('hub')
+        self.hbias = var('hbias')
+        self.hnew = var('hnew')
+        self.count = var('count')
+        self.max_iters = var('max_iters')
+        self.delp = var('delp')
+        self.dsm = var('dsm')
+        self.it = var('it')
+        self.eta = var('eta')
+        self.newit = var('newit')
+        self.rl1 = var('rl1')
+        self.ym = var('ym')
+        self.acor = var('acor')
+        self.ewt = var('ewt')
+
+    def generate(self):
+        """
+        :returns: :class:`dagrt.language.DAGCode`
+        """
+        from pytools import UniqueNameGenerator
+        name_gen = UniqueNameGenerator()
+
+        from dagrt.language import DAGCode, CodeBuilder
+
+        array = var("<builtin>array")
+
+        # Phase: INITIALIZE
+        with CodeBuilder(name="initialize") as cb_init:
+            cb_init(self.step, 1)
+            # Initial set of a number of CVODE parameters
+            cb_init(self.setup_counter, 0)
+            cb_init(self.jac_counter, 0)
+            cb_init(self.setup_flag, True)
+            cb_init(self.gamma_p, self.dt)
+            cb_init(self.n, var('<builtin>len')(self.state))
+            array = var("<builtin>array")
+            for i in range(0, self.max_order):
+                cb_init(self.zn[i], array(self.n))
+                cb_init(self.tq[i], 0)
+                cb_init(self.lvec[i], 0)
+            cb_init(self.zn[0], self.state)
+            cb_init(self.zn[1], self.eval_rhs(self.t, self.state))
+            cb_init(self.jest, array(self.n*self.n))
+            cb_init(self.q, 1)
+            cb_init(self.qprime, 1)
+            cb_init(self.qwait, self.q + 1)
+            cb_init(self.tau, array(self.max_order+1))
+            cb_init(self.R, 1)
+            cb_init(self.saved_tq5, 1)
+            cb_init(self.etamax, 10000)
+            cb_init(self.hscale, 0)
+            cb_init(self.nflag, False)
+            cb_init(self.t_int, self.t)
+            cb_init(self.hprime, 0)
+            cb_init(self.y_int, array(self.n))
+            cb_init(self.y_int, self.state)
+            cb_init(self.acor, array(self.n))
+            cb_init(self.ym, array(self.n))
+            cb_init(self.ewt, array(self.n))
+            self.ewt = emit_err_weights(cb_init, name_gen,
+                                        self.state, self.atol,
+                                        self.rtol, self.ewt, self.n)
+
+            # This is the first call - we need a step
+            # First, check that ttarg and t_current are sufficiently separated.
+            tdiff = var(name_gen("tdiff"))
+            cb_init(tdiff, self.t_targ - self.t_current)
+            with cb_init.if_(tdiff, '==', 0):
+                # Step is zero - no good
+                cb_init.raise_(ValueError, "Initial timestep faulty")
+
+            tdist = var(name_gen("tdist"))
+            uround = var(name_gen("uround"))
+            tround = var(name_gen("tround"))
+            from pymbolic.primitives import Max
+            #cb(sign, np.sign(xdiff))
+            cb_init(self.sign, tdiff / ((tdiff**2)**0.5))
+            #cb(xdist, abs(xdiff))
+            cb_init(tdist, ((tdiff)**2)**0.5)
+            # FIXME: float floor
+            #cb_init(uround, np.finfo(float).eps)
+            cb_init(uround, 2e-33)
+            #cb_init(tround, uround * Max(abs(self.t_targ), abs(self.t_current)))
+            cb_init(tround, uround * Max((((self.t_targ)**2)**0.5,
+                    ((self.t_current)**2)**0.5)))
+            with cb_init.if_(tdist, '<', 2*tround):
+                # Step is extremely tiny - no good
+                cb_init.raise_(ValueError, "Initial timestep faulty")
+
+            # Find upper and lower bounds on h, and take the geometric mean
+            # if the bounds cross each other, this is our initial step
+
+            hlb_factor = var(name_gen("hlb_factor"))
+            hub_factor = var(name_gen("hub_factor"))
+            cb_init(hlb_factor, 100)  # default in CVODE
+            cb_init(hub_factor, 0.1)  # default in CVODE
+            cb_init(self.hlb, hlb_factor * tround)
+            # initial_step_upper_bound
+            #cb(hub, hub_factor * abs(zn[0]) + atol)
+            cb_init(self.hub, hub_factor * ((self.zn[0])**2)**0.5 + self.atol)
+            #cb(hub, abs(zn[1]) / hub)
+            cb_init(self.hub, ((self.zn[1])**2)**0.5 / self.hub)
+            hub_max = var(name_gen("hub_max"))
+            #cb(hub_max, np.linalg.norm(hub, np.inf))
+            cb_init(hub_max, var("<builtin>norm_inf")(self.hub))
+            cb_init(self.hub, hub_factor*tdist)
+            with cb_init.if_(self.hub*hub_max, '>', 1):
+                cb_init(self.hub, 1.0 / hub_max)
+
+            cb_init(self.hg, (self.hlb*self.hub) ** 0.5)
+
+            h0 = var(name_gen("h0"))
+            with cb_init.if_(self.hub, '<', self.hlb):
+                with cb_init.if_(self.sign, '==', -1):
+                    cb_init(self.hg, -self.hg)
+                cb_init(h0, self.hg)
+                cb_init.switch_phase("initialize_finish")
+            with cb_init.else_():
+                # Otherwise, use a second-derivative estimate to find it.
+                cb_init(self.count, 0)
+                cb_init(self.max_iters, 4)  # default in CVODE
+                cb_init(self.hbias, 0.5)  # default in CVODE
+                cb_init.switch_phase("initial_step")
+
+        # Phase: INITIALIZE_STEP
+        with CodeBuilder(name="initial_step") as cb_initial_step:
+
+            hgs = var(name_gen("hgs"))
+            cb_initial_step(hgs, self.hg*self.sign)
+            # Compute a second-derivative estimate using difference quotients
+            y = var(name_gen("y"))
+            ftemp = var(name_gen("ftemp"))
+            ydd = var(name_gen("ydd"))
+            yddnrm = var(name_gen("yddnrm"))
+            array = var("<builtin>array")
+            cb_initial_step(y, array(self.n))
+            cb_initial_step(y, hgs*self.zn[1] + self.zn[0])
+            cb_initial_step(ftemp, array(self.n))
+            cb_initial_step(ftemp, self.eval_rhs(self.t_current, y))
+            cb_initial_step(ydd, array(self.n))
+            cb_initial_step(ydd, (ftemp - self.zn[1]) / hgs)
+            yddnrm = emit_wrms(cb_initial_step, name_gen, ydd, self.ewt, self.n)
+            with cb_initial_step.if_(yddnrm*self.hub*self.hub, '>', 2):
+                cb_initial_step(self.hnew, (2.0 / yddnrm) ** 0.5)
+            with cb_initial_step.else_():
+                cb_initial_step(self.hnew, (self.hg*self.hub) ** 0.5)
+            cb_initial_step(self.count, self.count + 1)
+            with cb_initial_step.if_(self.count, '>=', self.max_iters):
+                cb_initial_step.switch_phase("initialize_finish")
+            with cb_initial_step.else_():
+                hrat = var(name_gen("hrat"))
+                cb_initial_step(hrat, self.hnew/self.hg)
+                from pymbolic.primitives import Comparison, LogicalAnd
+                with cb_initial_step.if_(LogicalAnd((Comparison(hrat, ">", 0.5),
+                                         Comparison(hrat, "<", 2)))):
+                    cb_initial_step.switch_phase("initialize_finish")
+                with cb_initial_step.else_():
+                    with cb_initial_step.if_(LogicalAnd(
+                                             (Comparison(self.count, ">=", 2),
+                                             Comparison(hrat, ">", 2)))):
+                        cb_initial_step(self.hnew, self.hg)
+                        cb_initial_step.switch_phase("initialize_finish")
+                    with cb_initial_step.else_():
+                        cb_initial_step(self.hg, self.hnew)
+                        cb_initial_step.switch_phase("initial_step")
+
+        # Phase: INITIALIZE_FINISH
+        with CodeBuilder(name="initialize_finish") as cb_init_finish:
+
+            # Apply bias, check bounds and sign
+            cb_init_finish(h0, self.hbias*self.hnew)
+            with cb_init_finish.if_(h0, '<', self.hlb):
+                cb_init_finish(h0, self.hlb)
+            with cb_init_finish.if_(h0, '>', self.hub):
+                cb_init_finish(h0, self.hub)
+            with cb_init_finish.if_(self.sign, '==', -1):
+                cb_init_finish(h0, -h0)
+            cb_init_finish(self.h, h0)
+            cb_init_finish(self.zn[1], self.zn[1]*self.h)
+            cb_init_finish(self.gamma_p, self.h)
+            cb_init_finish(self.hprime, self.h)
+            cb_init_finish(self.hscale, self.h)
+
+        # Phase: STEP_SETUP
+        with CodeBuilder(name="step_setup") as cb_step_setup:
+            # We'll put the main CVODE runner here for now.
+            cb_step_setup(self.t_targ, self.t + self.dt)
+            cb_step_setup(self.t_current, self.t_int)
+
+            cb_step_setup(self.eta, self.hprime / self.h)
+
+            cb_step_setup(self.ym, self.state)
+
+        # Phase: STEP
+        with CodeBuilder(name="step") as cb_step:
+
+            # Reset error weights
+            self.ewt = emit_err_weights(cb_step, name_gen, self.ym,
+                                        self.atol, self.rtol, self.ewt, self.n)
+
+            # Check for order change
+            with cb_step.if_(self.qprime, '!=', self.q):
+                cb_step(self.qwait, self.qprime + 1)
+                delq = var(name_gen('delq'))
+                cb_step(delq, self.qprime - self.q)
+                self.zn, self.lvec = emit_order_adjust(cb_step, name_gen, self.zn,
+                                                    self.tau, self.q, delq,
+                                                    self.hscale, self.lvec)
+                cb_step(self.q, self.qprime)
+
+            # Rescale of Nordsieck happens here in the case of a step
+            # change.
+            with cb_step.if_(self.hprime, '!=', self.h):
+                self.zn, self.hscale = emit_rescale(cb_step, name_gen, self.zn,
+                                                    self.q, self.eta, self.h,
+                                                    self.hscale)
+
+            # For now, only allow 7 "retries" at order 1. This
+            # is the end-all case in CVODE.
+            cb_step(self.it, 1)
+
+        # Phase: INNER_STEP
+        with CodeBuilder(name="inner_step") as cb_is:
+
+            # This code is supposed to mirror CVStep in
+            # the original CVODE code - it does the prediction,
+            # sets the BDF parameters, and calls the newton solver
+
+            cb_is(self.h, self.hprime)
+
+            # Get predicted yn_0 (using history)
+            self.zn = emit_predict(cb_is, name_gen, self.zn, self.q)
+
+            # Call BDF setup to update histories, get coefficients, etc.
+            self.lvec, self.tq = emit_bdf_setup(cb_is, name_gen, self.q,
+                                             self.h, self.qwait, self.tau,
+                                             self.lvec, self.tq)
+
+            # Call the Newton solver for the "correction"
+            # to our "prediction"
+
+            linear_solve = var("<builtin>linear_solve")
+            array = var("<builtin>array")
+            # Only supports Newton for now.
+            # A simple Newton solver, which for now uses numpy (LU) for the
+            # linear solve.
+            cb_is(self.rl1, 1 / self.lvec[1])
+            cb_is(self.acor, array(self.n))
+            nflag_prev = var(name_gen("nflag_prev"))
+            cb_is(nflag_prev, self.nflag)
+            cb_is(self.nflag, True)
+
+            f = var(name_gen("f"))
+            cb_is(f, array(self.n))
+            cb_is(f, self.eval_rhs(self.t_current, self.zn[0]))
+
+            # Allow at most three iterations
+            cb_is(self.newit, 1)
+            gamma = var(name_gen("gamma"))
+            cb_is(gamma, self.rl1 * self.h)
+            gamrat = var(name_gen("gamrat"))
+            cb_is(gamrat, gamma / self.gamma_p)
+            crit = var(name_gen("crit"))
+            #cb(crit, abs(gamrat - 1))
+            cb_is(crit, ((gamrat - 1)**2)**0.5)
+            with cb_is.if_(crit, '>', 0.3):
+                cb_is(self.setup_flag, True)
+
+            with cb_is.if_(self.setup_counter, '>=', 20):
+                cb_is(self.setup_flag, True)
+
+            # Does the Jacobian estimate given to the Newton
+            # solver need to be updated?
+            with cb_is.if_(self.setup_flag, '==', True):
+                # We don't always reform J in this case
+                jac_flag = var(name_gen("jac_flag"))
+                # Compound conditional
+                from pymbolic.primitives import Comparison, LogicalAnd, LogicalOr
+                with cb_is.if_(LogicalAnd((Comparison(crit, "<", 0.2),
+                               Comparison(nflag_prev, "==", False)))):
+                    cb_is(jac_flag, True)
+                with cb_is.else_():
+                    cb_is(jac_flag, False)
+                with cb_is.if_(LogicalOr((Comparison(jac_flag, "==", True),
+                               Comparison(self.jac_counter, ">", 50)))):
+                    self.jest = self.emit_jac_estimate(cb_is, name_gen, self.zn[0],
+                                                  self.n, f, self.ewt, self.h,
+                                                  self.t_current, self.jest)
+                    cb_is(self.jac_counter, 0)
+                cb_is(self.setup_flag, False)
+                # Reset gamma_p and step counter
+                cb_is(self.setup_counter, 0)
+                cb_is(self.gamma_p, gamma)
+                cb_is(gamrat, gamma / self.gamma_p)
+                cb_is(self.R, 1)
+
+            M = var(name_gen("M"))
+            cb_is(M, array(self.n*self.n))
+
+            # Need to create and fill an identity at this point
+            eye = var(name_gen("i"))
+            cb_is(eye, array(self.n*self.n))
+            for i in range(0, self.n):
+                cb_is(eye[i*(self.n-1)], 1.0)
+            cb_is(M, eye - gamma*self.jest)
+
+            converged = var(name_gen("converged"))
+            cb_is(converged, False)
+            cb_is(self.delp, 0)
+            cb_is.switch_phase("newton_step")
+
+        # PHASE: NEWTON_STEP
+        with CodeBuilder(name="newton_step") as cb_ns:
+
+            # The actual Newton loop
+            tempv = var(name_gen("tempv"))
+            corr = var(name_gen("tempv"))
+            cb_ns(tempv, array(self.n))
+            cb_ns(corr, array(self.n))
+            cb_ns(tempv, self.rl1*self.zn[1] + self.acor)
+            cb_ns(tempv, gamma*f - tempv)
+            linear_solve = var("<builtin>linear_solve")
+            transpose = var("<builtin>transpose")
+            M_t = var(name_gen("M_t"))
+            cb_ns(M_t, array(self.n*self.n))
+            cb_ns(M_t, transpose(M, self.n))
+            cb_ns(corr, linear_solve(M_t, tempv, self.n, 1))
+            # Scale the correction to account for a change in gamma
+            with cb_ns.if_(gamrat, '!=', 1):
+                cb_ns(corr, (2 / (1 + gamrat)) * corr)
+
+            delm = var(name_gen("delm"))
+            delm = emit_wrms(cb_ns, name_gen, corr, self.ewt, self.n)
+            cb_ns(self.acor, self.acor + corr)
+            cb_ns(self.ym, self.zn[0] + self.acor)
+
+            # Call the convergence check to see if we can exit the loop.
+            converged, self.R, acnrm = emit_convergence_check(cb_ns, name_gen, delm,
+                                                              self.delp, self.tq,
+                                                              self.R, self.newit,
+                                                              self.acor, self.ewt,
+                                                              self.n)
+            with cb_ns.if_(converged, '==', True):
+                cb_ns.switch_phase("inner_step_check")
+
+            with cb_ns.else_():
+                cb_ns(self.delp, delm)
+                cb_ns(f, self.eval_rhs(self.t_current, self.ym))
+                cb_ns(self.newit, self.newit + 1)
+                with cb_ns.if_(self.newit, '>', 3):
+                    # If we haven't converged by this point, we have to reduce the
+                    # step size and try again
+                    cb_ns(self.nflag, False)
+                    cb_ns.switch_phase("inner_step_check")
+
+        # Phase: INNER_STEP_CHECK
+        with CodeBuilder(name="inner_step_check") as cb_isc:
+
+            [self.zn, self.eta, self.h,
+                kflag, self.setup_flag,
+                self.hscale, self.etamax,
+                self.jac_counter] = emit_flag_handler(cb_isc, name_gen, self.nflag,
+                                                      self.zn, self.q, self.eta,
+                                                      self.h, self.setup_flag,
+                                                      self.hscale, self.etamax,
+                                                      self.jac_counter)
+
+            # Check the error - if it's too high we need a new step
+            with cb_isc.if_(kflag, '==', True):
+                [self.hprime, self.dsm,
+                    self.etamax] = emit_error_check(cb_isc, name_gen,
+                                                    self.h, self.q,
+                                                    self.tq, self.n,
+                                                    self.ewt, self.acor,
+                                                    self.etamax, self.dsm,
+                                                    self.hprime)
+                with cb_isc.if_(self.hprime, '==', self.h):
+                    # The step wasn't changed, because the error was fine
+                    cb_isc.switch_phase("step_check")
+                with cb_isc.else_():
+                    # Else, the error check failed: increment the iteration counter
+                    cb_isc(self.it, self.it + 1)
+                    # Reset eta with new step
+                    cb_isc(self.eta, self.hprime / self.h)
+                    # Also need to restore the Nordsieck array to previous setting
+                    self.zn = emit_restore(cb_isc, name_gen, self.zn, self.q)
+                    self.zn, self.hscale = emit_rescale(cb_isc, name_gen, self.zn,
+                                                        self.q, self.eta, self.h,
+                                                        self.hscale)
+                    # Set setup flag to True
+                    cb_isc(self.setup_flag, True)
+                    # Check phase switch condition
+                    with cb_isc.if_(self.it, '>', 7):
+                        cb_isc.switch_phase("step_check")
+
+            with cb_isc.else_():
+                # The Newton solve failed to converge - try again without
+                # checking the error on an unconverged result
+                cb_isc(self.it, self.it + 1)
+                cb_isc(self.setup_flag, True)
+                cb_isc(self.hprime, self.h)
+
+        # PHASE: StepCheck
+        with CodeBuilder(name="step_check") as cb_sc:
+            with cb_sc.if_(self.it, '==', 8):
+                # This step failed - panic
+                cb_sc.raise_(ValueError, "Eight iterations")
+
+            [self.zn, self.saved_tq5,
+                self.qwait, self.tau] = emit_complete_step(cb_sc, name_gen, self.zn,
+                                                          self.l, self.acor, self.q,
+                                                          self.qwait, self.tau,
+                                                          self.h, self.tq,
+                                                          self.t_current,
+                                                          self.saved_tq5)
+            [eta, self.qprime, acor,
+                self.qwait, self.zn,
+                self.etamax] = emit_prepare_step(cb_sc, name_gen, self.tq, self.acor,
+                                                 self.zn, self.h, self.q, self.n,
+                                                 self.tau, self.saved_tq5, self.dsm,
+                                                 self.ewt, self.qwait, self.etamax,
+                                                 self.eta, self.qprime)
+            cb_sc(self.hprime, self.eta * self.h)
+            cb_sc(self.t_current, self.t_current + self.h)
+            # Only increment Jacobian and setup counters when CVODE is
+            # actually doing work
+            cb_sc(self.setup_counter, self.setup_counter + 1)
+            cb_sc(self.jac_counter, self.jac_counter + 1)
+
+            # SWITCH PHASE: CHECK
+            with cb_sc.if_(self.t_current, '>=', self.t_targ):
+                cb_sc.switch_phase("finish")
+
+        # PHASE: Finish
+        with CodeBuilder(name="finish") as cb_finish:
+            # If we overshot xtarg (we probably did), we
+            # need to interpolate.
+            with cb_finish.if_(self.t_current, '!=', self.t_targ):
+                cb_finish(self.y_int, self.ym)
+                cb_finish(self.t_int, self.t_current)
+                self.ym = emit_interp(cb_finish, name_gen, self.zn,
+                                      self.t_current, self.t_targ,
+                                      self.h, self.q, self.n, self.ym)
+
+            cb_finish(self.t, self.t + self.dt)
+            cb_finish(self.state, self.ym)
+            cb_finish.yield_state(expression=self.state,
+                                     component_id=self.component_id,
+                                     time_id='', time=self.t)
+
+        # Given the presence of while loops and adaptivity, the phase diagram
+        # here is significantly more complex.
+        return DAGCode(
+            phases={
+                "initialize": cb_init.as_execution_phase(next_phase="initial_step"),
+                "initial_step": cb_initial_step.as_execution_phase(
+                    next_phase="initial_step"),
+                "initialize_finish": cb_init_finish.as_execution_phase(
+                    next_phase="step_setup"),
+                "step_setup": cb_step_setup.as_execution_phase(
+                    next_phase="step_setup"),
+                "step": cb_step.as_execution_phase(next_phase="inner_step"),
+                "inner_step": cb_is.as_execution_phase(next_phase="newton_step"),
+                "newton_step": cb_ns.as_execution_phase(next_phase="newton_step"),
+                "inner_step_check": cb_isc.as_execution_phase(
+                    next_phase="inner_step"),
+                "step_check": cb_sc.as_execution_phase(next_phase="step"),
+                "finish": cb_finish.as_execution_phase(next_phase="step_setup")
+                },
+            initial_phase="initialize")
+
+    def eval_rhs(self, t, y):
+        """Return a node that evaluates the RHS at the given time and
+        component value."""
+        from pymbolic.primitives import CallWithKwargs
+        return CallWithKwargs(function=self.function,
+                              parameters=(),
+                              kw_parameters={"t": t, self.component_id: y})
+
+    def emit_jac_estimate(self, cb, name_gen, y, n, rhs_data, ewt, h, t, jest):
+
+        # Estimates the Jacobian using a simple difference quotient.
+
+        uround = var(name_gen("uround"))
+        # FIXME: this is not going to work - need to add this builtin
+        # cb(uround, np.finfo(float).eps)
+        cb(uround, 2e-33)
+
+        # Minimum increment is based on uround and norm of f
+        rhs_norm = var(name_gen("rhs_norm"))
+        rhs_norm = emit_wrms(cb, name_gen, rhs_data, ewt, n)
+        sigma_o = var(name_gen("sigma_o"))
+        sigma = var(name_gen("sigma"))
+        #cb(sigma_o, 1000*abs(h)*uround*n*rhs_norm)
+        cb(sigma_o, 1000*((h)**2)**0.5*uround*n*rhs_norm)
+
+        array = var("<builtin>array")
+        cb(jest, array(n*n))
+        ysave = var(name_gen("ysave"))
+        rhs_temp = var(name_gen("rhs_temp"))
+        cb(rhs_temp, array(n))
+        for j in range(0, n):
+
+            cb(ysave, y[j])
+            # Determine the increment sigma
+            from pymbolic.primitives import Max
+            #cb(sigma, Max(((uround)**0.5)*abs(y[j]), sigma_o/w[j]))
+            cb(sigma, Max((((uround)**0.5)*((y[j])**2)**0.5, sigma_o/ewt[j])))
+            cb(y[j], y[j] + sigma)
+            cb(rhs_temp, self.eval_rhs(t, y))
+            cb(jest[j*(n-1):(j+1)*(n-1)], (rhs_temp - rhs_data) / sigma)
+            cb(y[j], ysave)
+
+        return jest
+# }}}
+
+# vim: fdm=marker
-- 
GitLab


From 91cd72054f0ea3bbac8571c07969d4126d055686 Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 21 Feb 2020 17:35:11 -0600
Subject: [PATCH 2/6] Fix incorrect variable name l -> lvec

---
 leap/cvode/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/leap/cvode/__init__.py b/leap/cvode/__init__.py
index 4c37633..a5c7caa 100644
--- a/leap/cvode/__init__.py
+++ b/leap/cvode/__init__.py
@@ -981,7 +981,7 @@ class CVODEMethodBuilder(MethodBuilder):
 
             [self.zn, self.saved_tq5,
                 self.qwait, self.tau] = emit_complete_step(cb_sc, name_gen, self.zn,
-                                                          self.l, self.acor, self.q,
+                                                          self.lvec, self.acor, self.q,
                                                           self.qwait, self.tau,
                                                           self.h, self.tq,
                                                           self.t_current,
-- 
GitLab


From 449bcaaa4479a92476e810d5f3e5183a2a839e3c Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 21 Feb 2020 17:59:45 -0600
Subject: [PATCH 3/6] Placate Flake8 (de-capitalize some variables)

---
 leap/cvode/__init__.py | 96 +++++++++++++++++++++---------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/leap/cvode/__init__.py b/leap/cvode/__init__.py
index a5c7caa..566bc4a 100644
--- a/leap/cvode/__init__.py
+++ b/leap/cvode/__init__.py
@@ -146,36 +146,36 @@ def emit_bdf_setup(cb, name_gen, q, h, qwait, tau, lvec, tq):
         for i in range(q, 0, -1):
             cb(lvec[i], lvec[i] + lvec[i-1]*xistar_inv)
 
-    A1 = var(name_gen("A1"))
-    A2 = var(name_gen("A2"))
-    A3 = var(name_gen("A3"))
-    A4 = var(name_gen("A4"))
-    A5 = var(name_gen("A5"))
-    A6 = var(name_gen("A6"))
-    C = var(name_gen("C"))
-    CPrime = var(name_gen("CPrime"))
-    CPrimePrime = var(name_gen("CPrimePrime"))
-    cb(A1, 1 - alpha_0hat + alpha_0)
-    cb(A2, 1 + q * A1)
+    a1 = var(name_gen("a1"))
+    a2 = var(name_gen("a2"))
+    a3 = var(name_gen("a3"))
+    a4 = var(name_gen("a4"))
+    a5 = var(name_gen("a5"))
+    a6 = var(name_gen("a6"))
+    c = var(name_gen("c"))
+    cprime = var(name_gen("cprime"))
+    cprimeprime = var(name_gen("cprimeprime"))
+    cb(a1, 1 - alpha_0hat + alpha_0)
+    cb(a2, 1 + q * a1)
     # TEST QUANTITY ARRAY
-    #cb(tq[2], abs(alpha_0 * (A2 / A1)))  # epsilon, the upper bound on local error
-    cb(tq[2], ((alpha_0 * (A2 / A1))**2)**0.5)  # epsilon, upper bound on local error
-    #cb(tq[5], abs((A2) / (l[q] * xi_inv/xistar_inv)))
-    cb(tq[5], (((A2) / (lvec[q] * xi_inv/xistar_inv))**2)**0.5)
+    #cb(tq[2], abs(alpha_0 * (a2 / a1)))  # epsilon, the upper bound on local error
+    cb(tq[2], ((alpha_0 * (a2 / a1))**2)**0.5)  # epsilon, upper bound on local error
+    #cb(tq[5], abs((a2) / (l[q] * xi_inv/xistar_inv)))
+    cb(tq[5], (((a2) / (lvec[q] * xi_inv/xistar_inv))**2)**0.5)
     with cb.if_(qwait, '==', 1):
-        cb(C, xistar_inv / lvec[q])
-        cb(A3, alpha_0 + 1.0 / q)
-        cb(A4, alpha_0hat + xi_inv)
-        cb(CPrime, A3 / (1.0 - A4 + A3))
-        #cb(tq[1], abs(CPrime / C))
-        cb(tq[1], ((CPrime / C)**2)**0.5)
+        cb(c, xistar_inv / lvec[q])
+        cb(a3, alpha_0 + 1.0 / q)
+        cb(a4, alpha_0hat + xi_inv)
+        cb(cprime, a3 / (1.0 - a4 + a3))
+        #cb(tq[1], abs(cprime / C))
+        cb(tq[1], ((cprime / c)**2)**0.5)
         cb(hsum, hsum + tau[q])
         cb(xi_inv, h / hsum)
-        cb(A5, alpha_0 - (1.0 / (q+1)))
-        cb(A6, alpha_0hat - xi_inv)
-        cb(CPrimePrime, A2 / (1.0 - A6 + A5))
-        #cb(tq[3], abs(CPrimePrime * xi_inv * (q+2) * A5))
-        cb(tq[3], ((CPrimePrime * xi_inv * (q+2) * A5)**2)**0.5)
+        cb(a5, alpha_0 - (1.0 / (q+1)))
+        cb(a6, alpha_0hat - xi_inv)
+        cb(cprimeprime, a2 / (1.0 - a6 + a5))
+        #cb(tq[3], abs(cprimeprime * xi_inv * (q+2) * a5))
+        cb(tq[3], ((cprimeprime * xi_inv * (q+2) * a5)**2)**0.5)
 
     cb(tq[4], 0.1 * tq[2])  # for convergence check
 
@@ -284,18 +284,18 @@ def emit_rescale(cb, name_gen, zn, q, eta, h, hscale):
     return zn, hscale
 
 
-def emit_convergence_check(cb, name_gen, delm, delp, tq, R, it, acor, ewt, n):
+def emit_convergence_check(cb, name_gen, delm, delp, tq, r, it, acor, ewt, n):
 
     # Check the convergence rate - this is our stopping criteria for the
     # Newton solver
 
     from pymbolic.primitives import Max
     with cb.if_(it, '>', 1):
-        cb(R, Max((0.3*R, delm/delp)))
+        cb(r, Max((0.3*r, delm/delp)))
 
     # If it's the first iteration, r = 1.
     check = var(name_gen("check"))
-    cb(check, (R*delm) / tq[4])
+    cb(check, (r*delm) / tq[4])
 
     converged = var(name_gen("converged"))
     with cb.if_(check, '<', 1):
@@ -311,7 +311,7 @@ def emit_convergence_check(cb, name_gen, delm, delp, tq, R, it, acor, ewt, n):
         wnorm = emit_wrms(cb, name_gen, acor, ewt, n)
         cb(acnrm, wnorm)
 
-    return converged, R, acnrm
+    return converged, r, acnrm
 
 
 def emit_order_changer(cb, name_gen, h, q, zn, acor, ewt, n,
@@ -397,9 +397,9 @@ def emit_order_increase(cb, name_gen, zn, tau, q, hscale, lvec):
                 cb(lvec[i], lvec[i] * xiold + lvec[i-1])
             cb(xiold, xi)
 
-    A1 = var(name_gen("A1"))
-    cb(A1, (-alpha0 - alpha1) / prod)
-    cb(zn[q+1], A1 * zn[5])
+    a1 = var(name_gen("a1"))
+    cb(a1, (-alpha0 - alpha1) / prod)
+    cb(zn[q+1], a1 * zn[5])
     for j in range(2, q+1):
         cb(zn[j], lvec[j]*zn[q+1] + zn[j])
 
@@ -551,7 +551,7 @@ class CVODEMethodBuilder(MethodBuilder):
         self.q = var('q')
         self.qprime = var('qprime')
         self.qwait = var('qwait')
-        self.R = var('R')
+        self.r = var('r')
         self.saved_tq5 = var('saved_tq5')
         self.hscale = var('hscale')
         self.etamax = var('etamax')
@@ -618,7 +618,7 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_init(self.qprime, 1)
             cb_init(self.qwait, self.q + 1)
             cb_init(self.tau, array(self.max_order+1))
-            cb_init(self.R, 1)
+            cb_init(self.r, 1)
             cb_init(self.saved_tq5, 1)
             cb_init(self.etamax, 10000)
             cb_init(self.hscale, 0)
@@ -865,17 +865,17 @@ class CVODEMethodBuilder(MethodBuilder):
                 cb_is(self.setup_counter, 0)
                 cb_is(self.gamma_p, gamma)
                 cb_is(gamrat, gamma / self.gamma_p)
-                cb_is(self.R, 1)
+                cb_is(self.r, 1)
 
-            M = var(name_gen("M"))
-            cb_is(M, array(self.n*self.n))
+            m = var(name_gen("m"))
+            cb_is(m, array(self.n*self.n))
 
             # Need to create and fill an identity at this point
             eye = var(name_gen("i"))
             cb_is(eye, array(self.n*self.n))
             for i in range(0, self.n):
                 cb_is(eye[i*(self.n-1)], 1.0)
-            cb_is(M, eye - gamma*self.jest)
+            cb_is(m, eye - gamma*self.jest)
 
             converged = var(name_gen("converged"))
             cb_is(converged, False)
@@ -894,10 +894,10 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_ns(tempv, gamma*f - tempv)
             linear_solve = var("<builtin>linear_solve")
             transpose = var("<builtin>transpose")
-            M_t = var(name_gen("M_t"))
-            cb_ns(M_t, array(self.n*self.n))
-            cb_ns(M_t, transpose(M, self.n))
-            cb_ns(corr, linear_solve(M_t, tempv, self.n, 1))
+            m_t = var(name_gen("m_t"))
+            cb_ns(m_t, array(self.n*self.n))
+            cb_ns(m_t, transpose(m, self.n))
+            cb_ns(corr, linear_solve(m_t, tempv, self.n, 1))
             # Scale the correction to account for a change in gamma
             with cb_ns.if_(gamrat, '!=', 1):
                 cb_ns(corr, (2 / (1 + gamrat)) * corr)
@@ -908,9 +908,9 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_ns(self.ym, self.zn[0] + self.acor)
 
             # Call the convergence check to see if we can exit the loop.
-            converged, self.R, acnrm = emit_convergence_check(cb_ns, name_gen, delm,
+            converged, self.r, acnrm = emit_convergence_check(cb_ns, name_gen, delm,
                                                               self.delp, self.tq,
-                                                              self.R, self.newit,
+                                                              self.r, self.newit,
                                                               self.acor, self.ewt,
                                                               self.n)
             with cb_ns.if_(converged, '==', True):
@@ -981,9 +981,9 @@ class CVODEMethodBuilder(MethodBuilder):
 
             [self.zn, self.saved_tq5,
                 self.qwait, self.tau] = emit_complete_step(cb_sc, name_gen, self.zn,
-                                                          self.lvec, self.acor, self.q,
-                                                          self.qwait, self.tau,
-                                                          self.h, self.tq,
+                                                          self.lvec, self.acor,
+                                                          self.q, self.qwait,
+                                                          self.tau, self.h, self.tq,
                                                           self.t_current,
                                                           self.saved_tq5)
             [eta, self.qprime, acor,
-- 
GitLab


From 2b9ada3842ae8dc1e5a57f7b6859a614a2461ec8 Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 6 Mar 2020 17:25:41 -0600
Subject: [PATCH 4/6] Adds static phases for each CVODE order, fixes some bugs

---
 leap/cvode/__init__.py | 793 +++++++++++++++++++++++------------------
 1 file changed, 453 insertions(+), 340 deletions(-)

diff --git a/leap/cvode/__init__.py b/leap/cvode/__init__.py
index 566bc4a..edf12fa 100644
--- a/leap/cvode/__init__.py
+++ b/leap/cvode/__init__.py
@@ -39,7 +39,7 @@ __doc__ = """
 
 # {{{ CVODE-like adaptive implicit integration
 
-def emit_complete_step(cb, name_gen, zn, lvec, acor,
+def emit_comp_step(cb, name_gen, zn, lvec, acor,
                        q, qwait, tau, h, tq, t, saved_tq5):
 
     # This bit of code increments tau and applies
@@ -67,7 +67,7 @@ def emit_complete_step(cb, name_gen, zn, lvec, acor,
     return zn, saved_tq5, qwait, tau
 
 
-def emit_prepare_step(cb, name_gen, tq, acor, zn, h, q,
+def emit_prep_step(cb, name_gen, tq, acor, zn, h, q,
                       n, tau, saved_tq5, dsm, ewt, qwait,
                       etamax, eta, qprime):
 
@@ -75,6 +75,7 @@ def emit_prepare_step(cb, name_gen, tq, acor, zn, h, q,
 
     # if etamax is 1, defer step size or order changes
     from pymbolic.primitives import Max, Min
+    absval = var("<builtin>norm_1")
     with cb.if_(etamax, '==', 1):
         cb(qwait, Max((qwait, 2)))
         cb(qprime, q)
@@ -104,8 +105,7 @@ def emit_prepare_step(cb, name_gen, tq, acor, zn, h, q,
                 cb(eta, 1)
             with cb.else_():
                 cb(eta, Min((eta, etamax)))
-                #cb(eta, eta / Max(1.0, abs(h)*hmax_inv*eta))
-                cb(eta, eta / Max((1.0, ((h**2)**0.5)*hmax_inv*eta)))
+                cb(eta, eta / Max((1.0, absval(h)*hmax_inv*eta)))
 
         cb(etamax, 10.0)
         cb(acor, acor * (1 / tq[2]))
@@ -123,8 +123,12 @@ def emit_bdf_setup(cb, name_gen, q, h, qwait, tau, lvec, tq):
     xi_inv = var(name_gen("xi_inv"))
     xistar_inv = var(name_gen("xistar_inv"))
     hsum = var(name_gen("hsum"))
+    absval = var("<builtin>norm_1")
     cb(lvec[0], 1)
     cb(lvec[1], 1)
+    for i in range(2, q+1):
+        cb(lvec[i], 0)
+        cb(tq[i], 0)
     cb(alpha_0, -1)
     cb(alpha_0hat, -1)
     cb(xi_inv, 1)
@@ -158,24 +162,20 @@ def emit_bdf_setup(cb, name_gen, q, h, qwait, tau, lvec, tq):
     cb(a1, 1 - alpha_0hat + alpha_0)
     cb(a2, 1 + q * a1)
     # TEST QUANTITY ARRAY
-    #cb(tq[2], abs(alpha_0 * (a2 / a1)))  # epsilon, the upper bound on local error
-    cb(tq[2], ((alpha_0 * (a2 / a1))**2)**0.5)  # epsilon, upper bound on local error
-    #cb(tq[5], abs((a2) / (l[q] * xi_inv/xistar_inv)))
-    cb(tq[5], (((a2) / (lvec[q] * xi_inv/xistar_inv))**2)**0.5)
+    cb(tq[2], absval(alpha_0 * (a2 / a1)))  # epsilon, upper bound on local error
+    cb(tq[5], absval((a2) / (lvec[q] * xi_inv/xistar_inv)))
     with cb.if_(qwait, '==', 1):
         cb(c, xistar_inv / lvec[q])
         cb(a3, alpha_0 + 1.0 / q)
         cb(a4, alpha_0hat + xi_inv)
         cb(cprime, a3 / (1.0 - a4 + a3))
-        #cb(tq[1], abs(cprime / C))
-        cb(tq[1], ((cprime / c)**2)**0.5)
+        cb(tq[1], absval(cprime / c))
         cb(hsum, hsum + tau[q])
         cb(xi_inv, h / hsum)
         cb(a5, alpha_0 - (1.0 / (q+1)))
         cb(a6, alpha_0hat - xi_inv)
         cb(cprimeprime, a2 / (1.0 - a6 + a5))
-        #cb(tq[3], abs(cprimeprime * xi_inv * (q+2) * a5))
-        cb(tq[3], ((cprimeprime * xi_inv * (q+2) * a5)**2)**0.5)
+        cb(tq[3], absval(cprimeprime * xi_inv * (q+2) * a5))
 
     cb(tq[4], 0.1 * tq[2])  # for convergence check
 
@@ -234,7 +234,7 @@ def emit_error_check(cb, name_gen, h, q, tq, n, ewt, acor, etamax, err, hprime):
 
         cb(etamax, 1)
         # Reset the step size based on local error behavior.
-        hprime = emit_step_changer(cb, name_gen, h, q, err)
+        hprime = emit_step_changer(cb, name_gen, h, q, err, hprime)
 
     return hprime, err, etamax
 
@@ -284,7 +284,7 @@ def emit_rescale(cb, name_gen, zn, q, eta, h, hscale):
     return zn, hscale
 
 
-def emit_convergence_check(cb, name_gen, delm, delp, tq, r, it, acor, ewt, n):
+def emit_conv_check(cb, name_gen, delm, delp, tq, r, it, acor, ewt, n):
 
     # Check the convergence rate - this is our stopping criteria for the
     # Newton solver
@@ -325,10 +325,10 @@ def emit_order_changer(cb, name_gen, h, q, zn, acor, ewt, n,
     cb(eta_minus, 0)
     cb(eta_plus, 0)
     with cb.if_(q, '>', 1):
-        eta_minus = emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n)
+        eta_minus = emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n, eta_minus)
     with cb.if_(q, '<', 5):
         eta_plus = emit_eta_plus(cb, name_gen, zn, acor,
-                                 tau, h, ewt, tq, saved_tq5, q, n)
+                                 tau, h, ewt, tq, saved_tq5, q, n, eta_plus)
 
     # Pick one of these candidates
     etam = var(name_gen("etam"))
@@ -350,6 +350,7 @@ def emit_order_changer(cb, name_gen, h, q, zn, acor, ewt, n,
 
     hmax_inv = var(name_gen("hmax_inv"))
     cb(hmax_inv, 0)  # default in CVODE
+    absval = var("<builtin>norm_1")
 
     # CHECK THRESHOLD
     with cb.if_(etam, '<', 1.5):
@@ -357,15 +358,13 @@ def emit_order_changer(cb, name_gen, h, q, zn, acor, ewt, n,
         cb(qprime, q)
     with cb.else_():
         cb(eta, Min((eta, etamax)))
-        #cb(eta, eta / Max(1.0, abs(h)*hmax_inv*eta))
-        cb(eta, eta / Max((1.0, ((h)**2)**0.5*hmax_inv*eta)))
+        cb(eta, eta / Max((1.0, absval(h)*hmax_inv*eta)))
 
     return qprime, eta, zn
 
 
-def emit_step_changer(cb, name_gen, h, q, err):
+def emit_step_changer(cb, name_gen, h, q, err, hprime):
 
-    hprime = var(name_gen("hprime"))
     # This function resets the step size based on local error behavior.
     cb(hprime, h * (1.0 / ((6*err) ** (1/(q + 1)) + 0.000001)))
 
@@ -380,6 +379,8 @@ def emit_order_increase(cb, name_gen, zn, tau, q, hscale, lvec):
     xiold = var(name_gen("xiold"))
     alpha0 = var(name_gen("alpha0"))
     hsum = var(name_gen("hsum"))
+    for i in range(0, 6):
+        cb(lvec[i], 0)
     cb(lvec[2], 1)
     cb(alpha1, 1)
     cb(prod, 1)
@@ -409,6 +410,8 @@ def emit_order_increase(cb, name_gen, zn, tau, q, hscale, lvec):
 def emit_order_decrease(cb, name_gen, zn, tau, q, hscale, lvec):
 
     xi = var(name_gen("xi"))
+    for i in range(0, 6):
+        cb(lvec[i], 0)
     cb(lvec[2], 1)
     hsum = var(name_gen("hsum"))
     for j in range(1, q-1):
@@ -433,7 +436,8 @@ def emit_order_adjust(cb, name_gen, zn, tau, q, delq, hscale, lvec):
     return zn, lvec
 
 
-def emit_eta_plus(cb, name_gen, zn, acor, tau, h, ewt, tq, saved_tq5, q, n):
+def emit_eta_plus(cb, name_gen, zn, acor, tau, h,
+                  ewt, tq, saved_tq5, q, n, eta_plus):
 
     cquot = var(name_gen("cquot"))
     cb(cquot, (tq[5] / saved_tq5) * (h / tau[2])**(q+1))
@@ -445,13 +449,12 @@ def emit_eta_plus(cb, name_gen, zn, acor, tau, h, ewt, tq, saved_tq5, q, n):
     wnorm = var(name_gen("wnorm"))
     wnorm = emit_wrms(cb, name_gen, tempv, ewt, n)
     cb(dup, wnorm / tq[3])
-    eta_plus = var(name_gen("eta_plus"))
     cb(eta_plus, 1.0 / ((10 * dup) ** (1 / (q+2)) + 0.000001))
 
     return eta_plus
 
 
-def emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n):
+def emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n, eta_minus):
 
     ddn = var(name_gen("ddn"))
     wnorm = var(name_gen("wnorm"))
@@ -459,7 +462,6 @@ def emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n):
     wnorm = emit_wrms(cb, name_gen, zn, ewt, n)
 
     cb(ddn, wnorm / tq[1])
-    eta_minus = var(name_gen("eta_minus"))
     cb(eta_minus, 1.0 / ((6 * ddn) ** (1 / q) + 0.000001))
 
     return eta_minus
@@ -467,9 +469,11 @@ def emit_eta_minus(cb, name_gen, zn, ewt, tq, q, n):
 
 def emit_err_weights(cb, name_gen, y, atol, rtol, ewt, n):
 
-    for i in range(n):
-        #cb(ewt[i], 1.0 / (rtol*abs(y[i]) + atol))
-        cb(ewt[i], 1.0 / (rtol*((y[i])**2)**0.5 + atol))
+    # FIXME: loop bounded by state size
+    #for i in range(n):
+    absval = var("<builtin>norm_1")
+    for i in range(2):
+        cb(ewt[i], 1.0 / (rtol*absval(y[i]) + atol))
 
     return ewt
 
@@ -478,7 +482,9 @@ def emit_wrms(cb, name_gen, y, ewt, n):
 
     wnorm = var(name_gen("wnorm"))
     cb(wnorm, 0)
-    for i in range(n):
+    # FIXME: loop bounded by state size
+    #for i in range(n):
+    for i in range(2):
         cb(wnorm, wnorm + (y[i]*ewt[i])**2)
 
     cb(wnorm, wnorm * (1 / n))
@@ -541,50 +547,55 @@ class CVODEMethodBuilder(MethodBuilder):
         self.dt = var('<dt>')
 
         # CVODE's persistent state - data that needs to exist between phases
-        self.t_current = var('t_current')
-        self.t_targ = var('t_targ')
-        self.setup_counter = var('setup_counter')
-        self.jac_counter = var('jac_counter')
-        self.setup_flag = var('setup_flag')
-        self.nflag = var('nflag')
-        self.gamma_p = var('gamma_p')
-        self.q = var('q')
-        self.qprime = var('qprime')
-        self.qwait = var('qwait')
-        self.r = var('r')
-        self.saved_tq5 = var('saved_tq5')
-        self.hscale = var('hscale')
-        self.etamax = var('etamax')
-        self.t_int = var('t_int')
-        self.h = var("h")
-        self.hprime = var("hprime")
-        self.y_int = var('y_int')
-        self.tau = var('tau')
-        self.jest = var('jest')
-        self.n = var('n')
+        self.t_current = var('<p>t_current')
+        self.t_targ = var('<p>t_targ')
+        self.setup_counter = var('<p>setup_counter')
+        self.jac_counter = var('<p>jac_counter')
+        self.setup_flag = var('<p>setup_flag')
+        self.nflag = var('<p>nflag')
+        self.gamma_p = var('<p>gamma_p')
+        self.gamma = var('<p>gamma')
+        self.gamrat = var('<p>gamrat')
+        self.q = var('<p>q')
+        self.qprime = var('<p>qprime')
+        self.qwait = var('<p>qwait')
+        self.r = var('<p>r')
+        self.saved_tq5 = var('<p>saved_tq5')
+        self.hscale = var('<p>hscale')
+        self.etamax = var('<p>etamax')
+        self.t_int = var('<p>t_int')
+        self.h = var("<p>h")
+        self.hprime = var("<p>hprime")
+        self.y_int = var('<p>y_int')
+        self.tau = var('<p>tau')
+        self.jest = var('<p>jest')
+        self.m = var('<p>m')
+        self.n = var('<p>n')
         self.zn = \
-            [var('<p>zn_' + str(i)) for i in range(0, self.max_order)]
+            [var('<p>zn_' + str(i)) for i in range(0, self.max_order+2)]
         self.tq = \
-            [var('tq_' + str(i)) for i in range(0, self.max_order)]
+            [var('<p>tq_' + str(i)) for i in range(0, self.max_order+2)]
         self.lvec = \
-            [var('l_' + str(i)) for i in range(0, self.max_order)]
-        self.sign = var('sign')
-        self.hg = var('hg')
-        self.hlb = var('hlb')
-        self.hub = var('hub')
-        self.hbias = var('hbias')
-        self.hnew = var('hnew')
-        self.count = var('count')
-        self.max_iters = var('max_iters')
-        self.delp = var('delp')
-        self.dsm = var('dsm')
-        self.it = var('it')
-        self.eta = var('eta')
-        self.newit = var('newit')
-        self.rl1 = var('rl1')
-        self.ym = var('ym')
-        self.acor = var('acor')
-        self.ewt = var('ewt')
+            [var('<p>l_' + str(i)) for i in range(0, self.max_order+2)]
+        self.sign = var('<p>sign')
+        self.hg = var('<p>hg')
+        self.h0 = var('<p>h0')
+        self.hlb = var('<p>hlb')
+        self.hub = var('<p>hub')
+        self.hbias = var('<p>hbias')
+        self.hnew = var('<p>hnew')
+        self.count = var('<p>count')
+        self.max_iters = var('<p>max_iters')
+        self.delp = var('<p>delp')
+        self.dsm = var('<p>dsm')
+        self.it = var('<p>it')
+        self.eta = var('<p>eta')
+        self.newit = var('<p>newit')
+        self.rl1 = var('<p>rl1')
+        self.ym = var('<p>ym')
+        self.acor = var('<p>acor')
+        self.ewt = var('<p>ewt')
+        self.f = var('<p>f')
 
     def generate(self):
         """
@@ -614,6 +625,7 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_init(self.zn[0], self.state)
             cb_init(self.zn[1], self.eval_rhs(self.t, self.state))
             cb_init(self.jest, array(self.n*self.n))
+            cb_init(self.m, array(self.n*self.n))
             cb_init(self.q, 1)
             cb_init(self.qprime, 1)
             cb_init(self.qwait, self.q + 1)
@@ -624,11 +636,14 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_init(self.hscale, 0)
             cb_init(self.nflag, False)
             cb_init(self.t_int, self.t)
+            cb_init(self.t_targ, self.t + self.dt)
+            cb_init(self.t_current, self.t)
             cb_init(self.hprime, 0)
             cb_init(self.y_int, array(self.n))
             cb_init(self.y_int, self.state)
             cb_init(self.acor, array(self.n))
             cb_init(self.ym, array(self.n))
+            #cb_init(self.f, array(self.n))
             cb_init(self.ewt, array(self.n))
             self.ewt = emit_err_weights(cb_init, name_gen,
                                         self.state, self.atol,
@@ -645,17 +660,16 @@ class CVODEMethodBuilder(MethodBuilder):
             tdist = var(name_gen("tdist"))
             uround = var(name_gen("uround"))
             tround = var(name_gen("tround"))
+            absval = var("<builtin>norm_1")
             from pymbolic.primitives import Max
             #cb(sign, np.sign(xdiff))
-            cb_init(self.sign, tdiff / ((tdiff**2)**0.5))
-            #cb(xdist, abs(xdiff))
-            cb_init(tdist, ((tdiff)**2)**0.5)
+            cb_init(self.sign, tdiff / absval(tdiff))
+            cb_init(tdist, absval(tdiff))
             # FIXME: float floor
             #cb_init(uround, np.finfo(float).eps)
-            cb_init(uround, 2e-33)
-            #cb_init(tround, uround * Max(abs(self.t_targ), abs(self.t_current)))
-            cb_init(tround, uround * Max((((self.t_targ)**2)**0.5,
-                    ((self.t_current)**2)**0.5)))
+            cb_init(uround, 2**(-33))
+            cb_init(tround, uround * Max((absval(self.t_targ),
+                    absval(self.t_current))))
             with cb_init.if_(tdist, '<', 2*tround):
                 # Step is extremely tiny - no good
                 cb_init.raise_(ValueError, "Initial timestep faulty")
@@ -669,12 +683,9 @@ class CVODEMethodBuilder(MethodBuilder):
             cb_init(hub_factor, 0.1)  # default in CVODE
             cb_init(self.hlb, hlb_factor * tround)
             # initial_step_upper_bound
-            #cb(hub, hub_factor * abs(zn[0]) + atol)
-            cb_init(self.hub, hub_factor * ((self.zn[0])**2)**0.5 + self.atol)
-            #cb(hub, abs(zn[1]) / hub)
-            cb_init(self.hub, ((self.zn[1])**2)**0.5 / self.hub)
+            cb_init(self.hub, hub_factor * absval(self.zn[0]) + self.atol)
+            cb_init(self.hub, absval(self.zn[1]) / self.hub)
             hub_max = var(name_gen("hub_max"))
-            #cb(hub_max, np.linalg.norm(hub, np.inf))
             cb_init(hub_max, var("<builtin>norm_inf")(self.hub))
             cb_init(self.hub, hub_factor*tdist)
             with cb_init.if_(self.hub*hub_max, '>', 1):
@@ -682,11 +693,10 @@ class CVODEMethodBuilder(MethodBuilder):
 
             cb_init(self.hg, (self.hlb*self.hub) ** 0.5)
 
-            h0 = var(name_gen("h0"))
             with cb_init.if_(self.hub, '<', self.hlb):
                 with cb_init.if_(self.sign, '==', -1):
                     cb_init(self.hg, -self.hg)
-                cb_init(h0, self.hg)
+                cb_init(self.h0, self.hg)
                 cb_init.switch_phase("initialize_finish")
             with cb_init.else_():
                 # Otherwise, use a second-derivative estimate to find it.
@@ -701,15 +711,15 @@ class CVODEMethodBuilder(MethodBuilder):
             hgs = var(name_gen("hgs"))
             cb_initial_step(hgs, self.hg*self.sign)
             # Compute a second-derivative estimate using difference quotients
-            y = var(name_gen("y"))
+            yi = var(name_gen("yi"))
             ftemp = var(name_gen("ftemp"))
             ydd = var(name_gen("ydd"))
             yddnrm = var(name_gen("yddnrm"))
             array = var("<builtin>array")
-            cb_initial_step(y, array(self.n))
-            cb_initial_step(y, hgs*self.zn[1] + self.zn[0])
+            cb_initial_step(yi, array(self.n))
+            cb_initial_step(yi, hgs*self.zn[1] + self.zn[0])
             cb_initial_step(ftemp, array(self.n))
-            cb_initial_step(ftemp, self.eval_rhs(self.t_current, y))
+            cb_initial_step(ftemp, self.eval_rhs(self.t_current, yi))
             cb_initial_step(ydd, array(self.n))
             cb_initial_step(ydd, (ftemp - self.zn[1]) / hgs)
             yddnrm = emit_wrms(cb_initial_step, name_gen, ydd, self.ewt, self.n)
@@ -741,305 +751,399 @@ class CVODEMethodBuilder(MethodBuilder):
         with CodeBuilder(name="initialize_finish") as cb_init_finish:
 
             # Apply bias, check bounds and sign
-            cb_init_finish(h0, self.hbias*self.hnew)
-            with cb_init_finish.if_(h0, '<', self.hlb):
-                cb_init_finish(h0, self.hlb)
-            with cb_init_finish.if_(h0, '>', self.hub):
-                cb_init_finish(h0, self.hub)
+            cb_init_finish(self.h0, self.hbias*self.hnew)
+            with cb_init_finish.if_(self.h0, '<', self.hlb):
+                cb_init_finish(self.h0, self.hlb)
+            with cb_init_finish.if_(self.h0, '>', self.hub):
+                cb_init_finish(self.h0, self.hub)
             with cb_init_finish.if_(self.sign, '==', -1):
-                cb_init_finish(h0, -h0)
-            cb_init_finish(self.h, h0)
+                cb_init_finish(self.h0, -self.h0)
+            cb_init_finish(self.h, self.h0)
             cb_init_finish(self.zn[1], self.zn[1]*self.h)
             cb_init_finish(self.gamma_p, self.h)
             cb_init_finish(self.hprime, self.h)
             cb_init_finish(self.hscale, self.h)
 
         # Phase: STEP_SETUP
-        with CodeBuilder(name="step_setup") as cb_step_setup:
-            # We'll put the main CVODE runner here for now.
-            cb_step_setup(self.t_targ, self.t + self.dt)
-            cb_step_setup(self.t_current, self.t_int)
+        cb_step_setup = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="step_setup_{order}".format(
+                                    order=phase_order)) as cb_ss:
+                # We'll put the main CVODE runner here for now.
+                cb_ss(self.t_targ, self.t + self.dt)
+                cb_ss(self.t_current, self.t_int)
+
+                cb_ss(self.eta, self.hprime / self.h)
 
-            cb_step_setup(self.eta, self.hprime / self.h)
+                cb_ss(self.ym, self.state)
 
-            cb_step_setup(self.ym, self.state)
+                # Append to list of phases for each order.
+                cb_step_setup.append(cb_ss)
 
-        # Phase: STEP
-        with CodeBuilder(name="step") as cb_step:
+        # Phase: STEP. Create this phase once for each order.
+        cb_step = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="step_{order}".format(order=phase_order)) as cb_s:
 
-            # Reset error weights
-            self.ewt = emit_err_weights(cb_step, name_gen, self.ym,
+                # Reset error weights
+                self.ewt = emit_err_weights(cb_s, name_gen, self.ym,
                                         self.atol, self.rtol, self.ewt, self.n)
 
-            # Check for order change
-            with cb_step.if_(self.qprime, '!=', self.q):
-                cb_step(self.qwait, self.qprime + 1)
                 delq = var(name_gen('delq'))
-                cb_step(delq, self.qprime - self.q)
-                self.zn, self.lvec = emit_order_adjust(cb_step, name_gen, self.zn,
-                                                    self.tau, self.q, delq,
+                cb_s(delq, 0)
+                # Check for order change
+                with cb_s.if_(self.qprime, '!=', self.q):
+                    cb_s(self.qwait, self.qprime + 1)
+                    cb_s(delq, self.qprime - self.q)
+                    self.zn, self.lvec = emit_order_adjust(cb_s, name_gen, self.zn,
+                                                    self.tau, phase_order, delq,
                                                     self.hscale, self.lvec)
-                cb_step(self.q, self.qprime)
-
-            # Rescale of Nordsieck happens here in the case of a step
-            # change.
-            with cb_step.if_(self.hprime, '!=', self.h):
-                self.zn, self.hscale = emit_rescale(cb_step, name_gen, self.zn,
-                                                    self.q, self.eta, self.h,
-                                                    self.hscale)
-
-            # For now, only allow 7 "retries" at order 1. This
-            # is the end-all case in CVODE.
-            cb_step(self.it, 1)
-
-        # Phase: INNER_STEP
-        with CodeBuilder(name="inner_step") as cb_is:
-
-            # This code is supposed to mirror CVStep in
-            # the original CVODE code - it does the prediction,
-            # sets the BDF parameters, and calls the newton solver
-
-            cb_is(self.h, self.hprime)
-
-            # Get predicted yn_0 (using history)
-            self.zn = emit_predict(cb_is, name_gen, self.zn, self.q)
-
-            # Call BDF setup to update histories, get coefficients, etc.
-            self.lvec, self.tq = emit_bdf_setup(cb_is, name_gen, self.q,
+                    cb_s(self.q, self.qprime)
+
+                # Rescale of Nordsieck happens here in the case of a step
+                # change.
+                with cb_s.if_(self.hprime, '!=', self.h):
+                    with cb_s.if_(delq, '==', 0):
+                        self.zn, self.hscale = emit_rescale(cb_s, name_gen, self.zn,
+                                                        phase_order, self.eta,
+                                                        self.h, self.hscale)
+                    with cb_s.if_(delq, '==', 1):
+                        self.zn, self.hscale = emit_rescale(cb_s, name_gen, self.zn,
+                                                        phase_order+1, self.eta,
+                                                        self.h, self.hscale)
+                    with cb_s.if_(delq, '==', -1):
+                        self.zn, self.hscale = emit_rescale(cb_s, name_gen, self.zn,
+                                                        phase_order-1, self.eta,
+                                                        self.h, self.hscale)
+
+                # For now, only allow 7 "retries" at order 1. This
+                # is the end-all case in CVODE.
+                cb_s(self.it, 1)
+
+                # Need to ensure we switch to the proper
+                # inner step phase based on order changes.
+                with cb_s.if_(delq, '==', 0):
+                    cb_s.switch_phase("inner_step_{order}".format(order=phase_order))
+                if phase_order < self.max_order:
+                    with cb_s.if_(delq, '==', 1):
+                        cb_s.switch_phase("inner_step_{order}".format(
+                            order=phase_order+1))
+                if phase_order > 1:
+                    with cb_s.if_(delq, '==', -1):
+                        cb_s.switch_phase("inner_step_{order}".format(
+                            order=phase_order-1))
+
+                # Append to list of phases.
+                cb_step.append(cb_s)
+
+        # Phase: INNER_STEP. Create this phase once for each order.
+        cb_is = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="inner_step_{order}".format(
+                                order=phase_order)) as cb_i:
+
+                # This code is supposed to mirror CVStep in
+                # the original CVODE code - it does the prediction,
+                # sets the BDF parameters, and calls the newton solver
+
+                cb_i(self.h, self.hprime)
+
+                # Get predicted yn_0 (using history)
+                self.zn = emit_predict(cb_i, name_gen, self.zn, phase_order)
+
+                # Call BDF setup to update histories, get coefficients, etc.
+                self.lvec, self.tq = emit_bdf_setup(cb_i, name_gen, phase_order,
                                              self.h, self.qwait, self.tau,
                                              self.lvec, self.tq)
 
-            # Call the Newton solver for the "correction"
-            # to our "prediction"
-
-            linear_solve = var("<builtin>linear_solve")
-            array = var("<builtin>array")
-            # Only supports Newton for now.
-            # A simple Newton solver, which for now uses numpy (LU) for the
-            # linear solve.
-            cb_is(self.rl1, 1 / self.lvec[1])
-            cb_is(self.acor, array(self.n))
-            nflag_prev = var(name_gen("nflag_prev"))
-            cb_is(nflag_prev, self.nflag)
-            cb_is(self.nflag, True)
-
-            f = var(name_gen("f"))
-            cb_is(f, array(self.n))
-            cb_is(f, self.eval_rhs(self.t_current, self.zn[0]))
-
-            # Allow at most three iterations
-            cb_is(self.newit, 1)
-            gamma = var(name_gen("gamma"))
-            cb_is(gamma, self.rl1 * self.h)
-            gamrat = var(name_gen("gamrat"))
-            cb_is(gamrat, gamma / self.gamma_p)
-            crit = var(name_gen("crit"))
-            #cb(crit, abs(gamrat - 1))
-            cb_is(crit, ((gamrat - 1)**2)**0.5)
-            with cb_is.if_(crit, '>', 0.3):
-                cb_is(self.setup_flag, True)
-
-            with cb_is.if_(self.setup_counter, '>=', 20):
-                cb_is(self.setup_flag, True)
-
-            # Does the Jacobian estimate given to the Newton
-            # solver need to be updated?
-            with cb_is.if_(self.setup_flag, '==', True):
-                # We don't always reform J in this case
-                jac_flag = var(name_gen("jac_flag"))
-                # Compound conditional
-                from pymbolic.primitives import Comparison, LogicalAnd, LogicalOr
-                with cb_is.if_(LogicalAnd((Comparison(crit, "<", 0.2),
+                # Call the Newton solver for the "correction"
+                # to our "prediction"
+
+                linear_solve = var("<builtin>linear_solve")
+                array = var("<builtin>array")
+                # Only supports Newton for now.
+                # A simple Newton solver, which for now uses numpy (LU) for the
+                # linear solve.
+                cb_i(self.rl1, 1 / self.lvec[1])
+                # FIXME: add zeros builtin
+                for i in range(2):
+                    cb_i(self.acor[i], 0)
+                nflag_prev = var(name_gen("nflag_prev"))
+                cb_i(nflag_prev, self.nflag)
+                cb_i(self.nflag, True)
+
+                cb_i(self.f, self.eval_rhs(self.t_current, self.zn[0]))
+
+                # Allow at most three iterations
+                cb_i(self.newit, 1)
+                cb_i(self.gamma, self.rl1 * self.h)
+                cb_i(self.gamrat, self.gamma / self.gamma_p)
+                crit = var(name_gen("crit"))
+                absval = var("<builtin>norm_1")
+                cb_i(crit, absval(self.gamrat - 1))
+                with cb_i.if_(crit, '>', 0.3):
+                    cb_i(self.setup_flag, True)
+
+                with cb_i.if_(self.setup_counter, '>=', 20):
+                    cb_i(self.setup_flag, True)
+
+                # Does the Jacobian estimate given to the Newton
+                # solver need to be updated?
+                with cb_i.if_(self.setup_flag, '==', True):
+                    # We don't always reform J in this case
+                    jac_flag = var(name_gen("jac_flag"))
+                    # Compound conditional
+                    from pymbolic.primitives import Comparison, LogicalAnd, LogicalOr
+                    with cb_i.if_(LogicalAnd((Comparison(crit, "<", 0.2),
                                Comparison(nflag_prev, "==", False)))):
-                    cb_is(jac_flag, True)
-                with cb_is.else_():
-                    cb_is(jac_flag, False)
-                with cb_is.if_(LogicalOr((Comparison(jac_flag, "==", True),
+                        cb_i(jac_flag, True)
+                    with cb_i.else_():
+                        cb_i(jac_flag, False)
+                    with cb_i.if_(LogicalOr((Comparison(jac_flag, "==", True),
                                Comparison(self.jac_counter, ">", 50)))):
-                    self.jest = self.emit_jac_estimate(cb_is, name_gen, self.zn[0],
-                                                  self.n, f, self.ewt, self.h,
+                        self.jest = self.emit_jac_est(cb_i, name_gen, self.zn[0],
+                                                  self.n, self.f, self.ewt, self.h,
                                                   self.t_current, self.jest)
-                    cb_is(self.jac_counter, 0)
-                cb_is(self.setup_flag, False)
-                # Reset gamma_p and step counter
-                cb_is(self.setup_counter, 0)
-                cb_is(self.gamma_p, gamma)
-                cb_is(gamrat, gamma / self.gamma_p)
-                cb_is(self.r, 1)
-
-            m = var(name_gen("m"))
-            cb_is(m, array(self.n*self.n))
-
-            # Need to create and fill an identity at this point
-            eye = var(name_gen("i"))
-            cb_is(eye, array(self.n*self.n))
-            for i in range(0, self.n):
-                cb_is(eye[i*(self.n-1)], 1.0)
-            cb_is(m, eye - gamma*self.jest)
-
-            converged = var(name_gen("converged"))
-            cb_is(converged, False)
-            cb_is(self.delp, 0)
-            cb_is.switch_phase("newton_step")
+                        cb_i(self.jac_counter, 0)
+                    cb_i(self.setup_flag, False)
+                    # Reset gamma_p and step counter
+                    cb_i(self.setup_counter, 0)
+                    cb_i(self.gamma_p, self.gamma)
+                    cb_i(self.gamrat, self.gamma / self.gamma_p)
+                    cb_i(self.r, 1)
+
+                # Need to create and fill an identity at this point
+                eye = var(name_gen("i"))
+                cb_i(eye, array(self.n*self.n))
+                #for i in range(0, self.n):
+                # FIXME: State size as loop bound.
+                for i in range(2):
+                    cb_i(eye[i*(self.n-1)], 1.0)
+                cb_i(self.m, eye - self.gamma*self.jest)
+
+                converged = var(name_gen("converged"))
+                cb_i(converged, False)
+                cb_i(self.delp, 0)
+                cb_i.switch_phase("newton_step_{order}".format(order=phase_order))
+
+                # Append to phase list.
+                cb_is.append(cb_i)
 
         # PHASE: NEWTON_STEP
-        with CodeBuilder(name="newton_step") as cb_ns:
-
-            # The actual Newton loop
-            tempv = var(name_gen("tempv"))
-            corr = var(name_gen("tempv"))
-            cb_ns(tempv, array(self.n))
-            cb_ns(corr, array(self.n))
-            cb_ns(tempv, self.rl1*self.zn[1] + self.acor)
-            cb_ns(tempv, gamma*f - tempv)
-            linear_solve = var("<builtin>linear_solve")
-            transpose = var("<builtin>transpose")
-            m_t = var(name_gen("m_t"))
-            cb_ns(m_t, array(self.n*self.n))
-            cb_ns(m_t, transpose(m, self.n))
-            cb_ns(corr, linear_solve(m_t, tempv, self.n, 1))
-            # Scale the correction to account for a change in gamma
-            with cb_ns.if_(gamrat, '!=', 1):
-                cb_ns(corr, (2 / (1 + gamrat)) * corr)
-
-            delm = var(name_gen("delm"))
-            delm = emit_wrms(cb_ns, name_gen, corr, self.ewt, self.n)
-            cb_ns(self.acor, self.acor + corr)
-            cb_ns(self.ym, self.zn[0] + self.acor)
-
-            # Call the convergence check to see if we can exit the loop.
-            converged, self.r, acnrm = emit_convergence_check(cb_ns, name_gen, delm,
+        cb_ns = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="newton_step_{order}".format(
+                                order=phase_order)) as cb_n:
+
+                # The actual Newton loop
+                tempv = var(name_gen("tempv"))
+                corr = var(name_gen("tempv"))
+                cb_n(tempv, array(self.n))
+                cb_n(corr, array(self.n))
+                cb_n(tempv, self.rl1*self.zn[1] + self.acor)
+                cb_n(tempv, self.gamma*self.f - tempv)
+                linear_solve = var("<builtin>linear_solve")
+                transpose = var("<builtin>transpose")
+                m_t = var(name_gen("m_t"))
+                cb_n(m_t, array(self.n*self.n))
+                cb_n(m_t, transpose(self.m, self.n))
+                cb_n(corr, linear_solve(m_t, tempv, self.n, 1))
+                # Scale the correction to account for a change in gamma
+                with cb_n.if_(self.gamrat, '!=', 1):
+                    cb_n(corr, (2 / (1 + self.gamrat)) * corr)
+
+                delm = var(name_gen("delm"))
+                delm = emit_wrms(cb_n, name_gen, corr, self.ewt, self.n)
+                cb_n(self.acor, self.acor + corr)
+                cb_n(self.ym, self.zn[0] + self.acor)
+
+                # Call the convergence check to see if we can exit the loop.
+                converged, self.r, acnrm = emit_conv_check(cb_n, name_gen, delm,
                                                               self.delp, self.tq,
                                                               self.r, self.newit,
                                                               self.acor, self.ewt,
                                                               self.n)
-            with cb_ns.if_(converged, '==', True):
-                cb_ns.switch_phase("inner_step_check")
-
-            with cb_ns.else_():
-                cb_ns(self.delp, delm)
-                cb_ns(f, self.eval_rhs(self.t_current, self.ym))
-                cb_ns(self.newit, self.newit + 1)
-                with cb_ns.if_(self.newit, '>', 3):
-                    # If we haven't converged by this point, we have to reduce the
-                    # step size and try again
-                    cb_ns(self.nflag, False)
-                    cb_ns.switch_phase("inner_step_check")
+                with cb_n.if_(converged, '==', True):
+                    cb_n.switch_phase("inner_step_check_{order}".format(
+                        order=phase_order))
+
+                with cb_n.else_():
+                    cb_n(self.delp, delm)
+                    cb_n(self.f, self.eval_rhs(self.t_current, self.ym))
+                    cb_n(self.newit, self.newit + 1)
+                    with cb_n.if_(self.newit, '>', 3):
+                        # If we haven't converged by this point,
+                        # we have to reduce the step size and try again
+                        cb_n(self.nflag, False)
+                        cb_n.switch_phase("inner_step_check_{order}".format(
+                            order=phase_order))
+
+                # Append to phase list
+                cb_ns.append(cb_n)
 
         # Phase: INNER_STEP_CHECK
-        with CodeBuilder(name="inner_step_check") as cb_isc:
-
-            [self.zn, self.eta, self.h,
-                kflag, self.setup_flag,
-                self.hscale, self.etamax,
-                self.jac_counter] = emit_flag_handler(cb_isc, name_gen, self.nflag,
-                                                      self.zn, self.q, self.eta,
+        cb_isc = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="inner_step_check_{order}".format(
+                                order=phase_order)) as cb_i:
+
+                [self.zn, self.eta, self.h,
+                    kflag, self.setup_flag,
+                    self.hscale, self.etamax,
+                    self.jac_counter] = emit_flag_handler(cb_i, name_gen, self.nflag,
+                                                      self.zn, phase_order, self.eta,
                                                       self.h, self.setup_flag,
                                                       self.hscale, self.etamax,
                                                       self.jac_counter)
 
-            # Check the error - if it's too high we need a new step
-            with cb_isc.if_(kflag, '==', True):
-                [self.hprime, self.dsm,
-                    self.etamax] = emit_error_check(cb_isc, name_gen,
-                                                    self.h, self.q,
+                # Check the error - if it's too high we need a new step
+                with cb_i.if_(kflag, '==', True):
+                    [self.hprime, self.dsm,
+                        self.etamax] = emit_error_check(cb_i, name_gen,
+                                                    self.h, phase_order,
                                                     self.tq, self.n,
                                                     self.ewt, self.acor,
                                                     self.etamax, self.dsm,
                                                     self.hprime)
-                with cb_isc.if_(self.hprime, '==', self.h):
-                    # The step wasn't changed, because the error was fine
-                    cb_isc.switch_phase("step_check")
-                with cb_isc.else_():
-                    # Else, the error check failed: increment the iteration counter
-                    cb_isc(self.it, self.it + 1)
-                    # Reset eta with new step
-                    cb_isc(self.eta, self.hprime / self.h)
-                    # Also need to restore the Nordsieck array to previous setting
-                    self.zn = emit_restore(cb_isc, name_gen, self.zn, self.q)
-                    self.zn, self.hscale = emit_rescale(cb_isc, name_gen, self.zn,
-                                                        self.q, self.eta, self.h,
-                                                        self.hscale)
-                    # Set setup flag to True
-                    cb_isc(self.setup_flag, True)
-                    # Check phase switch condition
-                    with cb_isc.if_(self.it, '>', 7):
-                        cb_isc.switch_phase("step_check")
-
-            with cb_isc.else_():
-                # The Newton solve failed to converge - try again without
-                # checking the error on an unconverged result
-                cb_isc(self.it, self.it + 1)
-                cb_isc(self.setup_flag, True)
-                cb_isc(self.hprime, self.h)
+                    with cb_i.if_(self.hprime, '==', self.h):
+                        # The step wasn't changed, because the error was fine
+                        cb_i.switch_phase("step_check_{order}".format(
+                            order=phase_order))
+                    with cb_i.else_():
+                        # Else, the error check failed:
+                        # increment the iteration counter
+                        cb_i(self.it, self.it + 1)
+                        # Reset eta with new step
+                        cb_i(self.eta, self.hprime / self.h)
+                        # Also need to restore the Nordsieck array to
+                        # previous setting
+                        self.zn = emit_restore(cb_i, name_gen, self.zn, phase_order)
+                        self.zn, self.hscale = emit_rescale(cb_i, name_gen, self.zn,
+                                                        phase_order, self.eta,
+                                                        self.h, self.hscale)
+                        # Set setup flag to True
+                        cb_i(self.setup_flag, True)
+                        # Check phase switch condition
+                        with cb_i.if_(self.it, '>', 7):
+                            cb_i.switch_phase("step_check_{order}".format(
+                                order=phase_order))
+
+                with cb_i.else_():
+                    # The Newton solve failed to converge - try again without
+                    # checking the error on an unconverged result
+                    cb_i(self.it, self.it + 1)
+                    cb_i(self.setup_flag, True)
+                    cb_i(self.hprime, self.h)
+
+                cb_isc.append(cb_i)
 
         # PHASE: StepCheck
-        with CodeBuilder(name="step_check") as cb_sc:
-            with cb_sc.if_(self.it, '==', 8):
-                # This step failed - panic
-                cb_sc.raise_(ValueError, "Eight iterations")
-
-            [self.zn, self.saved_tq5,
-                self.qwait, self.tau] = emit_complete_step(cb_sc, name_gen, self.zn,
+        cb_sc = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="step_check_{order}".format(
+                                order=phase_order)) as cb_s:
+                with cb_s.if_(self.it, '==', 8):
+                    # This step failed - panic
+                    cb_s.raise_(ValueError, "Eight iterations")
+
+                [self.zn, self.saved_tq5,
+                    self.qwait, self.tau] = emit_comp_step(cb_s, name_gen, self.zn,
                                                           self.lvec, self.acor,
-                                                          self.q, self.qwait,
+                                                          phase_order, self.qwait,
                                                           self.tau, self.h, self.tq,
                                                           self.t_current,
                                                           self.saved_tq5)
-            [eta, self.qprime, acor,
-                self.qwait, self.zn,
-                self.etamax] = emit_prepare_step(cb_sc, name_gen, self.tq, self.acor,
-                                                 self.zn, self.h, self.q, self.n,
-                                                 self.tau, self.saved_tq5, self.dsm,
-                                                 self.ewt, self.qwait, self.etamax,
-                                                 self.eta, self.qprime)
-            cb_sc(self.hprime, self.eta * self.h)
-            cb_sc(self.t_current, self.t_current + self.h)
-            # Only increment Jacobian and setup counters when CVODE is
-            # actually doing work
-            cb_sc(self.setup_counter, self.setup_counter + 1)
-            cb_sc(self.jac_counter, self.jac_counter + 1)
-
-            # SWITCH PHASE: CHECK
-            with cb_sc.if_(self.t_current, '>=', self.t_targ):
-                cb_sc.switch_phase("finish")
+                [eta, self.qprime, acor,
+                    self.qwait, self.zn,
+                    self.etamax] = emit_prep_step(cb_s, name_gen, self.tq, self.acor,
+                                                 self.zn, self.h, phase_order,
+                                                 self.n, self.tau, self.saved_tq5,
+                                                 self.dsm, self.ewt, self.qwait,
+                                                 self.etamax, self.eta, self.qprime)
+                cb_s(self.hprime, self.eta * self.h)
+                cb_s(self.t_current, self.t_current + self.h)
+                # Only increment Jacobian and setup counters when CVODE is
+                # actually doing work
+                cb_s(self.setup_counter, self.setup_counter + 1)
+                cb_s(self.jac_counter, self.jac_counter + 1)
+
+                # SWITCH PHASE: CHECK
+                with cb_s.if_(self.t_current, '>=', self.t_targ):
+                    cb_s.switch_phase("finish_{order}".format(order=phase_order))
+
+                cb_sc.append(cb_s)
 
         # PHASE: Finish
-        with CodeBuilder(name="finish") as cb_finish:
-            # If we overshot xtarg (we probably did), we
-            # need to interpolate.
-            with cb_finish.if_(self.t_current, '!=', self.t_targ):
-                cb_finish(self.y_int, self.ym)
-                cb_finish(self.t_int, self.t_current)
-                self.ym = emit_interp(cb_finish, name_gen, self.zn,
+        cb_finish = []
+        for phase_order in range(1, 6):
+            with CodeBuilder(name="finish_{order}".format(
+                                order=phase_order)) as cb_f:
+                # If we overshot xtarg (we probably did), we
+                # need to interpolate.
+                with cb_f.if_(self.t_current, '!=', self.t_targ):
+                    cb_f(self.y_int, self.ym)
+                    cb_f(self.t_int, self.t_current)
+                    self.ym = emit_interp(cb_f, name_gen, self.zn,
                                       self.t_current, self.t_targ,
-                                      self.h, self.q, self.n, self.ym)
+                                      self.h, phase_order, self.n, self.ym)
 
-            cb_finish(self.t, self.t + self.dt)
-            cb_finish(self.state, self.ym)
-            cb_finish.yield_state(expression=self.state,
+                cb_f(self.t, self.t + self.dt)
+                cb_f(self.state, self.ym)
+                cb_f.yield_state(expression=self.state,
                                      component_id=self.component_id,
                                      time_id='', time=self.t)
 
-        # Given the presence of while loops and adaptivity, the phase diagram
-        # here is significantly more complex.
-        return DAGCode(
-            phases={
+                cb_finish.append(cb_f)
+
+        # Need to build dictionary for order-dependent phases
+        step_setup_dict = {}
+        step_dict = {}
+        inner_step_dict = {}
+        newton_step_dict = {}
+        inner_step_check_dict = {}
+        step_check_dict = {}
+        finish_dict = {}
+        for phase_ord in range(1, 6):
+            step_setup_dict["step_setup_{order}".format(
+                order=phase_ord)] = cb_step_setup[phase_ord-1].as_execution_phase(
+                        "step_{order}".format(order=phase_ord))
+            step_dict["step_{order}".format(
+                order=phase_ord)] = cb_step[phase_ord-1].as_execution_phase(
+                        "inner_step_{order}".format(order=phase_ord))
+            inner_step_dict["inner_step_{order}".format(
+                order=phase_ord)] = cb_is[phase_ord-1].as_execution_phase(
+                        "newton_step_{order}".format(order=phase_ord))
+            newton_step_dict["newton_step_{order}".format(
+                order=phase_ord)] = cb_ns[phase_ord-1].as_execution_phase(
+                        "newton_step_{order}".format(order=phase_ord))
+            inner_step_check_dict["inner_step_check_{order}".format(
+                order=phase_ord)] = cb_isc[phase_ord-1].as_execution_phase(
+                        "inner_step_{order}".format(order=phase_ord))
+            step_check_dict["step_check_{order}".format(
+                order=phase_ord)] = cb_sc[phase_ord-1].as_execution_phase(
+                        "step_{order}".format(order=phase_ord))
+            finish_dict["finish_{order}".format(
+                order=phase_ord)] = cb_finish[phase_ord-1].as_execution_phase(
+                        "step_setup_{order}".format(order=phase_ord))
+
+        base_phase_dict = {
                 "initialize": cb_init.as_execution_phase(next_phase="initial_step"),
                 "initial_step": cb_initial_step.as_execution_phase(
                     next_phase="initial_step"),
                 "initialize_finish": cb_init_finish.as_execution_phase(
-                    next_phase="step_setup"),
-                "step_setup": cb_step_setup.as_execution_phase(
-                    next_phase="step_setup"),
-                "step": cb_step.as_execution_phase(next_phase="inner_step"),
-                "inner_step": cb_is.as_execution_phase(next_phase="newton_step"),
-                "newton_step": cb_ns.as_execution_phase(next_phase="newton_step"),
-                "inner_step_check": cb_isc.as_execution_phase(
-                    next_phase="inner_step"),
-                "step_check": cb_sc.as_execution_phase(next_phase="step"),
-                "finish": cb_finish.as_execution_phase(next_phase="step_setup")
-                },
+                    next_phase="step_setup_1")
+                }
+        base_phase_dict.update(step_setup_dict)
+        base_phase_dict.update(step_dict)
+        base_phase_dict.update(inner_step_dict)
+        base_phase_dict.update(newton_step_dict)
+        base_phase_dict.update(inner_step_check_dict)
+        base_phase_dict.update(step_check_dict)
+        base_phase_dict.update(finish_dict)
+        # Given the presence of while loops and adaptivity, the phase diagram
+        # here is significantly more complex.
+        return DAGCode(
+            phases=base_phase_dict,
             initial_phase="initialize")
 
     def eval_rhs(self, t, y):
@@ -1050,40 +1154,49 @@ class CVODEMethodBuilder(MethodBuilder):
                               parameters=(),
                               kw_parameters={"t": t, self.component_id: y})
 
-    def emit_jac_estimate(self, cb, name_gen, y, n, rhs_data, ewt, h, t, jest):
+    def emit_jac_est(self, cb, name_gen, y, n, rhs_data, ewt, h, t, jest):
 
         # Estimates the Jacobian using a simple difference quotient.
 
         uround = var(name_gen("uround"))
         # FIXME: this is not going to work - need to add this builtin
         # cb(uround, np.finfo(float).eps)
-        cb(uround, 2e-33)
+        cb(uround, 2**(-33))
 
         # Minimum increment is based on uround and norm of f
         rhs_norm = var(name_gen("rhs_norm"))
         rhs_norm = emit_wrms(cb, name_gen, rhs_data, ewt, n)
         sigma_o = var(name_gen("sigma_o"))
         sigma = var(name_gen("sigma"))
-        #cb(sigma_o, 1000*abs(h)*uround*n*rhs_norm)
-        cb(sigma_o, 1000*((h)**2)**0.5*uround*n*rhs_norm)
+        absval = var("<builtin>norm_1")
+        cb(sigma_o, 1000*absval(h)*uround*n*rhs_norm)
 
         array = var("<builtin>array")
         cb(jest, array(n*n))
         ysave = var(name_gen("ysave"))
         rhs_temp = var(name_gen("rhs_temp"))
+        pert_vec = var(name_gen("pert_vec"))
         cb(rhs_temp, array(n))
-        for j in range(0, n):
+        cb(pert_vec, array(n))
+        #for j in range(0, n):
+        # FIXME: state size in loop bound
+        for j in range(0, 2):
 
             cb(ysave, y[j])
             # Determine the increment sigma
             from pymbolic.primitives import Max
-            #cb(sigma, Max(((uround)**0.5)*abs(y[j]), sigma_o/w[j]))
-            cb(sigma, Max((((uround)**0.5)*((y[j])**2)**0.5, sigma_o/ewt[j])))
+            cb(sigma, Max((((uround)**0.5)*absval(y[j]), sigma_o/ewt[j])))
             cb(y[j], y[j] + sigma)
             cb(rhs_temp, self.eval_rhs(t, y))
-            cb(jest[j*(n-1):(j+1)*(n-1)], (rhs_temp - rhs_data) / sigma)
+            #cb(jest[j*(n-1):(j+1)*(n-1)], (rhs_temp - rhs_data) / sigma)
+            cb(pert_vec, (rhs_temp - rhs_data) / sigma)
+            for i in range(0, 2):
+                cb(jest[j*n+i], pert_vec[i])
             cb(y[j], ysave)
 
+        transpose = var("<builtin>transpose")
+        cb(jest, transpose(jest, 2))
+
         return jest
 # }}}
 
-- 
GitLab


From 505ee8249654ab3495b0b663492c7e44b897d991 Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 6 Mar 2020 17:26:17 -0600
Subject: [PATCH 5/6] Points requirements to proper dagrt cvode branch, adds
 simple test code (which currently fails)

---
 requirements.txt   |   2 +-
 test/test_cvode.py | 133 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100755 test/test_cvode.py

diff --git a/requirements.txt b/requirements.txt
index e22f333..9d64758 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 git+git://github.com/inducer/pytools
 git+git://github.com/inducer/pymbolic
-git+https://gitlab.tiker.net/inducer/dagrt.git
\ No newline at end of file
+git+https://gitlab.tiker.net/inducer/dagrt.git@cvode
diff --git a/test/test_cvode.py b/test/test_cvode.py
new file mode 100755
index 0000000..40e8993
--- /dev/null
+++ b/test/test_cvode.py
@@ -0,0 +1,133 @@
+#! /usr/bin/env python
+from __future__ import division, with_statement, absolute_import, print_function
+
+__copyright__ = "Copyright (C) 2014 Andreas Kloeckner"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import sys
+import pytest
+
+from leap.cvode import CVODEMethodBuilder
+import numpy as np
+
+import logging
+
+from utils import (  # noqa
+        python_method_impl_interpreter as pmi_int,
+        python_method_impl_codegen as pmi_cg)
+
+logger = logging.getLogger(__name__)
+
+# {{{ adaptive test
+
+
+@pytest.mark.parametrize("method", [
+    CVODEMethodBuilder("y", multistep_method='bdf', nls='newton',
+        atol=1e-12, rtol=1e-6),
+    ])
+def test_adaptive_timestep(python_method_impl, method, show_dag=False,
+                           plot=False):
+    # Use "DEBUG" to trace execution
+    #logging.basicConfig(level=logging.INFO)
+    logging.basicConfig(level=logging.DEBUG)
+
+    component_id = method.component_id
+    code = method.generate()
+    #print(code)
+    #1/0
+
+    if show_dag:
+        from dagrt.language import show_dependency_graph
+        show_dependency_graph(code)
+
+    from stiff_test_systems import VanDerPolProblem
+    example = VanDerPolProblem()
+    y = example.initial()
+
+    interp = python_method_impl(code,
+                                function_map={"<func>" + component_id: example})
+    interp.set_up(t_start=example.t_start, dt_start=1e-5, context={component_id: y})
+
+    times = []
+    values = []
+
+    new_times = []
+    new_values = []
+
+    last_t = 0
+    step_sizes = []
+
+    for event in interp.run(t_end=example.t_end):
+        print(event)
+        if isinstance(event, interp.StateComputed):
+            assert event.component_id == component_id
+
+            new_values.append(event.state_component)
+            new_times.append(event.t)
+        elif isinstance(event, interp.StepCompleted):
+            if not new_times:
+                continue
+
+            step_sizes.append(event.t - last_t)
+            last_t = event.t
+
+            times.extend(new_times)
+            values.extend(new_values)
+            del new_times[:]
+            del new_values[:]
+        elif isinstance(event, interp.StepFailed):
+            del new_times[:]
+            del new_values[:]
+
+            logger.info("failed step at t=%s" % event.t)
+
+    times = np.array(times)
+    values = np.array(values)
+    step_sizes = np.array(step_sizes)
+
+    if plot:
+        import matplotlib.pyplot as pt
+        pt.plot(times, values[:, 1], "x-")
+        pt.show()
+        pt.plot(times, step_sizes, "x-")
+        pt.show()
+
+    step_sizes = np.array(step_sizes)
+    small_step_frac = len(np.nonzero(step_sizes < 0.01)[0]) / len(step_sizes)
+    big_step_frac = len(np.nonzero(step_sizes > 0.05)[0]) / len(step_sizes)
+
+    print("small_step_frac (<0.01): %g - big_step_frac (>.05): %g"
+            % (small_step_frac, big_step_frac))
+    assert small_step_frac <= 0.35, small_step_frac
+    assert big_step_frac >= 0.16, big_step_frac
+
+# }}}
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: filetype=pyopencl:fdm=marker
-- 
GitLab


From f4041419c9551eb3d4f0c764ff74e1059d472096 Mon Sep 17 00:00:00 2001
From: Cory Mikida <cmikida2@dunkel.cs.illinois.edu>
Date: Fri, 6 Mar 2020 17:31:14 -0600
Subject: [PATCH 6/6] Make Leap/Dagrt branch names consistent

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 9d64758..fbbb926 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 git+git://github.com/inducer/pytools
 git+git://github.com/inducer/pymbolic
-git+https://gitlab.tiker.net/inducer/dagrt.git@cvode
+git+https://gitlab.tiker.net/inducer/dagrt.git@cory-cvode
-- 
GitLab