diff --git a/examples/acoustics3d.lpy b/examples/acoustics3d.lpy new file mode 100644 index 0000000000000000000000000000000000000000..919566deeaf65b7ee3e13eaac38bfaaf9c75ab2c --- /dev/null +++ b/examples/acoustics3d.lpy @@ -0,0 +1,86 @@ +% ------------------------------------------------------ +% VOLUME KERNEL +% input: u,v,w,p are float, Np x K +% input: DrDsDt is float4, Np x Np +% input: dRdx, dRdy, dRdz are float4, Np x K +% output: rhsu, rhsv, rhsw, rhsp, Np x K + +% loop domain: 1<= n,m <= Np, 1<= e <= K + +% BODY >>> + +% reduction on m (assume DrDsDt is float4 for convenience) +dudR = DrDsDt(n,m)*u(m,e) +dvdR = DrDsDt(n,m)*v(m,e) +dwdR = DrDsDt(n,m)*w(m,e) +dpdR = DrDsDt(n,m)*p(m,e) + +% volume flux +rhsu(n,e) = dot4(dRdx(k),dpdR) +rhsv(n,e) = dot4(dRdy(k),dpdR) +rhsw(n,e) = dot4(dRdz(k),dpdR) +rhsp(n,e) = dot4(dRdx(k), dudR) + dot4(dRdy(k), dvdR) + dot4(dRdz(k), dwdR) + +% BODY <<< +% ------------------------------------------------------ + +% ------------------------------------------------------ +% SURFACE KERNEL + +% input: u,v,w,p are float, Np x K +% input: LIFT is float, Np x (Nfp*Nfaces) +% input: nx,ny,nz,Fscale are float, (Nfp*Nfaces) x K +% input/output: rhsu, rhsv, rhsw, rhsp are float Np x K + +% loop-domain 1<= m <= Nfp*Nfaces, 1<= n <= Np, 1<= e <= K + +% BODY >>> +% find surface node indices at both traces +idP = vmapP(m,e) +idM = vmapM(m,e) + +% can we bounce to single index (row/column major is important) +% can we use this indexing here for clarity ? +du = u(idP)-u(idM) +dv = v(idP)-v(idM) +dw = w(idP)-w(idM) +dp = bc(idM)*p(idP) - p(idM) + +dQ = 0.5*Fscale(m,e)*(dp - nx(m,e)*du - ny(m,e)*dv - nz(m,e)*dw) + +fluxu = -nx(m,e)*dQ +fluxv = -ny(m,e)*dQ +fluxw = -nz(m,e)*dQ +fluxp = dQ + +% reduction here +rhsu(n,e) += LIFT(n,m)*fluxu +rhsv(n,e) += LIFT(n,m)*fluxv +rhsw(n,e) += LIFT(n,m)*fluxw +rhsp(n,e) += LIFT(n,m)*fluxp +% BODY <<< +% ------------------------------------------------------ + +% ------------------------------------------------------ +% RK kernel here + +% input/output: u,v,w,p are float, Np*K +% input/output: resu,resv,resw,resp are float, Np*K +% input parameters rk4a, rk4b, dt + +% 1 <= n <= K*Np, + +% BODY >>> + +resu(n) = rk4a*resu(n) + dt*rhsu +resv(n) = rk4a*resv(n) + dt*rhsv +resw(n) = rk4a*resw(n) + dt*rhsw +resp(n) = rk4a*resp(n) + dt*rhsp + +u(n) += rk4b*resu(n) +v(n) += rk4b*resv(n) +w(n) += rk4b*resw(n) +p(n) += rk4b*resp(n) + +% BODY <<< +% ------------------------------------------------------ \ No newline at end of file diff --git a/loopy/compiled.py b/loopy/compiled.py index 1eebc5952a7ec15ef5f625011664bc77a764a946..187c17050ee482ac2f0eeeac2f809a0d15f7d9bf 100644 --- a/loopy/compiled.py +++ b/loopy/compiled.py @@ -42,16 +42,26 @@ def _arg_matches_spec(arg, val, other_args): class CompiledKernel: def __init__(self, context, kernel, size_args=None, options=[], edit_code=False, codegen_kwargs={}): + """ + :arg kernel: may be a loopy.LoopKernel, a generator returning kernels + (a warning will be issued if more than one is returned). If the kernel + has not yet been loop-scheduled, that is done, too, with no specific + arguments. + """ import loopy as lp # {{{ do scheduling, if not yet done needs_check = False - if kernel.schedule is None: + if not isinstance(kernel, lp.LoopKernel) or kernel.schedule is None: + if isinstance(kernel, lp.LoopKernel): + # kernel-iterable, really + kernel = lp.generate_loop_schedules(kernel) + kernel_count = 0 - for scheduled_kernel in lp.generate_loop_schedules(kernel): + for scheduled_kernel in kernel: kernel_count += 1 if kernel_count == 1: