diff --git a/WENO.F90 b/WENO.F90 index 8013b2cbf01c89fa816dd332602dc813f5154ada..d798cdbe7f82a973d88be3506e1a6a9055187929 100644 --- a/WENO.F90 +++ b/WENO.F90 @@ -18,56 +18,32 @@ ! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ! THE SOFTWARE. -subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & +subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, d, & states, fluxes, metrics, metric_jacobians, flux_derivatives) implicit none - integer, intent(in) :: nvars, ndim, nx, ny, nz + integer, intent(in) :: nvars, ndim, nx, ny, nz, d real*8, intent(in) :: states(nvars, -2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(in) :: fluxes(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) + real*8, intent(in) :: fluxes(nvars, -2:nx+3, -2:ny+3, -2:nz+3) real*8, intent(in) :: metrics(ndim, ndim, -2:nx+3, -2:ny+3, -2:nz+3) real*8, intent(in) :: metric_jacobians(-2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(out) :: flux_derivatives(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) + real*8, intent(out) :: flux_derivatives(nvars, nx, ny, nz) - real*8 flux_derivatives_generalized(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) real*8 weno_flux_tmp(nvars, 0:nx+1, 0:ny+1, 0:nz+1) - real*8 generalized_fluxes(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) + real*8 metrics_frozen(ndim, ndim) + real*8 combined_frozen_metrics(ndim) real*8 generalized_states_frozen(nvars, -2:3) - real*8 generalized_fluxes_frozen(nvars, ndim, -2:3) - real*8 metric_solve_tmp(ndim) + real*8 generalized_fluxes_frozen(nvars, -2:3) + real*8 R(nvars, nvars), R_inv(nvars, nvars), lambda_roe(nvars) real*8 lambda_pointwise(nvars, -2:3) real*8 wavespeeds(nvars) - integer i, j, k - real*8 delta_xi, delta_eta, delta_zeta + real*8 characteristic_fluxes_pos(nvars, -2:3) real*8 characteristic_fluxes_neg(nvars, -2:3) - real*8 metrics_frozen(ndim, ndim) - real*8 combined_frozen_metrics(ndim) - integer v, d - - ! grid spacing in generalized coordinates - delta_xi = 1.0d0 - delta_eta = 1.0d0 - delta_zeta = 1.0d0 - !$loopy begin tagged: to_generalized - !call convert_to_generalized(nvars, ndim, nx, ny, nz, & - ! fluxes, metrics, metric_jacobians, generalized_fluxes) - - ! FIXME: Manually inlined for now - do k=-2,nz+3 - do j=-2,ny+3 - do i=-2,nx+3 - do v=1,nvars - call mult_mat_vec(ndim, ndim, 1.0d0/metric_jacobians(i,j,k), metrics(:,:,i,j,k), & - fluxes(v,:,i,j,k), generalized_fluxes(v,:,i,j,k)) - end do - end do - end do - end do - !$loopy end tagged: to_generalized + integer i, j, k, v ! for the next three loops, note carefully that the inner loop indices have a different range @@ -77,7 +53,7 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do i=0,nx call convert_to_generalized_frozen(nvars, ndim, & states(:, i-2:i+3, j, k), & - fluxes(:, :, i-2:i+3, j, k), & + fluxes(:, i-2:i+3, j, k), & metrics(:, :, i:i+1, j, k), & metric_jacobians(i:i+1, j, k), & metrics_frozen, & @@ -93,14 +69,14 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & call split_characteristic_fluxes(nvars, & generalized_states_frozen, & - generalized_fluxes_frozen(:, 1, :), & + generalized_fluxes_frozen, & R_inv, & wavespeeds, & characteristic_fluxes_pos, & characteristic_fluxes_neg) call weno_flux(nvars, & - generalized_fluxes(:, 1, i-2:i+3, j, k), & + fluxes(:, i-2:i+3, j, k), & characteristic_fluxes_pos, characteristic_fluxes_neg, & combined_frozen_metrics(1), R, weno_flux_tmp(:, i, j, k)) end do @@ -113,8 +89,8 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do j=1,ny do i=1,nx do v=1,nvars - flux_derivatives_generalized(v, 1, i, j, k) = & - (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i-1, j, k)) / delta_xi + flux_derivatives(v, i, j, k) = & + (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i-1, j, k)) end do end do end do @@ -127,7 +103,7 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do j=0,ny call convert_to_generalized_frozen(nvars, ndim, & states(:, i, j-2:j+3, k), & - fluxes(:, :, i, j-2:j+3, k), & + fluxes(:, i, j-2:j+3, k), & metrics(:, :, i, j:j+1, k), & metric_jacobians(i, j:j+1, k), & metrics_frozen, & @@ -143,14 +119,14 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & call split_characteristic_fluxes(nvars, & generalized_states_frozen, & - generalized_fluxes_frozen(:, 2, :), & + generalized_fluxes_frozen, & R_inv, & wavespeeds, & characteristic_fluxes_pos, & characteristic_fluxes_neg) call weno_flux(nvars, & - generalized_fluxes(:, 2, i, j-2:j+3, k), & + fluxes(:, i, j-2:j+3, k), & characteristic_fluxes_pos, characteristic_fluxes_neg, & combined_frozen_metrics(2), R, weno_flux_tmp(:, i, j, k)) @@ -164,8 +140,8 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do j=1,ny do i=1,nx do v=1,nvars - flux_derivatives_generalized(v, 2, i, j, k) = & - (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i, j-1, k)) / delta_eta + flux_derivatives(v, i, j, k) = & + (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i, j-1, k)) end do end do end do @@ -178,7 +154,7 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do k=0,nz call convert_to_generalized_frozen(nvars, ndim, & states(:, i, j, k-2:k+3), & - fluxes(:, :, i, j, k-2:k+3), & + fluxes(:, i, j, k-2:k+3), & metrics(:, :, i, j, k:k+1), & metric_jacobians(i, j, k:k+1), & metrics_frozen, & @@ -194,14 +170,14 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & call split_characteristic_fluxes(nvars, & generalized_states_frozen, & - generalized_fluxes_frozen(:, 3, :), & + generalized_fluxes_frozen, & R_inv, & wavespeeds, & characteristic_fluxes_pos, & characteristic_fluxes_neg) call weno_flux(nvars, & - generalized_fluxes(:, 3, i, j, k-2:k+3), & + fluxes(:, i, j, k-2:k+3), & characteristic_fluxes_pos, characteristic_fluxes_neg, & combined_frozen_metrics(3), R, weno_flux_tmp(:, i, j, k)) end do @@ -214,35 +190,13 @@ subroutine compute_flux_derivatives(nvars, ndim, nx, ny, nz, & do j=1,ny do i=1,nx do v=1,nvars - flux_derivatives_generalized(v, 3, i, j, k) = & - (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i, j, k-1)) / delta_zeta + flux_derivatives(v, i, j, k) = & + (weno_flux_tmp(v, i, j, k) - weno_flux_tmp(v, i, j, k-1)) end do end do end do end do !$loopy end tagged: flux_z_diff - - !$loopy begin tagged: from_generalized - !call convert_from_generalized(nvars, ndim, nx, ny, nz, & - ! flux_derivatives_generalized, & - ! metrics, & - ! metric_jacobians, & - ! flux_derivatives) - ! FIXME: Manually inlined for now - do k=-2,nz+3 - do j=-2,ny+3 - do i=-2,nx+3 - do v=1,nvars - call solve3x3(metrics(:,:,i,j,k), flux_derivatives_generalized(v,:,i,j,k), & - metric_solve_tmp) - do d=1,ndim - flux_derivatives(v,d,i,j,k) = metric_solve_tmp(d)*metric_jacobians(i,j,k) - end do - end do - end do - end do - end do - !$loopy end tagged: from_generalized end subroutine subroutine pointwise_eigenvalues(nvars, d, states, lambda_pointwise) @@ -472,13 +426,13 @@ subroutine convert_to_generalized_frozen( & generalized_fluxes_frozen) integer, intent(in) :: nvars, ndim real*8, intent(in) :: states(nvars, -2:3) - real*8, intent(in) :: fluxes(nvars, ndim, -2:3) + real*8, intent(in) :: fluxes(nvars, -2:3) real*8, intent(in) :: metrics(ndim, ndim, 2) real*8, intent(in) :: metric_jacobians(2) real*8, intent(out) :: metrics_frozen(ndim, ndim) real*8, intent(out) :: combined_frozen_metrics(ndim) real*8, intent(out) :: generalized_states_frozen(nvars, -2:3) - real*8, intent(out) :: generalized_fluxes_frozen(nvars, ndim, -2:3) + real*8, intent(out) :: generalized_fluxes_frozen(nvars, -2:3) real*8 jacobian_frozen @@ -510,107 +464,14 @@ subroutine convert_to_generalized_frozen( & do k=-2,3 do v=1,nvars - call mult_mat_vec(ndim, ndim, 1.0d0, metrics_frozen, & - fluxes(v,:,k), generalized_fluxes_frozen(v,:,k)) - end do - end do -end subroutine - -#if 0 -subroutine convert_to_generalized(nvars, ndim, nx, ny, nz, & - fluxes, & - metrics, & - metric_jacobians, & - generalized_fluxes) - implicit none - - integer, intent(in) :: nvars, ndim, nx, ny, nz - real*8, intent(in) :: fluxes(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(in) :: metrics(ndim, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(in) :: metric_jacobians(-2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(out) :: generalized_fluxes(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - - integer i, j, k, v - - do k=-2,nz+3 - do j=-2,ny+3 - do i=-2,nx+3 - do v=1,nvars - call mult_mat_vec(ndim, ndim, 1.0d0/metric_jacobians(i,j,k), metrics(:,:,i,j,k), & - fluxes(v,:,i,j,k), generalized_fluxes(v,:,i,j,k)) - end do - end do + ! FIXME: this is a bug + ! ... we actually need to recompute the fluxes using frozen metrics + ! ... but this works when the metric matrix is identity + generalized_fluxes_frozen(v,k) = fluxes(v,k) end do end do end subroutine -subroutine convert_from_generalized(nvars, ndim, nx, ny, nz, & - flux_derivatives_generalized, & - metrics, & - metric_jacobians, & - flux_derivatives) - implicit none - - integer, intent(in) :: nvars, ndim, nx, ny, nz - real*8, intent(in) :: flux_derivatives_generalized(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(in) :: metrics(ndim, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(in) :: metric_jacobians(-2:nx+3, -2:ny+3, -2:nz+3) - real*8, intent(out) :: flux_derivatives(nvars, ndim, -2:nx+3, -2:ny+3, -2:nz+3) - - integer i, j, k, v - integer d - - do k=-2,nz+3 - do j=-2,ny+3 - do i=-2,nx+3 - do v=1,nvars - call solve3x3(metrics(:,:,i,j,k), flux_derivatives_generalized(v,:,i,j,k), & - flux_derivatives(v,:,i,j,k)) - do d=1,ndim - flux_derivatives(v,d,i,j,k) = flux_derivatives(v,d,i,j,k)*metric_jacobians(i,j,k) - end do - end do - end do - end do - end do -end subroutine -#endif - -subroutine solve3x3(a, b, x) - implicit none - - real*8, intent(in) :: a(3,3), b(3) - real*8, intent(out) :: x(3) - - real*8 temp(3,3) - real*8 det_a(1), det_temp(1) - integer k - integer i,j - - call determinant3x3(a, det_a) - do k=1,3 - do j=1,3 - do i=1,3 - temp(i,j) = a(i,j) - temp(i,k) = b(k) - end do - end do - call determinant3x3(temp, det_temp) - x(k) = det_temp(1)/det_a(1) - end do -end subroutine - -subroutine determinant3x3(a, det) - implicit none - - real*8, intent(in) :: a(3,3) - real*8, intent(out) :: det(1) - - det(1) = a(1,1)*(a(2,2)*a(3,3) - a(2,3)*a(3,2)) & - + a(1,2)*(a(2,3)*a(3,1) - a(2,1)*a(3,3)) & - + a(1,3)*(a(2,1)*a(3,2) - a(2,2)*a(3,1)) -end subroutine - subroutine split_characteristic_fluxes(nvars, & generalized_states_frozen, & generalized_fluxes_frozen, & @@ -986,10 +847,6 @@ end subroutine ! ! knl = lp.assume(knl, "nx > 0 and ny > 0 and nz > 0") ! -! knl = lp.set_temporary_scope(knl, "flux_derivatives_generalized", -! lp.AddressSpace.GLOBAL) -! knl = lp.set_temporary_scope(knl, "generalized_fluxes", -! lp.AddressSpace.GLOBAL) ! knl = lp.set_temporary_scope(knl, "weno_flux_tmp", ! lp.AddressSpace.GLOBAL) ! diff --git a/test/data_for_test.py b/test/data_for_test.py index ccea4b9b1e984e1291ab9bfe7a7b3cfea7c64459..84033f7ed188c520042011032c0e4d709a4d71fa 100644 --- a/test/data_for_test.py +++ b/test/data_for_test.py @@ -7,6 +7,9 @@ import weno_reference_implementation as ref # {{{ FluxDataSingle class FluxDataSingle: + # FIXME: can we set some of these constants from ref.gas? + # -- if all nvars references come from there, it's relatively easy to + # introduce a new gas with more (e.g. scalar) variables nvars = 5 ndim = 3 dirs = {"x": 1, "y": 2, "z": 3} @@ -25,9 +28,14 @@ class FluxDataSingle: self.frozen_jacobian = np.mean(self.jacobians[2:4], axis=0) self.combined_frozen_metrics = 1.0 + # FIXME: Move array_from_string stuff outside FluxDataSingle + # -- just pass an array & have external utilities that generate + # Riemann, sine wave, etc. initial conditions + # FIXME: Consider handling row swapping outside as well? + # FIXME: Do we even need to swap rows? self.state_pair = self.swap_array_rows( u.transposed_array_from_string(states_str), self.dir_internal) - self.states = u.expand_to_6(self.state_pair) + self.states = u.expand_to_n(self.state_pair, 6) # FIXME: these should be generalized fluxes # FIXME: make a clear distinction between fluxes in physical and @@ -75,6 +83,93 @@ class FluxDataSingle: # }}} +# {{{ FluxDataVector + +# FIXME: is there a better way to divide responsibilities with these fixture classes? +class FluxDataVector: + nvars = 5 + ndim = 3 + dirs = {"x": 1, "y": 2, "z": 3} + halo = 3 + + def __init__(self, nx, ny, nz, states_str, direction): + self.direction = self.dirs[direction] + self.dir_internal = self.direction-1 + + self.nx = nx + self.ny = ny + self.nz = nz + + self.nxhalo = self.nx + 2*self.halo + self.nyhalo = self.ny + 2*self.halo + self.nzhalo = self.nz + 2*self.halo + + self.flux_dims = (self.nvars, self.nx, self.ny, self.nz) + + self.metrics = np.stack( + [np.stack( + [np.stack( + [np.identity(self.ndim) for i in range(self.nxhalo)], + axis=-1) for j in range(self.nyhalo)], + axis=-1) for k in range(self.nzhalo)], + axis=-1).copy(order="F") + self.jacobians = np.ones((self.nxhalo, self.nyhalo, self.nzhalo), + order="F") + + state_pair = self.swap_array_rows( + u.transposed_array_from_string(states_str), self.direction) + # FIXME: Move array_from_string stuff outside FluxDataSingle + # -- just pass an array & have external utilities that generate + # Riemann, sine wave, etc. initial conditions + # FIXME: Consider handling row swapping outside as well? + # FIXME: Do we even need to swap rows? + self.state_pair = self.swap_array_rows( + u.transposed_array_from_string(states_str), self.dir_internal) + # NOTE: dimensions are nvars x nxhalo x nyhalo x nzhalo + self.states = self.fill_from_pair() + + # NOTE: dimensions are nvars x nxhalo x nyhalo x nzhalo + # FIXME: these should be generalized fluxes + # FIXME: make a clear distinction between fluxes in physical and + # generalized coordinates + npoints = self.nxhalo*self.nyhalo*self.nzhalo + flat_states = self.states.reshape((self.nvars, npoints)) + self.fluxes = ref.pointwise_fluxes( + flat_states)[:,:,self.dir_internal].T.reshape( + (self.nvars, self.nxhalo, self.nyhalo, self.nzhalo) + ).copy(order="F") + + # FIXME: use reference implementation + # NOTE: dimensions are nvars x nx x ny x nz + self.flux_derivatives = np.zeros((self.nvars, self.nx, self.ny, + self.nz), order="F") + + def swap_array_rows(self, arr, d): + p = self.permutation(d) + arr[p, :] = arr[[1, 2, 3], :] + return arr + + def permutation(self, d): + return [(d-1+i) % 3 + 1 for i in range(3)] + + def fill_from_pair(self): + d = self.dir_internal + nx_arr = np.array([self.nxhalo, self.nyhalo, self.nzhalo]) + result = u.expand_to_n(self.state_pair, nx_arr[d]) + + for i in range(d): + result = self.add_dimension(result, nx_arr[i]) + result = np.swapaxes(result, -2, -1) + for i in range(d+1,self.ndim): + result = self.add_dimension(result, nx_arr[i]) + + return result.copy(order="F") + + def add_dimension(self, arr, n): + return np.stack([arr for i in range(n)], axis=-1) + +# }}} + single_data = {} @@ -114,6 +209,35 @@ single_data["Case d:z"] = FluxDataSingle( states_str="1 -3 -1 -2 11,2 -12 -4 -8 64", direction="z") +vector_data = {} + +vector_data["Case flat:x"] = FluxDataVector( + nx=6, ny=2, nz=2, + states_str="1 1 1 1 5.5,1 1 1 1 5.5", + direction="x") +vector_data["Case flat:y"] = FluxDataVector( + nx=2, ny=6, nz=2, + states_str="1 1 1 1 5.5,1 1 1 1 5.5", + direction="y") +vector_data["Case flat:z"] = FluxDataVector( + nx=2, ny=2, nz=6, + states_str="1 1 1 1 5.5,1 1 1 1 5.5", + direction="z") + +vector_data["Case a:x"] = FluxDataVector( + nx=6, ny=2, nz=2, + states_str="2 4 4 4 20,1 1 1 1 5.5", + direction="x") +vector_data["Case a:y"] = FluxDataVector( + nx=2, ny=6, nz=2, + states_str="2 4 4 4 20,1 1 1 1 5.5", + direction="y") +vector_data["Case a:z"] = FluxDataVector( + nx=2, ny=2, nz=6, + states_str="2 4 4 4 20,1 1 1 1 5.5", + direction="z") + + @pytest.fixture(scope="session", params=[ "Case flat:x", "Case flat:y", "Case flat:z", "Case a:x", "Case a:y", "Case a:z", @@ -124,4 +248,10 @@ def flux_test_data_fixture(request): return single_data[request.param] +@pytest.fixture(scope="session", params=[ + "Case flat:x", "Case flat:y", "Case flat:z"]) +def cfd_test_data_fixture(request): + return vector_data[request.param] + + # vim: foldmethod=marker diff --git a/test/test_eigensystem.py b/test/test_eigensystem.py index 4b0a7a6b14af212778076242e3e0fbef81e7f2e7..de37e8959b697457e314844613f4f6f68c5f49ff 100644 --- a/test/test_eigensystem.py +++ b/test/test_eigensystem.py @@ -83,7 +83,8 @@ def test_lax_wavespeeds( nvars = 5 - lam_pointwise = u.expand_to_6(u.transposed_array_from_string(lam_pointwise_str)) + lam_pointwise = u.expand_to_n( + u.transposed_array_from_string(lam_pointwise_str), 6) lam_roe = u.array_from_string(lam_roe_str) lam_dev = u.empty_array_on_device(queue, nvars) diff --git a/test/test_flux_derivatives.py b/test/test_flux_derivatives.py index aaa0e43471a62a771cd6a908f8c20324e3e01ac1..b599be081892937b4e27468b5c9b07eea4309e4b 100644 --- a/test/test_flux_derivatives.py +++ b/test/test_flux_derivatives.py @@ -15,6 +15,34 @@ from pyopencl.tools import ( # noqa as pytest_generate_tests) import utilities as u +from data_for_test import ( # noqa: F401 + cfd_test_data_fixture + ) + + +def test_compute_flux_derivatives_uniform_grid(ctx_factory, cfd_test_data_fixture): + data = cfd_test_data_fixture + + prg = u.get_weno_program() + queue = u.get_queue(ctx_factory) + prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) + + flux_derivatives_dev = u.empty_array_on_device(queue, *data.flux_dims) + + prg(queue, + nvars=data.nvars, + ndim=data.ndim, + nx=data.nx, + ny=data.ny, + nz=data.nz, + d=data.direction, + states=data.states, + fluxes=data.fluxes, + metrics=data.metrics, + metric_jacobians=data.jacobians, + flux_derivatives=flux_derivatives_dev) + + u.compare_arrays(flux_derivatives_dev.get(), data.flux_derivatives) @pytest.mark.slow @@ -25,7 +53,7 @@ def test_compute_flux_derivatives(ctx_factory): prg = prg.copy(target=lp.PyOpenCLTarget(queue.device)) lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=0, - parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16, d=0)) @pytest.mark.slow @@ -41,7 +69,7 @@ def test_compute_flux_derivatives_gpu(ctx_factory, write_code=False): u.write_target_device_code(prg) lp.auto_test_vs_ref(prg, ctx_factory(), warmup_rounds=0, - parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16)) + parameters=dict(ndim=3, nvars=5, nx=16, ny=16, nz=16, d=0)) # This lets you run 'python test.py test_case(cl._csc)' without pytest. diff --git a/test/utilities.py b/test/utilities.py index 35ea205de6bcec427579a775a3126d9108fe2fc2..bd78fb4717e179b83c00ed68eaddbe8a5ed82af6 100644 --- a/test/utilities.py +++ b/test/utilities.py @@ -74,8 +74,8 @@ def split_map_to_list(string, map_func, splitter): return list(map(map_func, string.split(splitter))) -def expand_to_6(pair): - return np.repeat(pair, 3, axis=1).copy(order="F") +def expand_to_n(pair, n): + return np.repeat(pair, n/2, axis=1).copy(order="F") # }}} @@ -139,30 +139,20 @@ def get_weno_program_with_root_kernel(knl): def transform_weno_for_gpu(prg, print_kernel=False): knl = prg["compute_flux_derivatives"] - for suffix in ["", "_1", "_2", "_3", "_4", "_5", "_6", "_7"]: + for suffix in ["", "_1", "_2", "_3", "_4", "_5"]: knl = lp.split_iname(knl, "i"+suffix, 16, outer_tag="g.0", inner_tag="l.0") knl = lp.split_iname(knl, "j"+suffix, 16, outer_tag="g.1", inner_tag="l.1") - for var_name in ["delta_xi", "delta_eta", "delta_zeta"]: - knl = lp.assignment_to_subst(knl, var_name) - - knl = lp.add_barrier(knl, "tag:to_generalized", "tag:flux_x_compute") knl = lp.add_barrier(knl, "tag:flux_x_compute", "tag:flux_x_diff") knl = lp.add_barrier(knl, "tag:flux_x_diff", "tag:flux_y_compute") knl = lp.add_barrier(knl, "tag:flux_y_compute", "tag:flux_y_diff") knl = lp.add_barrier(knl, "tag:flux_y_diff", "tag:flux_z_compute") knl = lp.add_barrier(knl, "tag:flux_z_compute", "tag:flux_z_diff") - knl = lp.add_barrier(knl, "tag:flux_z_diff", "tag:from_generalized") prg = prg.with_kernel(knl) - # FIXME: These should work, but don't - # FIXME: Undo the hand-inlining in WENO.F90 - #prg = lp.inline_callable_kernel(prg, "convert_to_generalized") - #prg = lp.inline_callable_kernel(prg, "convert_from_generalized") - if print_kernel: print(prg["convert_to_generalized_frozen"]) 1/0