diff --git a/test/test_advect.py b/test/test_advect.py new file mode 100644 index 0000000000000000000000000000000000000000..c5da3a66194c1774e717370e24c6dd547a366995 --- /dev/null +++ b/test/test_advect.py @@ -0,0 +1,107 @@ + +def test_advect(ctx_factory): + + dtype = np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + N = 8 + + from pymbolic import var + K_sym = var("K") + + field_shape = (N, N, N, K_sym) + + # 1. direction-by-direction similarity transform on u + # 2. invert diagonal + # 3. transform back (direction-by-direction) + + # K - run-time symbolic + knl = lp.make_kernel(ctx.devices[0], + "[K] -> {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=e<K}" %N + [ + # differentiate u + "[|i,j,k] <float32> ur[i,j,k,e] = sum_float32(m, D[i,m]*u[m,j,k,e])", + "[|i,j,k] <float32> us[i,j,k,e] = sum_float32(m, D[j,m]*u[i,m,k,e])", + "[|i,j,k] <float32> ut[i,j,k,e] = sum_float32(m, D[k,m]*u[i,j,m,e])", + + # differentiate v + "[|i,j,k] <float32> vr[i,j,k,e] = sum_float32(m, D[i,m]*v[m,j,k,e])", + "[|i,j,k] <float32> vs[i,j,k,e] = sum_float32(m, D[j,m]*v[i,m,k,e])", + "[|i,j,k] <float32> vt[i,j,k,e] = sum_float32(m, D[k,m]*v[i,j,m,e])", + + # differentiate w + "[|i,j,k] <float32> wr[i,j,k,e] = sum_float32(m, D[i,m]*w[m,j,k,e])", + "[|i,j,k] <float32> ws[i,j,k,e] = sum_float32(m, D[j,m]*w[i,m,k,e])", + "[|i,j,k] <float32> wt[i,j,k,e] = sum_float32(m, D[k,m]*w[i,j,m,e])", + + # find velocity in (r,s,t) coordinates + "<float32> Vr[i,j,k,e] = " + "G[i,j,k,0,e]*u[i,j,k,e] + G[i,j,k,1,e]*v[i,j,k,e] + G[i,j,k,2,e]*w[i,j,k,e]", + "<float32> Vs[i,j,k,e] = " + "G[i,j,k,3,e]*u[i,j,k,e] + G[i,j,k,4,e]*v[i,j,k,e] + G[i,j,k,5,e]*w[i,j,k,e]", + "<float32> Vt[i,j,k,e] = " + "G[i,j,k,6,e]*u[i,j,k,e] + G[i,j,k,7,e]*v[i,j,k,e] + G[i,j,k,8,e]*w[i,j,k,e]", + + # form nonlinear term on integration nodes + "Nu[i,j,k,e] = Vr[i,j,k,e]*ur[i,j,k,e]+Vs[i,j,k,e]*us[i,j,k,e]+Vt[i,j,k,e]*ut[i,j,k,e]", + "Nv[i,j,k,e] = Vr[i,j,k,e]*vr[i,j,k,e]+Vs[i,j,k,e]*vs[i,j,k,e]+Vt[i,j,k,e]*vt[i,j,k,e]", + "Nw[i,j,k,e] = Vr[i,j,k,e]*wr[i,j,k,e]+Vs[i,j,k,e]*ws[i,j,k,e]+Vt[i,j,k,e]*wt[i,j,k,e]", + ], + [ + lp.ArrayArg("u", dtype, shape=field_shape, order=order), + lp.ArrayArg("v", dtype, shape=field_shape, order=order), + lp.ArrayArg("w", dtype, shape=field_shape, order=order), + lp.ArrayArg("Nu", dtype, shape=field_shape, order=order), + lp.ArrayArg("Nv", dtype, shape=field_shape, order=order), + lp.ArrayArg("Nw", dtype, shape=field_shape, order=order), + lp.ArrayArg("D", dtype, shape=(N, N), order=order), + lp.ScalarArg("K", np.int32, approximately=1000), + ], + name="sem_advect", assumptions="K>=1") + + print knl + 1/0 + + knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + + knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1")) + + print knl + #1/0 + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) + + a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + c = cl_array.empty_like(a) + refsol = np.dot(a.get(), b.get()) + + def launcher(kernel, gsize, lsize, check): + evt = kernel(queue, gsize(), lsize(), a.data, b.data, c.data, + g_times_l=True) + + if check: + check_error(refsol, c.get()) + + return evt + + lp.drive_timing_run(kernel_gen, queue, launcher, 2*n**3) + + + + +if __name__ == "__main__": + # make sure that import failures get reported, instead of skipping the + # tests. + import pyopencl as cl + + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) diff --git a/test/test_advect_dealias.py b/test/test_advect_dealias.py new file mode 100644 index 0000000000000000000000000000000000000000..072c840a6c5074f17b74ce4bc89eb76fee51e4cd --- /dev/null +++ b/test/test_advect_dealias.py @@ -0,0 +1,142 @@ + +def test_advect(ctx_factory): + + dtype = np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + N = 8 + M = 8 + + from pymbolic import var + K_sym = var("K") + + field_shape = (N, N, N, K_sym) + interim_field_shape = (M, M, M, K_sym) + + # 1. direction-by-direction similarity transform on u + # 2. invert diagonal + # 3. transform back (direction-by-direction) + + # K - run-time symbolic + knl = lp.make_kernel(ctx.devices[0], + "[K] -> {[i,ip,j,jp,k,kp,m,e]: 0<=i,j,k,m<%d AND 0<=m,ip,jp,kp<%d 0<=e<K}" %M %N + [ + + # interpolate u to integration nodes + "[|i,jp,kp] <float32> u0[i,jp,kp,e] = sum_float32(ip, I[i,ip]*u[ip,jp,kp,e])", + "[|i,j ,kp] <float32> u1[i,j,kp,e] = sum_float32(jp, I[j,jp]*u0[i,jp,kp,e])", + "[|i,j ,k ] <float32> Iu[i,j,k,e] = sum_float32(kp, I[k,kp]*u1[i,j,kp,e])", + + # differentiate u on integration nodes + "[|i,j ,k ] <float32> Iur[i,j,k,e] = sum_float32( m, D[i,m]*Iu[m,j,k,e])", + "[|i,j ,k ] <float32> Ius[i,j,k,e] = sum_float32( m, D[j,m]*Iu[i,m,k,e])", + "[|i,j ,k ] <float32> Iut[i,j,k,e] = sum_float32( m, D[k,m]*Iu[i,j,m,e])", + + # interpolate v to integration nodes + "[|i,jp,kp] <float32> v0[i,jp,kp,e] = sum_float32(ip, I[i,ip]*v[ip,jp,kp,e])", + "[|i,j ,kp] <float32> v1[i,j,kp,e] = sum_float32(jp, I[j,jp]*v0[i,jp,kp,e])", + "[|i,j ,k ] <float32> Iv[i,j,k,e] = sum_float32(kp, I[k,kp]*v1[i,j,kp,e])", + + # differentiate v on integration nodes + "[|i,j ,k ] <float32> Ivr[i,j,k,e] = sum_float32(ip, D[i,m]*Iv[m,j,k,e])", + "[|i,j ,k ] <float32> Ivs[i,j,k,e] = sum_float32(jp, D[j,m]*Iv[i,m,k,e])", + "[|i,j ,k ] <float32> Ivt[i,j,k,e] = sum_float32(kp, D[k,m]*Iv[i,j,m,e])", + + # interpolate w to integration nodes + "[|i,jp,kp] <float32> w0[i,jp,kp,e] = sum_float32(ip, I[i,ip]*w[ip,jp,kp,e])", + "[|i,j ,kp] <float32> w1[i,j,kp,e] = sum_float32(jp, I[j,jp]*w0[i,jp,kp,e])", + "[|i,j ,k ] <float32> Iw[i,j,k,e] = sum_float32(kp, I[k,kp]*w1[i,j,kp,e])", + + # differentiate w on integration nodes + "[|i,j ,k ] <float32> Iwr[i,j,k,e] = sum_float32(ip, D[i,m]*Iw[m,j,k,e])", + "[|i,j ,k ] <float32> Iws[i,j,k,e] = sum_float32(jp, D[j,m]*Iw[i,m,k,e])", + "[|i,j ,k ] <float32> Iwt[i,j,k,e] = sum_float32(kp, D[k,m]*Iw[i,j,m,e])", + + # find velocity in (r,s,t) coordinates + "<float32> Vr[i,j,k,e] = " + "G[i,j,k,0,e]*Iu[i,j,k,e] + G[i,j,k,1,e]*Iv[i,j,k,e] + G[i,j,k,2,e]*Iw[i,j,k,e]", + "<float32> Vs[i,j,k,e] = " + "G[i,j,k,3,e]*Iu[i,j,k,e] + G[i,j,k,4,e]*Iv[i,j,k,e] + G[i,j,k,5,e]*Iw[i,j,k,e]", + "<float32> Vt[i,j,k,e] = " + "G[i,j,k,6,e]*Iu[i,j,k,e] + G[i,j,k,7,e]*Iv[i,j,k,e] + G[i,j,k,8,e]*Iw[i,j,k,e]", + + # form nonlinear term on integration nodes + "<float32> Nu[i,j,k,e] = Vr[i,j,k,e]*Iur[i,j,k,e]+Vs[i,j,k,e]*Ius[i,j,k,e]+Vt[i,j,k,e]*Iut[i,j,k,e]", + "<float32> Nv[i,j,k,e] = Vr[i,j,k,e]*Ivr[i,j,k,e]+Vs[i,j,k,e]*Ivs[i,j,k,e]+Vt[i,j,k,e]*Ivt[i,j,k,e]", + "<float32> Nw[i,j,k,e] = Vr[i,j,k,e]*Iwr[i,j,k,e]+Vs[i,j,k,e]*Iws[i,j,k,e]+Vt[i,j,k,e]*Iwt[i,j,k,e]", + + # L2 project Nu back to Lagrange basis + "[|ip,j,k] <float32> Nu2[ip,j,k,e] = sum_float32(i, V[ip,i]*Nu[i,j,k,e])", + "[|ip,jp,k] <float32> Nu1[ip,jp,k,e] = sum_float32(j, V[jp,j]*Nu2[ip,j,k,e])", + "INu[ip,jp,kp,e] = sum_float32(k, V[kp,k]*Nu1[ip,jp,k,e])", + + # L2 project Nv back to Lagrange basis + "[|ip,j,k] <float32> Nv2[ip,j,k,e] = sum_float32(i, V[ip,i]*Nv[i,j,k,e])", + "[|ip,jp,k] <float32> Nv1[ip,jp,k,e] = sum_float32(j, V[jp,j]*Nv2[ip,j,k,e])", + "INv[ip,jp,kp,e] = sum_float32(k, V[kp,k]*Nv1[ip,jp,k,e])", + + # L2 project Nw back to Lagrange basis + "[|ip,j,k] <float32> Nw2[ip,j,k,e] = sum_float32(i, V[ip,i]*Nw[i,j,k,e])", + "[|ip,jp,k] <float32> Nw1[ip,jp,k,e] = sum_float32(j, V[jp,j]*Nw2[ip,j,k,e])", + "INw[ip,jp,kp,e] = sum_float32(k, V[kp,k]*Nw1[ip,jp,k,e])", + ], + [ + lp.ArrayArg("u", dtype, shape=field_shape, order=order), + lp.ArrayArg("v", dtype, shape=field_shape, order=order), + lp.ArrayArg("w", dtype, shape=field_shape, order=order), + lp.ArrayArg("INu", dtype, shape=field_shape, order=order), + lp.ArrayArg("INv", dtype, shape=field_shape, order=order), + lp.ArrayArg("INw", dtype, shape=field_shape, order=order), + lp.ArrayArg("D", dtype, shape=(M,M), order=order), + lp.ArrayArg("I", dtype, shape=(M, N), order=order), + lp.ArrayArg("V", dtype, shape=(N, M), order=order), + lp.ScalarArg("K", np.int32, approximately=1000), + ], + name="sem_advect", assumptions="K>=1") + + print knl + 1/0 + + knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + + knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1")) + + print knl + #1/0 + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) + + a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + c = cl_array.empty_like(a) + refsol = np.dot(a.get(), b.get()) + + def launcher(kernel, gsize, lsize, check): + evt = kernel(queue, gsize(), lsize(), a.data, b.data, c.data, + g_times_l=True) + + if check: + check_error(refsol, c.get()) + + return evt + + lp.drive_timing_run(kernel_gen, queue, launcher, 2*n**3) + + + + +if __name__ == "__main__": + # make sure that import failures get reported, instead of skipping the + # tests. + import pyopencl as cl + + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) diff --git a/test/test_interp_diff.py b/test/test_interp_diff.py new file mode 100644 index 0000000000000000000000000000000000000000..eeed866463fea070e0163cabd87a56a372dcdecd --- /dev/null +++ b/test/test_interp_diff.py @@ -0,0 +1,87 @@ + +def test_interp_diff(ctx_factory): + + dtype = np.float32 + ctx = ctx_factory() + order = "C" + queue = cl.CommandQueue(ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + + N = 8 + M = 8 + + from pymbolic import var + K_sym = var("K") + + field_shape = (N, N, N, K_sym) + interim_field_shape = (M, M, M, K_sym) + + # 1. direction-by-direction similarity transform on u + # 2. invert diagonal + # 3. transform back (direction-by-direction) + + # K - run-time symbolic + knl = lp.make_kernel(ctx.devices[0], + "[K] -> {[i,ip,j,jp,k,kp,e]: 0<=i,j,k<%d AND 0<=ip,jp,kp<%d 0<=e<K}" %M %N + [ + "[|i,jp,kp] <float32> u1[i ,jp,kp,e] = sum_float32(ip, I[i,ip]*u [ip,jp,kp,e])", + "[|i,j ,kp] <float32> u2[i ,j ,kp,e] = sum_float32(jp, I[j,jp]*u1[i ,jp,kp,e])", + "[|i,j ,k ] <float32> u3[i ,j ,k ,e] = sum_float32(kp, I[k,kp]*u2[i ,j ,kp,e])", + "[|i,j ,k ] <float32> Pu[i ,j ,k ,e] = P[i,j,k,e]*u3[i,j,k,e]", + "[|i,j ,kp] <float32> Pu3[i ,j ,kp,e] = sum_float32(k, V[kp,k]*Pu[i ,j , k,e])", + "[|i,jp,kp] <float32> Pu2[i ,jp,kp,e] = sum_float32(j, V[jp,j]*Pu[i ,j ,kp,e])", + "Pu[ip,jp,kp,e] = sum_float32(i, V[ip,i]*Pu[i ,jp,kp,e])", + ], + [ + lp.ArrayArg("u", dtype, shape=field_shape, order=order), + lp.ArrayArg("P", dtype, shape=interim_field_shape, order=order), + lp.ArrayArg("I", dtype, shape=(M, N), order=order), + lp.ArrayArg("V", dtype, shape=(N, M), order=order), + lp.ArrayArg("Pu", dtype, shape=field_shape, order=order), + lp.ScalarArg("K", np.int32, approximately=1000), + ], + name="sem_lap_precon", assumptions="K>=1") + + print knl + 1/0 + + knl = lp.split_dimension(knl, "e", 16, outer_tag="g.0")#, slabs=(0, 1)) + + knl = lp.tag_dimensions(knl, dict(i="l.0", j="l.1")) + + print knl + #1/0 + + kernel_gen = lp.generate_loop_schedules(knl) + kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) + + a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) + c = cl_array.empty_like(a) + refsol = np.dot(a.get(), b.get()) + + def launcher(kernel, gsize, lsize, check): + evt = kernel(queue, gsize(), lsize(), a.data, b.data, c.data, + g_times_l=True) + + if check: + check_error(refsol, c.get()) + + return evt + + lp.drive_timing_run(kernel_gen, queue, launcher, 2*n**3) + + + + +if __name__ == "__main__": + # make sure that import failures get reported, instead of skipping the + # tests. + import pyopencl as cl + + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from py.test.cmdline import main + main([__file__]) diff --git a/test/test_linalg.py b/test/test_linalg.py index 15d4571b5321b0be946c3b4e5d6349ebd32a300f..7ae2a1d4d3e15bcdb987b9087ab802956ff017de 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -181,20 +181,6 @@ def test_transpose(ctx_factory): kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, {}) - a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order) - b = cl_array.empty_like(a) - refsol = a.get().T.copy() - - def launcher(kernel, gsize, lsize, check): - evt = kernel(queue, gsize(), lsize(), a.data, b.data, - g_times_l=True) - - if check: - check_error(refsol, b.get()) - - return evt - - #lp.drive_timing_run(kernel_gen, queue, launcher, 0) lp.auto_test_vs_seq(seq_knl, ctx, kernel_gen, op_count=dtype.itemsize*n**2*2/1e9, op_label="GByte", parameters={})