Newer
Older
nsteps = int(np.ceil((t_end + 1e-9) / dt))
for (_, _, profile_data) in stepper.run(
ic, t_start, dt, t_end, return_profile_data=True):
step += 1
tn = time.time()
logger.info("step %d/%d: %f", step, nsteps, tn - t)
t = tn
logger.info("fusion? %s", use_fusion)
for key, value in profile_data.items():
if isinstance(value, TimingFutureList):
print(key, value.elapsed())
# {{{ paper outputs
def get_example_stepper(queue, dims=2, order=3, use_fusion=True,
                        exec_mapper_factory=ExecutionMapper,
                        return_ic=False):
    """Build an RK4 time stepper for the example wave operator.

    :arg queue: command queue; its ``context`` is used to build the operator
        and discretization, and the queue itself is handed to the stepper.
    :arg dims: forwarded to :func:`get_strong_wave_op_with_discr_direct`.
    :arg order: forwarded to :func:`get_strong_wave_op_with_discr_direct`.
    :arg use_fusion: if true, return a :class:`FusedRK4TimeStepper` built
        from the symbolic operator; otherwise bind the operator first and
        return an :class:`RK4TimeStepper`.
    :arg exec_mapper_factory: passed through to the operator binding and to
        the stepper.
    :arg return_ic: if true, additionally return an all-zero initial
        condition.
    :returns: ``stepper``, or ``(stepper, ic)`` when *return_ic* is true.
    """
    # Forward the caller's *order*. A literal 3 used to be passed here, which
    # silently ignored the parameter (the default is unchanged).
    sym_operator, discr = get_strong_wave_op_with_discr_direct(
            queue.context, dims=dims, order=order)

    if not use_fusion:
        # NOTE(review): `bind` is assumed to be the operator-binding helper
        # imported at file level -- confirm.
        bound_op = bind(
                discr, sym_operator,
                exec_mapper_factory=exec_mapper_factory)

        stepper = RK4TimeStepper(
                queue, discr, "w", bound_op, 1 + discr.dim,
                get_strong_wave_component,
                exec_mapper_factory=exec_mapper_factory)
    else:
        stepper = FusedRK4TimeStepper(
                queue, discr, "w", sym_operator, 1 + discr.dim,
                get_strong_wave_component,
                exec_mapper_factory=exec_mapper_factory)

    if not return_ic:
        return stepper

    from pytools.obj_array import join_fields

    # Zero initial condition: one zero field joined with ``discr.dim`` more
    # zero fields, i.e. ``1 + discr.dim`` entries in total.
    ic = join_fields(discr.zeros(queue),
            [discr.zeros(queue) for _ in range(discr.dim)])
    return stepper, ic
def latex_table(table_format, header, rows):
    r"""Return the LaTeX source of a ``tabular`` environment.

    :arg table_format: column specification placed after ``\begin{tabular}``.
    :arg header: iterable of header cell strings; each one is wrapped in a
        centered single-column ``\multicolumn``.
    :arg rows: iterable of rows, each an iterable of cell strings.
    :returns: the table as one string, lines joined by newlines, framed by
        ``\toprule``, ``\midrule`` and ``\bottomrule``.
    """
    row_end = r" \\"

    header_line = " & ".join(
        rf"\multicolumn{{1}}{{c}}{{{title}}}" for title in header) + row_end
    body_lines = [" & ".join(cells) + row_end for cells in rows]

    lines = [
        rf"\begin{{tabular}}{{{table_format}}}",
        r"\toprule",
        header_line,
        r"\midrule",
        *body_lines,
        r"\bottomrule",
        r"\end{tabular}",
    ]
    return "\n".join(lines)
def ascii_table(table_format, header, rows):
    r"""Return a plain-text rendering of *header* and *rows*.

    Takes the same arguments as :func:`latex_table`; *table_format* is not
    used by this function. Cells that start with ``\num{`` have those five
    leading characters and their final character removed, so only the
    wrapped content is shown.

    :returns: ``str()`` of the :class:`pytools.Table` holding the header row
        followed by the cleaned rows.
    """
    from pytools import Table

    num_prefix = r"\num{"

    text_table = Table()
    text_table.add_row(header)
    for cells in rows:
        # Strip \num{...} formatting, leave every other cell untouched.
        text_table.add_row([
            cell[len(num_prefix):-1] if cell.startswith(num_prefix) else cell
            for cell in cells])

    return str(text_table)
# Table renderer used by the output functions: LaTeX when PAPER_OUTPUT is
# set, plain text otherwise.
table = latex_table if PAPER_OUTPUT else ascii_table
def problem_stats(order=3):
    """Write element and per-element DOF counts of the 2D and 3D problems.

    For ``dims`` 2 and then 3, builds the discretization via
    :func:`get_strong_wave_op_with_discr_direct` and prints its mesh element
    count and the number of unit nodes per element to
    ``grudge-problem-stats.txt``.

    :arg order: forwarded to :func:`get_strong_wave_op_with_discr_direct`.
    """
    from pytools import one

    cl_ctx = cl.create_some_context()

    with open_output_file("grudge-problem-stats.txt") as outf:
        for dims in (2, 3):
            _, dg_discr = get_strong_wave_op_with_discr_direct(
                    cl_ctx, dims=dims, order=order)
            print(f"Number of {dims}D elements:",
                    dg_discr.mesh.nelements, file=outf)

            vol_discr = dg_discr.discr_from_dd("vol")
            # Collect the distinct per-group node counts; one() is expected
            # to return the single value and fail if the groups disagree.
            dofs_per_element = {
                    group.nunit_nodes for group in vol_discr.groups}
            print(f"Number of DOFs per {dims}D element:",
                    one(dofs_per_element), file=outf)

    logger.info("Wrote '%s'", outf.name)
# Emit the instruction counts of the example time steppers' bound operators
# into the "statement-counts.tex" output file.
# NOTE(review): in this view the body has lost its indentation and several
# lines (see the notes below); restore them from version control before
# relying on this function.
def statement_counts_table():
cl_ctx = cl.create_some_context()
queue = cl.CommandQueue(cl_ctx)
# Build one stepper with fusion enabled and one without, so that the
# instruction counts of their operators can be listed side by side.
fused_stepper = get_example_stepper(queue, use_fusion=True)
stepper = get_example_stepper(queue, use_fusion=False)
with open_output_file("statement-counts.tex") as outf:
if not PAPER_OUTPUT:
# Plain-text banner, only written when not producing paper output.
print("==== Statement Counts ====", file=outf)
# NOTE(review): the call that consumes the following arguments (presumably
# `print(table(`) is missing here -- confirm.
"lr",
("Operator", "Grudge Node Count"),
(
("Time integration: baseline",
r"\num{%d}"
% len(stepper.bound_op.eval_code.instructions)),
# NOTE(review): the opening `(` and label of this row are missing -- confirm.
r"\num{%d}"
% len(stepper.grudge_bound_op.eval_code.instructions)),
# NOTE(review): the opening `(` and label of this row are missing -- confirm.
r"\num{%d}"
% len(fused_stepper.bound_op.eval_code.instructions))
# NOTE(review): the closing brackets of the row tuple and of the enclosing
# call (presumably ending in `file=outf)`) are missing here -- confirm.
logger.info("Wrote '%s'", outf.name)
def _profile_mem_ops(stepper, ic, t_start, dt, t_end, prefix):
    """Run *stepper* and return its final memory-op counters under *prefix*.

    The stepper is run to completion with ``return_profile_data=True``; only
    the profile data of the last step is kept.

    :returns: a dict with ``<prefix>_bytes_read``, ``<prefix>_bytes_written``
        and ``<prefix>_bytes_total`` (read + written), plus the same three
        keys with the ``_by_scalar_assignments`` suffix.
    :raises RuntimeError: if the stepper yields no steps.
    """
    profile_data = None
    for _, _, profile_data in stepper.run(
            ic, t_start, dt, t_end, return_profile_data=True):
        pass

    if profile_data is None:
        raise RuntimeError(
                "time stepper produced no steps, no profile data available")

    stats = {}
    for suffix in ("", "_by_scalar_assignments"):
        bytes_read = profile_data["bytes_read" + suffix]
        bytes_written = profile_data["bytes_written" + suffix]
        stats[f"{prefix}_bytes_read{suffix}"] = bytes_read
        stats[f"{prefix}_bytes_written{suffix}"] = bytes_written
        stats[f"{prefix}_bytes_total{suffix}"] = bytes_read + bytes_written

    return stats


# The memoization key is *dims* only, so a cached result is reused for any
# queue passed later with the same *dims*.
@memoize(key=lambda queue, dims: dims)
def mem_ops_results(queue, dims):
    """Collect memory-operation counts for the fused and non-fused steppers.

    Both steppers are built with :class:`ExecutionMapperWithMemOpCounting`
    and run from the same zero initial condition over ``t_start = 0``,
    ``dt = 0.02``, ``t_end = 0.02``.

    :arg queue: command queue handed to :func:`get_example_stepper`.
    :arg dims: forwarded to :func:`get_example_stepper`.
    :returns: a dict with ``nonfused_*`` and ``fused_*`` entries for
        ``bytes_read``, ``bytes_written`` and ``bytes_total``, each also
        available with the ``_by_scalar_assignments`` suffix.
    """
    # NOTE(review): *dims* is forwarded to both stepper constructions so that
    # the per-dimension cache entries actually differ -- confirm this matches
    # the intended call.
    fused_stepper = get_example_stepper(
            queue,
            dims=dims,
            use_fusion=True,
            exec_mapper_factory=ExecutionMapperWithMemOpCounting)

    stepper, ic = get_example_stepper(
            queue,
            dims=dims,
            use_fusion=False,
            exec_mapper_factory=ExecutionMapperWithMemOpCounting,
            return_ic=True)

    t_start = 0
    dt = 0.02
    t_end = 0.02

    result = {}
    # Non-fused run first, then fused, both starting from the same *ic*.
    result.update(
            _profile_mem_ops(stepper, ic, t_start, dt, t_end, "nonfused"))
    result.update(
            _profile_mem_ops(fused_stepper, ic, t_start, dt, t_end, "fused"))

    return result
def scalar_assignment_percent_of_total_mem_ops_table():
    """Write the share of memory traffic caused by scalar assignments.

    For the 2D and 3D results of :func:`mem_ops_results`, and for the
    non-fused ("Baseline") and fused ("Inlined") variants of each, one row
    gives ``100 * bytes_total_by_scalar_assignments / bytes_total`` with one
    decimal. The table is rendered with the module-level ``table`` function
    into ``scalar-assignments-mem-op-percentage.tex``.
    """
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    # (row label suffix, key prefix used by mem_ops_results)
    variants = (("Baseline", "nonfused"), ("Inlined", "fused"))

    # Rows are computed before the file is opened, so a failure here does
    # not leave a partially written output file behind.
    rows = []
    for dims in (2, 3):
        result = mem_ops_results(queue, dims)
        for label, prefix in variants:
            percent = (
                    100 * result[f"{prefix}_bytes_total_by_scalar_assignments"]
                    / result[f"{prefix}_bytes_total"])
            rows.append((f"{dims}D: {label}", "%.1f" % percent))

    with open_output_file("scalar-assignments-mem-op-percentage.tex") as outf:
        if not PAPER_OUTPUT:
            print("==== Scalar Assignment % of Total Mem Ops ====", file=outf)

        print(
                table(
                    "lr",
                    ("Operator",
                     r"\parbox{1in}{\centering \% Memory Ops. "
                     r"Due to Scalar Assignments}"),
                    tuple(rows)),
                file=outf)

    logger.info("Wrote '%s'", outf.name)
def scalar_assignment_effect_of_fusion_mem_ops_table():
    r"""Write the scalar-assignment byte counts with and without fusion.

    For the 2D and 3D results of :func:`mem_ops_results`, a "Baseline"
    (non-fused) and an "Inlined" (fused) row list the bytes read, written
    and in total by scalar assignments, each wrapped in ``\num{...}``. The
    last column is the literal ``100`` for the baseline and, for the inlined
    row, the fused total as a percentage of the non-fused total with one
    decimal. The table is rendered with the module-level ``table`` function
    into ``scalar-assignments-fusion-impact.tex``.
    """
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    def make_row(label, result, prefix, percent):
        # One table row: label, read/written/total byte counts, percentage.
        return (
                label,
                r"\num{%d}" % result[f"{prefix}_bytes_read_by_scalar_assignments"],
                r"\num{%d}"
                % result[f"{prefix}_bytes_written_by_scalar_assignments"],
                r"\num{%d}"
                % result[f"{prefix}_bytes_total_by_scalar_assignments"],
                percent)

    # Rows are computed before the file is opened, so a failure here does
    # not leave a partially written output file behind.
    rows = []
    for dims in (2, 3):
        result = mem_ops_results(queue, dims)
        inlined_percent = "%.1f" % (
                100 * result["fused_bytes_total_by_scalar_assignments"]
                / result["nonfused_bytes_total_by_scalar_assignments"])

        rows.append(make_row(f"{dims}D: Baseline", result, "nonfused", "100"))
        rows.append(
                make_row(f"{dims}D: Inlined", result, "fused", inlined_percent))

    with open_output_file("scalar-assignments-fusion-impact.tex") as outf:
        if not PAPER_OUTPUT:
            print("==== Scalar Assignment Inlining Impact ====", file=outf)

        print(
                table(
                    "lrrrr",
                    ("Operator",
                     r"Bytes Read",
                     r"Bytes Written",
                     r"Total",
                     r"\% of Baseline"),
                    tuple(rows)),
                file=outf)

    logger.info("Wrote '%s'", outf.name)
def main():
    """Script entry point.

    With a command-line argument, execute that argument as Python code and
    do nothing else. Without one, run this file's tests (unless
    ``SKIP_TESTS`` is set) and then generate all example outputs.
    """
    import sys

    if len(sys.argv) > 1:
        # SECURITY: this runs the first command-line argument as arbitrary
        # Python code. Only invoke the script this way with trusted input.
        exec(sys.argv[1])
        return

    if not SKIP_TESTS:
        # Run tests. The import is aliased so it does not shadow this
        # function's own name.
        from py.test import main as run_tests
        result = run_tests([__file__])
        if result != 0:
            # Explicit exit instead of ``assert`` so that failing tests
            # still stop the run under ``python -O``.
            sys.exit(result)

    # Run examples.
    problem_stats()
    statement_counts_table()
    scalar_assignment_percent_of_total_mem_ops_table()
    scalar_assignment_effect_of_fusion_mem_ops_table()


if __name__ == "__main__":
    main()
Andreas Klöckner
committed
# vim: foldmethod=marker