    for key, value in profile_data.items():
        if isinstance(value, TimingFutureList):
            print(key, value.elapsed())

# {{{ paper outputs

def get_example_stepper(queue, dims=2, order=3, use_fusion=True,
        exec_mapper_factory=ExecutionMapper,
        return_ic=False):
    sym_operator, discr = get_strong_wave_op_with_discr_direct(
            queue.context, dims=dims, order=order)

    if not use_fusion:
        bound_op = bind(
                discr, sym_operator,
                exec_mapper_factory=exec_mapper_factory)

        stepper = RK4TimeStepper(
                queue, discr, "w", bound_op, 1 + discr.dim,
                get_strong_wave_component,
                exec_mapper_factory=exec_mapper_factory)

    else:
        stepper = FusedRK4TimeStepper(
queue, discr, "w", sym_operator, 1 + discr.dim,
get_strong_wave_component,
exec_mapper_factory=exec_mapper_factory)
if return_ic:
ic = join_fields(discr.zeros(queue),
[discr.zeros(queue) for i in range(discr.dim)])
return stepper, ic
return stepper
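

# Illustrative usage sketch (not part of the measured code): the steppers
# built above are driven by iterating over stepper.run(), as done in
# mem_ops_results() below, e.g.
#
#     stepper, ic = get_example_stepper(
#             queue, use_fusion=False, return_ic=True,
#             exec_mapper_factory=ExecutionMapperWithMemOpCounting)
#     for (_, _, profile_data) in stepper.run(
#             ic, 0, 0.02, 0.02, return_profile_data=True):
#         pass
#
# The argument order and yielded-tuple layout are taken from the uses of
# stepper.run() later in this file.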


def latex_table(table_format, header, rows):
    result = []
    _ = result.append
    _(rf"\begin{{tabular}}{{{table_format}}}")
    _(r"\toprule")
    _(" & ".join(rf"\multicolumn{{1}}{{c}}{{{item}}}" for item in header) + r" \\")
    _(r"\midrule")
    for row in rows:
        _(" & ".join(row) + r" \\")
    _(r"\bottomrule")
    _(r"\end{tabular}")
    return "\n".join(result)


def ascii_table(table_format, header, rows):
    from pytools import Table
    table = Table()
    table.add_row(header)

    for input_row in rows:
        row = []
        for item in input_row:
            if item.startswith(r"\num{"):
                # Strip \num{...} formatting down to the bare value
                row.append(item[5:-1])
            else:
                row.append(item)
        table.add_row(row)

    return str(table)


if not PAPER_OUTPUT:
    table = ascii_table
else:
    table = latex_table


def problem_stats(order=3):
    cl_ctx = cl.create_some_context()

    with open_output_file("grudge-problem-stats.txt") as outf:
        _, dg_discr_2d = get_strong_wave_op_with_discr_direct(
            cl_ctx, dims=2, order=order)
        print("Number of 2D elements:", dg_discr_2d.mesh.nelements, file=outf)
        vol_discr_2d = dg_discr_2d.discr_from_dd("vol")
        dofs_2d = {group.nunit_nodes for group in vol_discr_2d.groups}
        from pytools import one
        print("Number of DOFs per 2D element:", one(dofs_2d), file=outf)

        _, dg_discr_3d = get_strong_wave_op_with_discr_direct(
            cl_ctx, dims=3, order=order)
        print("Number of 3D elements:", dg_discr_3d.mesh.nelements, file=outf)
        vol_discr_3d = dg_discr_3d.discr_from_dd("vol")
        dofs_3d = {group.nunit_nodes for group in vol_discr_3d.groups}
        from pytools import one
        print("Number of DOFs per 3D element:", one(dofs_3d), file=outf)

    logger.info("Wrote '%s'", outf.name)


def statement_counts_table():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    fused_stepper = get_example_stepper(queue, use_fusion=True)
    stepper = get_example_stepper(queue, use_fusion=False)

    with open_output_file("statement-counts.tex") as outf:
        if not PAPER_OUTPUT:
            print("==== Statement Counts ====", file=outf)

"lr",
("Operator", "Grudge Node Count"),
(
("Time integration: baseline",
r"\num{%d}"
% len(stepper.bound_op.eval_code.instructions)),
r"\num{%d}"
% len(stepper.grudge_bound_op.eval_code.instructions)),
r"\num{%d}"
% len(fused_stepper.bound_op.eval_code.instructions))
logger.info("Wrote '%s'", outf.name)
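

# The memoization key below deliberately ignores the queue object: results are
# cached per dimension, so the two table functions further down share a single
# byte-counting run for each of 2D and 3D.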
@memoize(key=lambda queue, dims: dims)
def mem_ops_results(queue, dims):
    fused_stepper = get_example_stepper(
            queue,
            dims=dims,
            use_fusion=True,
            exec_mapper_factory=ExecutionMapperWithMemOpCounting)

    stepper, ic = get_example_stepper(
            queue,
            dims=dims,
            use_fusion=False,
            exec_mapper_factory=ExecutionMapperWithMemOpCounting,
            return_ic=True)

    t_start = 0
    dt = 0.02
    t_end = 0.02

    result = {}

    for (_, _, profile_data) in stepper.run(
            ic, t_start, dt, t_end, return_profile_data=True):
        pass
result["nonfused_bytes_read"] = profile_data["bytes_read"]
result["nonfused_bytes_written"] = profile_data["bytes_written"]
result["nonfused_bytes_total"] = \
result["nonfused_bytes_read"] \
+ result["nonfused_bytes_written"]
result["nonfused_bytes_read_by_scalar_assignments"] = \
profile_data["bytes_read_by_scalar_assignments"]
result["nonfused_bytes_written_by_scalar_assignments"] = \
profile_data["bytes_written_by_scalar_assignments"]
result["nonfused_bytes_total_by_scalar_assignments"] = \
result["nonfused_bytes_read_by_scalar_assignments"] \
+ result["nonfused_bytes_written_by_scalar_assignments"]
for (_, _, profile_data) in fused_stepper.run(
ic, t_start, dt, t_end, return_profile_data=True):
pass
result["fused_bytes_read"] = profile_data["bytes_read"]
result["fused_bytes_written"] = profile_data["bytes_written"]
result["fused_bytes_total"] = \
result["fused_bytes_read"] \
+ result["fused_bytes_written"]
result["fused_bytes_read_by_scalar_assignments"] = \
profile_data["bytes_read_by_scalar_assignments"]
result["fused_bytes_written_by_scalar_assignments"] = \
profile_data["bytes_written_by_scalar_assignments"]
result["fused_bytes_total_by_scalar_assignments"] = \
result["fused_bytes_read_by_scalar_assignments"] \
+ result["fused_bytes_written_by_scalar_assignments"]
return result
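
# Both table generators below consume the dictionary returned by
# mem_ops_results(): keys carry a "nonfused_"/"fused_" prefix, come in
# read/written/total variants, and have "_by_scalar_assignments" counterparts
# restricted to scalar assignments.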


def scalar_assignment_percent_of_total_mem_ops_table():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    result2d = mem_ops_results(queue, 2)
    result3d = mem_ops_results(queue, 3)
with open_output_file("scalar-assignments-mem-op-percentage.tex") as outf:
if not PAPER_OUTPUT:
print("==== Scalar Assigment % of Total Mem Ops ====", file=outf)
print(
table(
"lr",
("Operator",
r"\parbox{1in}{\centering \% Memory Ops. "
r"Due to Scalar Assignments}"),
(
("2D: Baseline",
"%.1f" % (
100 * result2d["nonfused_bytes_total_by_scalar_assignments"]
/ result2d["nonfused_bytes_total"])),
("2D: Inlined",
"%.1f" % (
100 * result2d["fused_bytes_total_by_scalar_assignments"]
/ result2d["fused_bytes_total"])),
("3D: Baseline",
"%.1f" % (
100 * result3d["nonfused_bytes_total_by_scalar_assignments"]
/ result3d["nonfused_bytes_total"])),
("3D: Inlined",
"%.1f" % (
100 * result3d["fused_bytes_total_by_scalar_assignments"]
/ result3d["fused_bytes_total"])),
)),
file=outf)
logger.info("Wrote '%s'", outf.name)


def scalar_assignment_effect_of_fusion_mem_ops_table():
    cl_ctx = cl.create_some_context()
    queue = cl.CommandQueue(cl_ctx)

    result2d = mem_ops_results(queue, 2)
    result3d = mem_ops_results(queue, 3)
with open_output_file("scalar-assignments-fusion-impact.tex") as outf:
if not PAPER_OUTPUT:
print("==== Scalar Assigment Inlining Impact ====", file=outf)
print(
table(
"lrrrr",
("Operator",
r"Bytes Read",
r"Bytes Written",
r"Total",
r"\% of Baseline"),
(
("2D: Baseline",
r"\num{%d}" % (
result2d["nonfused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["nonfused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["nonfused_bytes_total_by_scalar_assignments"]),
"100"),
("2D: Inlined",
r"\num{%d}" % (
result2d["fused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["fused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["fused_bytes_total_by_scalar_assignments"]),
r"%.1f" % (
100 * result2d["fused_bytes_total_by_scalar_assignments"]
/ result2d["nonfused_bytes_total_by_scalar_assignments"])),
("3D: Baseline",
r"\num{%d}" % (
result3d["nonfused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["nonfused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["nonfused_bytes_total_by_scalar_assignments"]),
"100"),
("3D: Inlined",
r"\num{%d}" % (
result3d["fused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["fused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["fused_bytes_total_by_scalar_assignments"]),
r"%.1f" % (
100 * result3d["fused_bytes_total_by_scalar_assignments"]
/ result3d["nonfused_bytes_total_by_scalar_assignments"])),
)),
file=outf)
logger.info("Wrote '%s'", outf.name)

# }}}


def main():
    import sys
    if len(sys.argv) > 1:
        exec(sys.argv[1])
    else:
        if not SKIP_TESTS:
            # Run tests.
            from py.test import main
            result = main([__file__])
            assert result == 0

        # Run examples.
        problem_stats()
        statement_counts_table()
        scalar_assignment_percent_of_total_mem_ops_table()
        scalar_assignment_effect_of_fusion_mem_ops_table()


if __name__ == "__main__":
    main()
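
# Example invocation (sketch; "dagrt_fusion.py" stands in for this script's
# actual filename): running
#
#     python dagrt_fusion.py "problem_stats(order=3)"
#
# hands the quoted string to exec() in main(), so a single table or statistic
# can be regenerated without rerunning the tests and all other examples.
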
# vim: foldmethod=marker