Newer
Older
# mem_ops_results: run the example stepper without and with fusion and gather
# the byte counts reported in the steppers' profile data into one dict.
#
# The memoization key is `dims` only, so `queue` does not take part in the
# cache lookup: a second call with the same `dims` but a different queue
# returns the cached result.
#
# Returned keys (all read from / derived from `profile_data`):
#   {nonfused,fused}_bytes_{read,written,total}
#   {nonfused,fused}_bytes_{read,written,total}_by_scalar_assignments
# where each "total" is the sum of the matching "read" and "written" entries.
#
# NOTE(review): this view of the function is incomplete -- `result` and
# `fused_stepper` are used below but their definitions are not visible here.
@memoize(key=lambda queue, dims: dims)
def mem_ops_results(queue, dims):
# NOTE(review): the next line duplicates the keyword argument passed a few
# lines further down and has no visible call opening; it looks like a
# leftover "removed" line from a diff view -- confirm against the real file.
exec_mapper_factory=ExecutionMapperWithMemOpCounting)
# Non-fused stepper, plus the initial condition (return_ic=True).
# NOTE(review): `dims` is not passed to get_example_stepper in the lines
# visible here -- verify whether the 2D/3D distinction actually takes effect.
stepper, ic = get_example_stepper(
queue,
use_fusion=False,
exec_mapper_factory=ExecutionMapperWithMemOpCounting,
return_ic=True)
# t_end equals dt, so presumably a single time step is taken -- TODO confirm.
t_start = 0
dt = 0.02
t_end = 0.02
# Drain the run; only the profile_data of the last yielded item survives the
# loop and is what gets recorded below.
for (_, _, profile_data) in stepper.run(
ic, t_start, dt, t_end, return_profile_data=True):
pass
# Record the non-fused (baseline) counters.
result["nonfused_bytes_read"] = profile_data["bytes_read"]
result["nonfused_bytes_written"] = profile_data["bytes_written"]
result["nonfused_bytes_total"] = \
result["nonfused_bytes_read"] \
+ result["nonfused_bytes_written"]
result["nonfused_bytes_read_by_scalar_assignments"] = \
profile_data["bytes_read_by_scalar_assignments"]
result["nonfused_bytes_written_by_scalar_assignments"] = \
profile_data["bytes_written_by_scalar_assignments"]
result["nonfused_bytes_total_by_scalar_assignments"] = \
result["nonfused_bytes_read_by_scalar_assignments"] \
+ result["nonfused_bytes_written_by_scalar_assignments"]
# Same drain-and-keep-last pattern for the fused stepper; `profile_data` is
# rebound here, so the baseline values had to be stored above first.
for (_, _, profile_data) in fused_stepper.run(
ic, t_start, dt, t_end, return_profile_data=True):
pass
# NOTE(review): the bare integers below appear to be line-number gutter
# residue from a diff viewer, not program logic.
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
# Record the fused counters, mirroring the non-fused keys above.
result["fused_bytes_read"] = profile_data["bytes_read"]
result["fused_bytes_written"] = profile_data["bytes_written"]
result["fused_bytes_total"] = \
result["fused_bytes_read"] \
+ result["fused_bytes_written"]
result["fused_bytes_read_by_scalar_assignments"] = \
profile_data["bytes_read_by_scalar_assignments"]
result["fused_bytes_written_by_scalar_assignments"] = \
profile_data["bytes_written_by_scalar_assignments"]
result["fused_bytes_total_by_scalar_assignments"] = \
result["fused_bytes_read_by_scalar_assignments"] \
+ result["fused_bytes_written_by_scalar_assignments"]
return result
# scalar_assignment_percent_of_total_mem_ops_table: emit a two-column table
# giving, for the 2D and 3D cases and for the baseline (non-fused) and inlined
# (fused) steppers, scalar-assignment bytes as a percentage of total bytes.
# Takes no arguments and has no visible return statement; output goes to
# `outf`.
def scalar_assignment_percent_of_total_mem_ops_table():
cl_ctx = cl.create_some_context()
queue = cl.CommandQueue(cl_ctx)
# One result dict per dimensionality, as returned by mem_ops_results.
result2d = mem_ops_results(queue, 2)
result3d = mem_ops_results(queue, 3)
# NOTE(review): the `if` that pairs with the `else:` below is not visible in
# this view, so the condition selecting stdout vs. the .tex file is unknown.
outf = sys.stdout
else:
out_path = "scalar-assignments-mem-op-percentage.tex"
# NOTE(review): no close() / `with` for this handle is visible in this view;
# confirm the file is closed (and flushed) after the table is written.
outf = open(out_path, "w")
# NOTE(review): the bare integers below appear to be line-number gutter
# residue from a diff viewer, not program logic.
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
# NOTE(review): the opening of the call these arguments belong to is missing
# from this view; the trailing `file=outf` suggests a print(...) wrapping a
# table-building helper -- confirm.
# Arguments visible here: a column spec ("lr"), a header row, and four data
# rows. Each percentage is
#   100 * <prefix>_bytes_total_by_scalar_assignments / <prefix>_bytes_total
# formatted with one decimal place; a zero total would make the division fail.
"lr",
("Operator",
r"\parbox{1in}{\centering \% Memory Ops. Due to Scalar Assignments}"),
(
("2D: Baseline",
"%.1f" % (
100 * result2d["nonfused_bytes_total_by_scalar_assignments"]
/ result2d["nonfused_bytes_total"])),
("2D: Inlined",
"%.1f" % (
100 * result2d["fused_bytes_total_by_scalar_assignments"]
/ result2d["fused_bytes_total"])),
("3D: Baseline",
"%.1f" % (
100 * result3d["nonfused_bytes_total_by_scalar_assignments"]
/ result3d["nonfused_bytes_total"])),
("3D: Inlined",
"%.1f" % (
100 * result3d["fused_bytes_total_by_scalar_assignments"]
/ result3d["fused_bytes_total"])),
)),
file=outf)
# scalar_assignment_effect_of_fusion_mem_ops_table: emit a five-column table
# of scalar-assignment bytes read / written / total for the 2D and 3D cases,
# baseline (non-fused) vs. inlined (fused), with the inlined total also
# expressed as a percentage of the baseline total.
# Takes no arguments and has no visible return statement; output goes to
# `outf`.
def scalar_assignment_effect_of_fusion_mem_ops_table():
cl_ctx = cl.create_some_context()
queue = cl.CommandQueue(cl_ctx)
# One result dict per dimensionality, as returned by mem_ops_results.
result2d = mem_ops_results(queue, 2)
result3d = mem_ops_results(queue, 3)
# NOTE(review): the `if` that pairs with the `else:` below is not visible in
# this view, so the condition selecting stdout vs. the .tex file is unknown.
outf = sys.stdout
else:
# NOTE(review): this is the same path that
# scalar_assignment_percent_of_total_mem_ops_table opens with mode "w"; if
# both functions take this branch, this table overwrites that one. Looks like
# a copy/paste leftover -- confirm the intended file name.
out_path = "scalar-assignments-mem-op-percentage.tex"
# NOTE(review): no close() / `with` for this handle is visible in this view;
# confirm the file is closed (and flushed) after the table is written.
outf = open(out_path, "w")
# NOTE(review): the bare integers below appear to be line-number gutter
# residue from a diff viewer, not program logic.
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
# NOTE(review): the opening of the call these arguments belong to is missing
# from this view; the trailing `file=outf` suggests a print(...) wrapping a
# table-building helper -- confirm.
# Arguments visible here: a column spec ("lrrrr"), a header row, and four data
# rows. Byte counts are formatted with %d inside \num{...}; baseline rows
# carry a literal "100", inlined rows carry
#   100 * fused_total_by_scalar_assignments
#       / nonfused_total_by_scalar_assignments
# with one decimal place. A zero baseline total would make the division fail.
"lrrrr",
("Operator",
r"Bytes Read",
r"Bytes Written",
r"Total",
r"\% of Baseline"),
(
("2D: Baseline",
r"\num{%d}" % (
result2d["nonfused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["nonfused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["nonfused_bytes_total_by_scalar_assignments"]),
"100"),
("2D: Inlined",
r"\num{%d}" % (
result2d["fused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["fused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result2d["fused_bytes_total_by_scalar_assignments"]),
r"%.1f" % (
100 * result2d["fused_bytes_total_by_scalar_assignments"]
/ result2d["nonfused_bytes_total_by_scalar_assignments"])),
("3D: Baseline",
r"\num{%d}" % (
result3d["nonfused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["nonfused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["nonfused_bytes_total_by_scalar_assignments"]),
"100"),
("3D: Inlined",
r"\num{%d}" % (
result3d["fused_bytes_read_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["fused_bytes_written_by_scalar_assignments"]),
r"\num{%d}" % (
result3d["fused_bytes_total_by_scalar_assignments"]),
r"%.1f" % (
100 * result3d["fused_bytes_total_by_scalar_assignments"]
/ result3d["nonfused_bytes_total_by_scalar_assignments"])),
)),
file=outf)
# main: script entry point. Runs this file's tests through py.test, asserts
# that the run returned 0, and calls the two table-generating example
# functions.
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# example calls sit inside the `if 1:` block or after it -- confirm.
def main():
# `if 1:` is always true; presumably a manual on/off toggle for the test run.
if 1:
# Run tests.
# This import rebinds the name `main` locally, so the call below invokes
# py.test's main, not this function.
from py.test import main
result = main([__file__])
# The assert is skipped when Python runs with -O, in which case a failing
# test run would go unnoticed here.
assert result == 0
# Run examples.
scalar_assignment_percent_of_total_mem_ops_table()
scalar_assignment_effect_of_fusion_mem_ops_table()