updated doctests for count_granularity rounding changes

fffeb48b · James Stevens · d1df5444 · fffeb48b
Commit fffeb48b authored 7 years ago by James Stevens
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -1638,12 +1638,12 @@ we'll continue using the kernel from the previous example:

    >>> mem_map = lp.get_mem_access_map(knl)
    >>> print(lp.stringify_stats_mapping(mem_map))
-    MemAccess(global, np:dtype('float32'), 0, load, a, subgroup) : [m, l, n] -> { 1/16 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float32'), 0, load, b, subgroup) : [m, l, n] -> { 1/32 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float32'), 0, store, c, subgroup) : [m, l, n] -> { 1/32 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, load, g, subgroup) : [m, l, n] -> { 1/32 * m * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, load, h, subgroup) : [m, l, n] -> { 1/32 * m * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, store, e, subgroup) : [m, l, n] -> { 1/32 * m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, load, a, subgroup) : [m, l, n] -> { 2 * m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, load, b, subgroup) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, store, c, subgroup) : [m, l, n] -> { m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, load, g, subgroup) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, load, h, subgroup) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, store, e, subgroup) : [m, l, n] -> { m * n : m > 0 and l > 0 and n > 0 }
    <BLANKLINE>

 :func:`loopy.get_mem_access_map` returns a :class:`loopy.ToCountMap` of **{**
@@ -1679,10 +1679,10 @@ We can evaluate these polynomials using :func:`islpy.eval_with_dict`:
    ...                  ].eval_with_dict(param_dict)
    >>> print("f32 ld a: %i\nf32 st c: %i\nf64 ld g: %i\nf64 st e: %i" %
    ...       (f32ld_a, f32st_c, f64ld_g, f64st_e))
-    f32 ld a: 32768
-    f32 st c: 16384
-    f64 ld g: 2048
-    f64 st e: 2048
+    f32 ld a: 1048576
+    f32 st c: 524288
+    f64 ld g: 65536
+    f64 st e: 65536

 :class:`loopy.ToCountMap` also makes it easy to determine the total amount
 of data moved in bytes. Suppose we want to know the total amount of global
@@ -1693,26 +1693,26 @@ using :func:`loopy.ToCountMap.to_bytes` and :func:`loopy.ToCountMap.group_by`:

    >>> bytes_map = mem_map.to_bytes()
    >>> print(lp.stringify_stats_mapping(bytes_map))
-    MemAccess(global, np:dtype('float32'), 0, load, a, subgroup) : [m, l, n] -> { 1/4 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float32'), 0, load, b, subgroup) : [m, l, n] -> { 1/8 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float32'), 0, store, c, subgroup) : [m, l, n] -> { 1/8 * m * l * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, load, g, subgroup) : [m, l, n] -> { 1/4 * m * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, load, h, subgroup) : [m, l, n] -> { 1/4 * m * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(global, np:dtype('float64'), 0, store, e, subgroup) : [m, l, n] -> { 1/4 * m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, load, a, subgroup) : [m, l, n] -> { 8 * m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, load, b, subgroup) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float32'), 0, store, c, subgroup) : [m, l, n] -> { 4 * m * l * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, load, g, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, load, h, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(global, np:dtype('float64'), 0, store, e, subgroup) : [m, l, n] -> { 8 * m * n : m > 0 and l > 0 and n > 0 }
    <BLANKLINE>
    >>> global_ld_st_bytes = bytes_map.filter_by(mtype=['global']
    ...                                         ).group_by('direction')
    >>> print(lp.stringify_stats_mapping(global_ld_st_bytes))
-    MemAccess(None, None, None, load, None, None) : [m, l, n] -> { (1/2 * m + 3/8 * m * l) * n : m > 0 and l > 0 and n > 0 }
-    MemAccess(None, None, None, store, None, None) : [m, l, n] -> { (1/4 * m + 1/8 * m * l) * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(None, None, None, load, None, None) : [m, l, n] -> { (16 * m + 12 * m * l) * n : m > 0 and l > 0 and n > 0 }
+    MemAccess(None, None, None, store, None, None) : [m, l, n] -> { (8 * m + 4 * m * l) * n : m > 0 and l > 0 and n > 0 }
    <BLANKLINE>
    >>> loaded = global_ld_st_bytes[lp.MemAccess(direction='load')
    ...                            ].eval_with_dict(param_dict)
    >>> stored = global_ld_st_bytes[lp.MemAccess(direction='store')
    ...                            ].eval_with_dict(param_dict)
    >>> print("bytes loaded: %s\nbytes stored: %s" % (loaded, stored))
-    bytes loaded: 229376
-    bytes stored: 81920
+    bytes loaded: 7340032
+    bytes stored: 2621440

 One can see how these functions might be useful in computing, for example,
 achieved memory bandwidth in byte/sec or performance in FLOP/sec.