From 82d8ec277c06ef7b0074120fa0248e2f7fafc716 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <kloeckner@haamster.(none)>
Date: Tue, 10 Feb 2009 08:41:00 -0600
Subject: [PATCH] Non-equidistant bin starts for unicode histogram.

---
 src/__init__.py | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/__init__.py b/src/__init__.py
index 66d0753..c655e85 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -971,25 +971,33 @@ class Table:
 
 
 
-def string_histogram(iterable, min_value=None, max_value=None, bin_count=20, width=75,
-        use_unicode=True):
-    if min_value is None or max_value is None:
-        iterable = list(iterable)
-        min_value = min(iterable)
-        max_value = max(iterable)
+def string_histogram(iterable, min_value=None, max_value=None, bin_count=20, width=70,
+        bin_starts=None, use_unicode=True):
+    if bin_starts is None:
+        if min_value is None or max_value is None:
+            iterable = list(iterable)
+            min_value = min(iterable)
+            max_value = max(iterable)
 
-    bin_width = (max_value - min_value)/bin_count
-    bins = [0 for i in range(bin_count)]
+        bin_width = (max_value - min_value)/bin_count
+        bin_starts = [min_value+bin_width*i for i in range(bin_count)]
 
-    from math import floor, ceil
+    bins = [0 for i in range(len(bin_starts))]
+
+    from bisect import bisect
     for value in iterable:
-        bin_nr = int(floor((value-min_value)/bin_width))
-        if 0 <= bin_nr < bin_count:
-            bins[bin_nr] += 1
-        else:
+        if max_value is not None and value > max_value or value < bin_starts[0]:
             from warnings import warn
             warn("string_histogram: out-of-bounds value ignored")
+        else:
+            bin_nr = bisect(bin_starts, value)-1
+            try:
+                bins[bin_nr] += 1
+            except:
+                print value, bin_nr, bin_starts
+                raise
 
+    from math import floor, ceil
     if use_unicode:
         def format_bar(cnt):
             scaled = cnt*width/max_count
@@ -1005,12 +1013,12 @@ def string_histogram(iterable, min_value=None, max_value=None, bin_count=20, wid
 
     max_count = max(bins)
     total_count = sum(bins)
-    return "\n".join("%9g (%9d:%3.0f %%) : %s" % (
-        min_value+bin_nr*bin_width, 
+    return "\n".join("%9g |%9d | %3.0f %% | %s" % (
+        bin_start,
         bin_value,
         bin_value/total_count*100,
         format_bar(bin_value))
-        for bin_nr, bin_value in enumerate(bins))
+        for bin_start, bin_value in zip(bin_starts, bins))
 
             
         
-- 
GitLab