The Python function below is to compute histogram of data, with equal-sized bins. I would like to get the correct result
[1, 6, 4, 6]
[7, 12, 17, 17]
# Computes the histogram of a set of data
def histogram(data, num_bins):
# Find what range the data spans, and use it to calculate the bin size.
span = max(data) - min(data)
bin_size = span / num_bins
# Calculate the thresholds for each bin.
thresholds = [0] * num_bins
for i in range(num_bins):
thresholds[i] += bin_size * (i+1)
# Compute the histogram
counts = [0] * num_bins
for datum in data:
# Increment the count of the bin that the datum falls in
for bin_index, threshold in enumerate(thresholds):
if datum <= threshold:
counts[bin_index] += 1
return counts
# Some random data
data = [-3.2, 0, 1, 1.5, 1.6, 1.9, 5, 6, 9, 1, 4, 5, 8, 9, 5, 6.7, 9]
print("Correct result:\t" + str([1, 6, 4, 6]))
print("Your result:\t" + str(histogram(data, num_bins=4)))
Only you have two logical errors
(1) calculating the threshold
(2) add break in for, once found the range
def histogram(data, num_bins):
span = max(data) - min(data)
bin_size = float(span) / num_bins
thresholds = [0] * num_bins
for i in range(num_bins):
#I change thresholds calc
thresholds[i] = min(data) + bin_size * (i+1)
counts = [0] * num_bins
for datum in data:
for bin_index, threshold in enumerate(thresholds):
if datum <= threshold:
counts[bin_index] += 1
#I add a break
break
return counts
data = [-3.2, 0, 1, 1.5, 1.6, 1.9, 5, 6, 9, 1, 4, 5, 8, 9, 5, 6.7, 9]
print("Correct result:\t" + str([1, 6, 4, 6]))
print("Your result:\t" + str(histogram(data, num_bins=4)))