manofsins manofsins - 16 days ago 4
Python Question

How to make Julia Code more Efficient? Currently it is performing even worse than Python

I am new to Julia and have been experimenting with it as I got to know that it has amazing performances. But I am still to experience those promised performances. I have tried many methods for enhancing performance described in the book "JULIA HIGH PERFORMANCE", which has made the code a little bit less readable. But still, my python code is much faster than my Julia code, at least 3x faster for the benchmark case.
Either I am doing something very wrong with the code which must be a sin in Julia or its that Julia just can't do it. Please prove me wrong about the later.

What I am trying to do in the code is assign distinct balls into distinct boxes with a maximum and minimum limit to the capacity of each box. The order in which balls are placed in the box also matters. I need to generate all possible assignments with the given constraints in minimum possible time.

PYTHON CODE:

import itertools
import time

max_balls = 5
min_balls = 0

def get_assignments(balls, boxes, assignments=[[]]):
all_assignments = []
upper_ball_limit = len(balls)
if upper_ball_limit > max_balls:
upper_ball_limit = max_balls

n_boxes = len(boxes)
lower_ball_limit = len(balls) - upper_ball_limit * (n_boxes - 1)
if lower_ball_limit < min_balls:
lower_ball_limit = min_balls

if len(boxes) == 0:
raise Exception("No delivery boys found")

elif len(boxes) == 1:
for strategy in itertools.permutations(balls, upper_ball_limit):
# valid = evaluate_strategy(strategy, db_id)
for subplan in assignments:
subplan_copy = subplan[:]
box_id = boxes[0]
subplan_copy.append((box_id, strategy))
all_assignments.append(subplan_copy)
return all_assignments
else:
box_id = boxes[0]
for i in range(lower_ball_limit, upper_ball_limit+ 1):
for strategy in itertools.permutations(balls, i):
temp_plans = []
for subplan in assignments:
subplan_copy = subplan[:]
subplan_copy.append((box_id, strategy))
temp_plans.append(subplan_copy)
remaining_balls = set(balls).difference(strategy)
remaining_boxes = list(set(boxes).difference([box_id]))
if remaining_balls:
all_assignments.extend(get_assignments(remaining_balls, remaining_boxes, temp_plans))
else:
all_assignments.extend(temp_plans)
return all_assignments

balls = range(1, 9)
boxes = [1, 2, 3, 4]

t = time.time()
all_assignments = get_assignments(balls, boxes)

print('Number of assignments: %s' % len(all_assignments))
print('Time taken: %s' % (time.time()-t))


And here is my attempt at writing the JULIA CODE for the above.

#!/usr/bin/env julia

using Combinatorics

const max_balls=5
const min_balls=0

function plan_assignments(balls::Vector{Int32}, boxes ; plans=[Vector{Tuple{Int32,Array{Int32,1}}}(length(boxes))])
const n_boxes = length(boxes)
const n_balls = length(balls)
const n_plans = length(plans)

if n_boxes*max_balls < n_balls
print("Invalid Inputs: Number of balls exceed the number of boxes.")
end

all_plans = Vector{Tuple{Int32,Array{Int32,1}}}[]
upper_box_limit = n_balls
if upper_box_limit > max_balls
upper_box_limit = max_balls
end
lower_box_limit = n_balls - upper_box_limit * (n_boxes-1)

if lower_box_limit < min_balls
lower_box_limit = min_balls
end

if n_boxes == 1
box_id = boxes[1]
@inbounds for strategy in Combinatorics.permutations(balls, upper_box_limit)
@inbounds for subplan in plans
subplan = subplan[:]
subplan[tn_boxes - n_boxes + 1] = (box_id, strategy)
all_plans = push!(all_plans, subplan)
end
end
return all_plans

else
box_id = boxes[1]
@inbounds for i in lower_box_limit:upper_box_limit
@inbounds for strategy in Combinatorics.permutations(balls, i)
temp_plans = Array{Vector{Tuple{Int32,Array{Int32,1}}},1}(n_plans)
# temp_plans = []

@inbounds for (i,subplan) in zip(1:n_plans, plans)
subplan = subplan[:]
subplan[tn_boxes - n_boxes + 1] = (box_id, strategy)
temp_plans[i] = subplan
# subplan = push!(subplan, (db_id, strategy))
# temp_plans = push!(temp_plans, subplan)

remaining_balls = filter((x) -> !(x in strategy), balls)
remaining_boxes = filter((x) -> x != box_id , boxes)

if length(remaining_balls) > 0
@inbounds for plan in plan_assignments(remaining_balls, remaining_boxes, plans=temp_plans)
push!(all_plans, plan)
end
# append!(all_plans, plan_assignments(remaining_orders, remaining_delivery_boys, plans=temp_plans))
else
@inbounds for plan in temp_plans
push!(all_plans, plan)
end
# append!(all_plans, temp_plans)

end
end
end
end
return all_plans
end
end

balls = Int32[1,2,3,4,5,6,7,8]
boxes = Int32[1,2,3,4]

const tn_boxes = length(boxes)

@timev all_plans = plan_assignments(balls, boxes)
print(length(all_plans))


My benchmark timings are as follows:

For Python:

Number of assignments: 5040000
Time taken: 22.5003659725


For Julia: (This is while discounting the compilation time.)

76.986338 seconds (122.94 M allocations: 5.793 GB, 77.01% gc time)
elapsed time (ns): 76986338257
gc time (ns): 59287603693
bytes allocated: 6220111360
pool allocs: 122932049
non-pool GC allocs:10587
malloc() calls: 11
realloc() calls: 18
GC pauses: 270
full collections: 28

Answer

This is another version in Julia, a little bit more idiomatic and modified to avoid recursion and some allocations:

using Iterators
using Combinatorics

histograms(n,m) = [diff([0;x;n+m]).-1 for x in combinations([1:n+m-1;],m-1)]

good_histograms(n,m,minval,maxval) = 
  [h for h in histograms(n,m) if all(maxval.>=h.>=minval)]

typealias PlanGrid Matrix{SubArray{Int,1,Array{Int,1},Tuple{UnitRange{Int}},true}}

function assignmentplans(balls,boxes,minballs,maxballs)
  nballs, nboxes = length(balls),length(boxes)
  nperms = factorial(nballs)
  partslist = good_histograms(nballs,nboxes,minballs,maxballs)
  plans = PlanGrid(nboxes,nperms*length(partslist))
  permutationvector = vec([balls[p[i]] for i=1:nballs,p in permutations(balls)])
  i1 = 1
  for parts in partslist
    i2 = 0
    breaks = [(x[1]+1:x[2]) for x in partition(cumsum([0;parts]),2,1)]
    for i=1:nperms
      for j=1:nboxes
        plans[j,i1] = view(permutationvector,breaks[j]+i2)
      end
      i1 += 1
      i2 += nballs
    end
  end
  return plans
end

For timing we get:

julia> assignmentplans([1,2],[1,2],0,2)   # a simple example
2×6 Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},2}:
 Int64[]  Int64[]  [1]  [2]  [1,2]    [2,1]  
 [1,2]    [2,1]    [2]  [1]  Int64[]  Int64[]

julia> @time plans = assignmentplans([1:8;],[1:4;],0,5);
  8.279623 seconds (82.28 M allocations: 2.618 GB, 14.07% gc time)

julia> size(plans)
(4,5040000)

julia> plans[:,1000000]                    # sample ball configuration
4-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
 Int64[]    
 [7,3,8,2,5]
 [4]        
 [6,1]

Timings, of course, vary per setup, but this should be much faster. It is not exactly an apples to apples comparison, but it calculates the same stuff. Timing on the poster's (or others') machines are welcome in the comments.

Comments