mohitos -3 years ago 121
Python Question

# Python - optimise sum of a value with date constraints

I have this list

``````import datetime
# Sample input: each entry has a start date, an end date and a value.
sample_list = [
    {'start_date': datetime.datetime(2017, 8, 18), 'end_date': datetime.datetime(2017, 8, 25), 'value': 20},
    {'start_date': datetime.datetime(2017, 8, 19), 'end_date': datetime.datetime(2017, 8, 25), 'value': 22},
    {'start_date': datetime.datetime(2017, 8, 24), 'end_date': datetime.datetime(2017, 8, 30), 'value': 40},
    {'start_date': datetime.datetime(2017, 8, 25), 'end_date': datetime.datetime(2017, 8, 26), 'value': 52},
    {'start_date': datetime.datetime(2017, 8, 27), 'end_date': datetime.datetime(2017, 8, 29), 'value': 12},
    {'start_date': datetime.datetime(2017, 9, 1), 'end_date': datetime.datetime(2017, 9, 5), 'value': 20},
]
``````

and would like to produce `optimum_list`: the subset of these entries with the maximum total `value` such that no two members' date ranges overlap.

the desired output for
`sample_list`
provided above would be

``````optimum_list = [
    {'start_date': datetime.datetime(2017, 8, 25), 'end_date': datetime.datetime(2017, 8, 26), 'value': 52},
    {'start_date': datetime.datetime(2017, 8, 27), 'end_date': datetime.datetime(2017, 8, 29), 'value': 12},
    {'start_date': datetime.datetime(2017, 9, 1), 'end_date': datetime.datetime(2017, 9, 5), 'value': 20},
]

cumulative_value_of_sum = 84
``````

any ideas on how to solve this efficiently?

`O(len(data)^2)` solution using dynamic programming:

``````from datetime import datetime
from collections import defaultdict

data = [
    {'start_date': datetime(2017, 8, 18), 'end_date': datetime(2017, 8, 25), 'value': 20},
    {'start_date': datetime(2017, 8, 19), 'end_date': datetime(2017, 8, 25), 'value': 22},
    {'start_date': datetime(2017, 8, 24), 'end_date': datetime(2017, 8, 30), 'value': 40},
    {'start_date': datetime(2017, 8, 25), 'end_date': datetime(2017, 8, 26), 'value': 52},
    {'start_date': datetime(2017, 8, 27), 'end_date': datetime(2017, 8, 29), 'value': 12},
    {'start_date': datetime(2017, 9, 1), 'end_date': datetime(2017, 9, 5), 'value': 20},
]


def best_schedule(entries):
    """Pick the non-overlapping rows with the largest total ``value``.

    Two rows overlap when one starts on or before the date the other ends,
    so a row starting exactly on another row's end date conflicts with it.

    Args:
        entries: iterable of dicts with 'start_date', 'end_date' and
            'value' keys.

    Returns:
        A tuple ``(value, end_date, rows)`` where ``value`` is the sum of
        the chosen rows' values, ``end_date`` is the end date of the last
        chosen row (``datetime.min`` when no row is chosen) and ``rows`` is
        the tuple of chosen rows ordered by date.
    """
    # Dict where keys are end dates, values are lists of rows with that end date
    data_by_end = defaultdict(list)
    for row in entries:
        data_by_end[row['end_date']].append(row)

    # List of tuples (value, end_date, rows) where:
    # - value is the sum of values of rows
    # - end_date is the final date in rows
    # - rows is a tuple of rows that don't overlap, representing
    #     the best solution that doesn't end after end_date
    # So value and end_date increase through the list and the last
    # tuple is the best solution so far
    best = [(0, datetime.min, ())]

    # For every end_date in the data, in order:
    for end in sorted(data_by_end):
        # Every way of appending a row ending on `end` to an earlier partial
        # solution. The empty solution (no rows yet) accepts any row, even
        # one that starts at datetime.min, so there is always a candidate.
        candidates = (
            (value + row['value'], end, best_rows + (row,))
            for value, prev_end, best_rows in best
            for row in data_by_end[end]
            if not best_rows or row['start_date'] > prev_end
        )

        # Compare on the value only: comparing whole tuples would fall
        # through to comparing row dicts on a tie, which raises TypeError.
        new_best = max(candidates, key=lambda candidate: candidate[0])

        # Add this partial solution as the new best so far if it beats the previous best
        if new_best[0] > best[-1][0]:
            best.append(new_best)

    return best[-1]


value, end_date, rows = best_schedule(data)
print('Best value:', value)
print('Rows:')
for row in rows:
    print(row)
``````

EDIT: modified solution for the case of a second value that needs to be minimised:

``````from datetime import datetime
from collections import defaultdict

data = [
    {'start_date': datetime(2017, 8, 22), 'end_date': datetime(2017, 8, 23), 'value': 40, 'value2': 30},
    {'start_date': datetime(2017, 8, 22), 'end_date': datetime(2017, 8, 24), 'value': 40, 'value2': 2},
]


def best_schedule_two_values(entries):
    """Pick non-overlapping rows maximising ``value``, then minimising ``value2``.

    Two rows overlap when one starts on or before the date the other ends.
    Among selections with the same total ``value``, the one with the smaller
    total ``value2`` wins.

    Args:
        entries: iterable of dicts with 'start_date', 'end_date', 'value'
            and 'value2' keys.

    Returns:
        A tuple ``(value, value2, end_date, rows)`` where ``value`` is the
        sum of the chosen rows' values, ``value2`` is the NEGATED sum of
        their value2s, ``end_date`` is the end date of the last chosen row
        (``datetime.min`` when no row is chosen) and ``rows`` is the tuple
        of chosen rows ordered by date.
    """
    # Dict where keys are end dates, values are lists of rows with that end date
    data_by_end = defaultdict(list)
    for row in entries:
        data_by_end[row['end_date']].append(row)

    # List of tuples (value, value2, end_date, rows) where:
    # - value is the sum of values of rows
    # - value2 is negative the sum of value2s of rows, so that a larger
    #     tuple is always a better solution
    # - end_date is the final date in rows
    # - rows is a tuple of rows that don't overlap, representing
    #     the best solution that doesn't end after end_date
    # The last tuple is the best solution so far
    best = [(0, 0, datetime.min, ())]

    # For every end_date in the data, in order:
    for end in sorted(data_by_end):
        # Every way of appending a row ending on `end` to an earlier partial
        # solution. The empty solution (no rows yet) accepts any row, even
        # one that starts at datetime.min, so there is always a candidate.
        candidates = (
            (value + row['value'], value2 - row['value2'], end, best_rows + (row,))
            for value, value2, prev_end, best_rows in best
            for row in data_by_end[end]
            if not best_rows or row['start_date'] > prev_end
        )

        # Compare on (value, value2) only: comparing whole tuples would fall
        # through to comparing row dicts on a tie, which raises TypeError.
        new_best = max(candidates, key=lambda candidate: candidate[:2])

        # Add this partial solution as the new best so far if it beats the previous best
        if new_best[:2] > best[-1][:2]:
            best.append(new_best)

    return best[-1]


value, value2, end_date, rows = best_schedule_two_values(data)
print('Best values:', value, -value2)
print('Rows:')
for row in rows:
    print(row)
``````
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download