mohitos mohitos - 1 year ago 71
Python Question

Python - optimise sum of a value with date constraints

I have this list:

import datetime
# Each triple is (start date, end date, value); dates are (year, month, day).
sample_list = [
    {
        'start_date': datetime.datetime(*start),
        'end_date': datetime.datetime(*end),
        'value': value,
    }
    for start, end, value in [
        ((2017, 8, 18), (2017, 8, 25), 20),
        ((2017, 8, 19), (2017, 8, 25), 22),
        ((2017, 8, 24), (2017, 8, 30), 40),
        ((2017, 8, 25), (2017, 8, 26), 52),
        ((2017, 8, 27), (2017, 8, 29), 12),
        ((2017, 9, 1), (2017, 9, 5), 20),
    ]
]


and would like to produce `optimum_list`, a subset of these rows with the maximum possible sum of `value` such that no two members' date ranges overlap.

The desired output for `sample_list` provided above would be:

# Expected result for sample_list: the non-overlapping rows whose values
# add up to the largest possible total.
optimum_list = [
    {'start_date': datetime.datetime(2017, 8, 25), 'end_date': datetime.datetime(2017, 8, 26), 'value': 52},
    {'start_date': datetime.datetime(2017, 8, 27), 'end_date': datetime.datetime(2017, 8, 29), 'value': 12},
    {'start_date': datetime.datetime(2017, 9, 1), 'end_date': datetime.datetime(2017, 9, 5), 'value': 20},
]

# 52 + 12 + 20
cumulative_value_of_sum = 84


Any ideas on how to solve this efficiently?

Answer Source

O(len(data)^2) solution using dynamic programming:

from datetime import datetime
from collections import defaultdict

data = [
    {'start_date': datetime(2017, 8, 18), 'end_date': datetime(2017, 8, 25), 'value': 20},
    {'start_date': datetime(2017, 8, 19), 'end_date': datetime(2017, 8, 25), 'value': 22},
    {'start_date': datetime(2017, 8, 24), 'end_date': datetime(2017, 8, 30), 'value': 40},
    {'start_date': datetime(2017, 8, 25), 'end_date': datetime(2017, 8, 26), 'value': 52},
    {'start_date': datetime(2017, 8, 27), 'end_date': datetime(2017, 8, 29), 'value': 12},
    {'start_date': datetime(2017, 9, 1), 'end_date': datetime(2017, 9, 5), 'value': 20},
]


def best_partial_solutions(data_by_end):
    """Return the chain of best partial solutions, ordered by end date.

    data_by_end maps each end date to the list of rows ending on that date.

    The result is a list of tuples (value, end_date, rows) where:
    - value is the sum of values of rows
    - end_date is the final date in rows
    - rows is a tuple of rows that don't overlap, representing
        the best solution that doesn't end after end_date
    Both value and end_date strictly increase through the list, so the
    last tuple is the best overall solution. The first tuple is always the
    empty solution (0, datetime.min, ()).
    """
    best = [(0, datetime.min, ())]

    # For every end_date in the data, in order:
    for end in sorted(data_by_end):

        # Find the row with that end_date that produces the best partial
        # solution when combined with the best partial solutions for
        # earlier end dates.
        new_best = None
        for value, prev_end, best_rows in best:
            for row in data_by_end[end]:
                # A row starting on or before the previous end date overlaps.
                if row['start_date'] <= prev_end:
                    continue
                total = value + row['value']
                # Compare totals only. Comparing whole tuples would fall
                # through to the row dicts on a tie, and dicts are not
                # orderable in Python 3 (TypeError). On a tie the first
                # candidate found is kept.
                if new_best is None or total > new_best[0]:
                    new_best = (total, end, best_rows + (row,))

        # new_best is None when no row for this end date fits after any
        # partial solution; there is nothing to add in that case.
        # Otherwise record it only if it beats the previous best.
        if new_best is not None and new_best[0] > best[-1][0]:
            best.append(new_best)

    return best


# Dict where keys are end dates, values are lists of rows with that end date
data_by_end = defaultdict(list)
for row in data:
    data_by_end[row['end_date']].append(row)

best = best_partial_solutions(data_by_end)

value, end_date, rows = best[-1]
print('Best value:', value)
print('Rows:')
for row in rows:
    print(row)

EDIT: modified solution for the case where a second value, `value2`, needs to be minimised among the solutions that share the same maximal sum of `value`:

from datetime import datetime
from collections import defaultdict

data = [
    {'start_date': datetime(2017, 8, 22), 'end_date': datetime(2017, 8, 23), 'value': 40, 'value2': 30},
    {'start_date': datetime(2017, 8, 22), 'end_date': datetime(2017, 8, 24), 'value': 40, 'value2': 2},
]


def best_partial_solutions_with_tiebreak(data_by_end):
    """Return the chain of best partial solutions, ordered by end date.

    data_by_end maps each end date to the list of rows ending on that date.

    The result is a list of tuples (value, value2, end_date, rows) where:
    - value is the sum of values of rows
    - value2 is negative the sum of value2s of rows
    - end_date is the final date in rows
    - rows is a tuple of rows that don't overlap, representing
        the best solution that doesn't end after end_date
    Solutions are ranked by the pair (value, value2): a larger sum of
    value wins, and among equal sums the smaller sum of value2 wins
    (value2 is stored negated so "larger is better" holds for both).
    The pair and end_date strictly increase through the list, so the last
    tuple is the best overall solution.
    """
    best = [(0, 0, datetime.min, ())]

    # For every end_date in the data, in order:
    for end in sorted(data_by_end):

        # Find the row with that end_date that produces the best partial
        # solution when combined with the best partial solutions for
        # earlier end dates.
        new_best = None
        for value, value2, prev_end, best_rows in best:
            for row in data_by_end[end]:
                # A row starting on or before the previous end date overlaps.
                if row['start_date'] <= prev_end:
                    continue
                score = (value + row['value'], value2 - row['value2'])
                # Compare scores only. Comparing whole tuples would fall
                # through to the row dicts on a tie, and dicts are not
                # orderable in Python 3 (TypeError). On a tie the first
                # candidate found is kept.
                if new_best is None or score > new_best[:2]:
                    new_best = score + (end, best_rows + (row,))

        # new_best is None when no row for this end date fits after any
        # partial solution; there is nothing to add in that case.
        # Otherwise record it only if it beats the previous best.
        if new_best is not None and new_best[:2] > best[-1][:2]:
            best.append(new_best)

    return best


# Dict where keys are end dates, values are lists of rows with that end date
data_by_end = defaultdict(list)
for row in data:
    data_by_end[row['end_date']].append(row)

best = best_partial_solutions_with_tiebreak(data_by_end)

value, value2, end_date, rows = best[-1]
print('Best values:', value, -value2)
print('Rows:')
for row in rows:
    print(row)
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download