ds_user ds_user - 3 months ago 15
R Question

Minimise objective function using R

Can somebody help me in solving this to multivariate function parameters optimization in R, I have a data set like this. This is just a subset of data, dimension of the full dataset is

n type * m regions * 12 months
.

Month region type physics maths allsub
Jan r1 1 4 5 9
Feb r1 1 3 8 11
Mar r1 1 5 4 9
Apr r1 1 6 7 13
May r1 1 4 4 8
Jun r1 1 8 9 17
Jul r1 1 4 3 7
Aug r1 1 5 4 9
Sep r1 1 3 8 11
Oct r1 1 9 2 11
Nov r1 1 4 7 11
Dec r1 1 7 3 10
Jan r1 2 5 8 13
Feb r1 2 4 9 13
Mar r1 2 8 3 11
Apr r1 2 5 6 11
May r1 2 6 4 10
Jun r1 2 7 6 13
Jul r1 2 3 7 10
Aug r1 2 4 8 12
Sep r1 2 4 4 8
Oct r1 2 8 1 9
Nov r1 2 2 3 5
Dec r1 2 1 6 7

... ... .. ... ... ....
... ... .. ... ... ....


I have one more dataset which has maximum number of physics and maths students in each region. And my objective function is this,
100*(physics) + 65*(maths) >= 0
. I want to minimize this function and my constraints are
1. sum of physics and maths should always be equal to allsub for that region and month.
2. total number of physics students in a region every month should be less than maximum number of physics students available in that region.
3. total number of maths students in a region every month should be less than maximum number of maths students available in that region.

I am trying to use R. The whole idea is to find the right number of physics and maths students in each region/type/month minimizing the objective function and meeting the constraints. Can someone help me with this?

EDIT : As requested in the comments.
Here is the total capacity dataset. dataframe name = totalcap

Month region physicscap mathscap
1 Jan r1 9 13
2 Feb r1 7 17
3 Mar r1 13 7
4 Apr r1 11 13
5 May r1 10 8
6 Jun r1 15 15
7 Jul r1 7 10
8 Aug r1 9 12
9 Sep r1 7 12
10 Oct r1 17 3
11 Nov r1 6 10
12 Dec r1 8 9


Here is the script I have tried,

library(dplyr)
library(MASS)
library(Rsolnp)

Month <- c('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')
region <- c('r1')
physicscap <- c(5,5,8,6,7,9,5,6,4,10,5,8)
mathscap <- c(5,8,5,8,5,10,5,5,8,5,8,5)

totalcap <- data.frame(Month,region,physicscap,mathscap)
#Constraints for the optimization.
constraints2 <- function(efforts){
# constraints are:
# 1. effort - allsub <= 0 in each region/month
#
efforts$effort_calculated <- efforts$physics + efforts+maths
reqeff <- summarise(group_by(efforts,region,Month),monthlyeffreg=sum(effort_calculated))
reqeffallsub <- summarise(group_by(efforts,region,Month),allsubsum=sum(allsub))
cons1 <- mutate(inner_join(reqeff,reqeffallsub,by=c('region'='region','Month'='Month'))
,diff=monthlyeffreg-allsubsum)
constout <- cons1$diff


# 2. sum(physics) - total physics available <= 0 in each region/month
#
phyreqeff <- summarise(group_by(efforts,region,Month),physicseff=sum(physics))
cons2 <- mutate(inner_join(totalcap,phyreqeff,by=c('region'='region','Month'='Month')),
diff=physicseff-physicscap)
constout <- c(constout,cons2$diff)


# 3. sum(maths) - total maths available <= 0 in each region/month
#
matreqeff <- summarise(group_by(efforts,region,Month),mathseff=sum(maths))
cons3 <- mutate(inner_join(totalcap,matreqeff,by=c('region'='region','Month'='Month')),
diff=mathseff-mathscap)
constout <- c(constout,cons3$diff)
constout
}


#Objective function to minimize the cost function.
objectivefunc <- function(efforts){
nb_physics <- sum(efforts$physics)
nb_maths <- sum(efforts$maths)
objective <- (100*nb_physics + 55*nb_maths - 110)
objective
}

Out2 <- solnp(pars = efforts,fun=objectivefunc,ineqfun=constraints2,ineqLB = rep(-100000,36),
ineqUB = rep(0,36), LB = rep(0,length(u)))


Here is the error I am getting,

Error in p0/vscale[(neq + 2):(nc + np + 1)] :
non-numeric argument to binary operator


Hope this clears the questions in comments. I tried my level best here, hope someone help me in solving this.

Answer

Here is an approach with lpSolveAPI:

dat <- data.frame(
    mon=rep(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),2),
    region="r1", 
    type=c(rep("1", 12), rep("2", 12)),
    physicsmin=1,
    mathsmin=1,
    allsub=c(9, 11, 9, 13, 8, 17, 7, 9, 11, 11, 11, 10, 13,13,11,11,10,13,10,12,8,9,5,7),
    stringsAsFactors=FALSE
)
dat
capdat <- data.frame(
    mon=c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
    region="r1",
    physicscap=c(9,7,13,11,10,15,7,9,7,17,6,8),
    mathscap=c(13,17,7,13,8,15,10,12,12,3,10,9),
    stringsAsFactors=FALSE
)
capdat

Now for each month/region combination an optimization problem is to be solved. That is why we wrap the calculation in a function:

library(lpSolveAPI)
ntypes <- length(unique(dat[,"type"])) # number of types
typemap <- setNames(seq.int(ntypes), unique(dat[,"type"])) # map typename to 1,...,ntypes

solve_one <- function(subdat, capdat) {

    # create object
    lprec <- make.lp(0, ncol=2*ntypes) # for each type, two decision variables

    # By convention, we assume that the first ntypes variables are physics for type 1, ..., ntypes
    # and the second ntypes variables are maths

    # add objective and type
    set.objfn(lprec, obj=c(rep(100, ntypes), rep(65, ntypes))) 
    set.type(lprec, columns=seq.int(2*ntypes), type="integer") # no reals

    # add capacity constraints
    idx <- which(capdat[,"mon"]==subdat[1,"mon"] & capdat[,"region"]==subdat[1,"region"]) # lookup the right cap
    add.constraint(lprec, rep(1, ntypes), type="<=", rhs=capdat[idx,"physicscap"], indices=seq.int(ntypes))
    add.constraint(lprec, rep(1, ntypes), type="<=", rhs=capdat[idx,"mathscap"], indices=seq.int(ntypes+1, 2*ntypes))

    # add allsub equality constraints and minimum constraints
    for (typ in subdat[,"type"]) {
        add.constraint(lprec, c(1,1), type="=", rhs=subdat[typemap[typ], "allsub"], indices=c(typemap[typ], ntypes+typemap[typ]))
        add.constraint(lprec, 1, type=">=", rhs=subdat[typemap[typ],"physicsmin"], indices=typemap[typ])
        add.constraint(lprec, 1, type=">=", rhs=subdat[typemap[typ],"mathsmin"], indices=ntypes+typemap[typ])
    }

    # solution data.frame
    ans <- subdat[, c("mon", "region", "type")]

    # solve       
    if(solve(lprec)==0) {
        sol <- get.variables(lprec)
        for (i in seq.int(nrow(subdat))) {
            ans[i, "physics"] <- sol[typemap[subdat[i,"type"]]]
            ans[i, "maths"] <- sol[typemap[subdat[i,"type"]]+ntypes]
        }
    } else ans[,c("physics", "maths")] <- NA # no solution found

    return(ans)
}

Now we apply the function to each subdataset which includes all types for each month/region combination. We use a split/apply/combine approach here:

sp <- split(dat, list(dat[,"mon"], dat[,"region"]))
results <- lapply(sp, solve_one, capdat=capdat)
results <- do.call(rbind, results)
rownames(results) <- NULL
results

The code does not assume that for each month/region combination all types are present (some types may be omitted), however the solution will be wrong if there are several entries present for the same month/region/type combination. (the code would need to be adapted for this).