drheinrich940 drheinrich940 - 1 month ago 21
Java Question

ArrayIndexOutOfBoundsException:-1

I'm coding some RL behaviors on a pacman bot and I messed things up with one of my lists in one of my functions arg_allmax or chooseAction

Here is the code of my class:

package rl;

import java.util.ArrayList;
import java.util.Hashtable;

public class Qlearn {
private double epsilon = 0.1; // Epsilon parameter for the Epsilon Greedy Strategy
private double alpha = 0.2; // Alpha parameter: used to influence o the refresh of Q
private double gamma = 0.9; // used to notice or not the feedback of the next action ; if =0 -> no feed back

private int actions[];
private Hashtable< Tuple<Integer,Integer>, Double> q; // Q(s,a) : hashTable : <state,action> -> value of q


public Qlearn(int[] actions) {
this.actions = actions;
q = new Hashtable< Tuple<Integer,Integer>, Double>();
}

public Qlearn(int[] actions, double epsilon, double alpha, double gamma) {
this.actions = actions;
this.epsilon = epsilon;
this.alpha = alpha;
this.gamma = gamma;
q = new Hashtable< Tuple<Integer,Integer>, Double>();
}

public Double getQ(int id_state, int id_action) {
// get the value of Q for the state of id_state and the action id_action ( return 0 if the value is not in the hashtable )
Tuple<Integer,Integer> t = new Tuple<Integer,Integer> (id_state, id_action); // we creatte a new integer object Tubple with the value of id_state and id_action
Double v = q.get(t);
if(v != null) return v;
else return 0.0;
}

// get the argmax of a list
public int argmax(double[] list) {
int arg=-1;
double max= 0;
for ( int i = 0; i<list.length; i++){
if ( list[i]>max ){
max = list[i];
arg = i;
}
}
return arg;
}

// get all the argmax if the argmax has several iterations
public ArrayList<Integer> arg_allmax(double[] list) {
ArrayList<Integer> args = new ArrayList<Integer>();
int a = argmax(list);
for ( int i = 0; i< list.length; i++){
if (list[i] == list[a]){
args.add(i);
}
}
return args;
}

// get the max of the list
public double max(double[] list) {
double max_ = -1e20;
int a = argmax(list);
max_ = list[a];
return max_;
}


/*
* Fonction that updates the hashtable
* for the action id_action and the state id_state
* if Q(s,a) had an old value, we allocate it the new value+ alpha(value - old_value)
* if Q(s,a) had not an old value : we allocate reward
*/
public void learnQ(int id_state, int id_action, double reward, double value) {
Tuple<Integer,Integer> t = new Tuple<Integer,Integer>(id_state,id_action);
Double oldv = q.get(t);

if(oldv == null) {

q.put(t, reward);
} else {

q.put(t, oldv+alpha*(value-oldv));
}
}

/*
* Here is the Epsilon Greedy strategy
* with proba epsilon :we choose a random action
* avec proba 1-eps : we choose the most favorable action in fonction of Q(s,a)
*/
public int chooseAction(int id_state) {
int action = -1;
if(Math.random() < epsilon) {

int i = (int)(Math.random()*actions.length);
action = actions[i];

} else {
double[] tab = new double[actions.length];
ArrayList<Integer> argmaxarray = new ArrayList<Integer>();
for ( int i=0; i>actions.length; i++){
tab[i]=actions[i];
}
argmaxarray=arg_allmax(tab);
int i=(int)(Math.random()*argmaxarray.size());
action=argmaxarray.get(i);

}

return action;
}


/*
* Learning after the occurence of a move
* 1) get the most profitable potential action from Q(s',a)
* 2) call learnQ
*/
public void learn(int id_state1, int id_action1, double reward, int id_state2) {
int futureAction = 0;
futureAction = chooseAction(id_state2);
double maxqnew = 0; // REMPLIR
maxqnew = getQ(futureAction, id_state2);


learnQ(id_state1, id_action1, reward, reward + gamma*maxqnew);

}

// Affiche Q(s,a)
private void printQvalue(int id_state) {
for(int action : actions) {
Tuple<Integer,Integer> t = new Tuple<Integer,Integer>(id_state,action);
Double v = q.get(t);
System.out.print(v+" ");
}
System.out.println();
}


Here is what eclipse tells me :

Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException: -1
at rl.Qlearn.arg_allmax(Qlearn.java:54)
at rl.Qlearn.chooseAction(Qlearn.java:108)
at rl.Qlearn.learn(Qlearn.java:138)


I think it comes somewhere in the else of the chooseAction method using the all_argmax fonction but I cannot find the exact error!

Here are the two involved methods (so it's more readable for you):

all_argmax :

public ArrayList<Integer> arg_allmax(double[] list) {
ArrayList<Integer> args = new ArrayList<Integer>();
int a = argmax(list);
for ( int i = 0; i< list.length; i++){
if (list[i] == list[a]){
args.add(i);
}
}
return args;
}


chooseAction :

public int chooseAction(int id_state) {
int action = -1;
if(Math.random() < epsilon) {

int i = (int)(Math.random()*actions.length);
action = actions[i];

} else {
double[] tab = new double[actions.length];
ArrayList<Integer> argmaxarray = new ArrayList<Integer>();
for ( int i=0; i>actions.length; i++){
tab[i]=actions[i];
}
argmaxarray=arg_allmax(tab);
int i=(int)(Math.random()*argmaxarray.size());
action=argmaxarray.get(i);

}

return action;
}

Answer

your IndexOutOfBoundsException occurs because of your argmax([]) method, either because of an empty array or because all the doubles in the list are negative.

In either of these cases the int arg = -1 variable is never set to another value than -1, which is obviously out of bounds in any scenario since -1 is not a valid array position.

The best course of action would be to either check if your array is empty before passing it to argmax or checking if the return value is valid (not -1) before doing anything with it. And also changing double max = 0 to double max = Double.NEGATIVE_INFINITY