Vijayendra Shah Vijayendra Shah - 14 days ago 13
Python Question

Unexpected keyword argument python

I am passing the correct parameters, but I am still getting the following error:

Starting classification

Classification running ...

Traceback (most recent call last):
File "C:/Classify/classifier.py", line 95, in <module>

train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True)
File "C:/Classify/classifier.py", line 25, in train_model

cv = ShuffleSplit(n=len(X), n_iterations=1, test_fraction=0.3, indices=True, random_state=0)

TypeError: __init__() got an unexpected keyword argument 'test_fraction'

Process finished with exit code 1


My code is:

# train_model: for every train/test split produced by ShuffleSplit, fit a
# LogisticRegression on the training part and record the train/test error,
# the confusion matrix and per-label ROC data. Optionally plots one ROC
# curve per label, saves a classifier with joblib, and returns
# (mean train error, mean test error, array of confusion matrices).
#
# Parameters, as used below:
#   X, Y  -- indexed with the train/test indices yielded by the splitter
#   name  -- only used to build the plot description string
#   plot  -- when true, plot_roc_curves is called for each label
#
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the exact nesting of the loops and of the `if plot:` / joblib.dump
# statements cannot be read off the text -- confirm against the real file.
# NOTE(review): genre_list and plot_roc_curves are not defined in this
# snippet; presumably they come from elsewhere in classifier.py.
def train_model(X, Y, name, plot=False):

# Distinct label values; iterated over for the per-label ROC computation.
labels = np.unique(Y)

# This is the call that raises the TypeError shown in the traceback:
# ShuffleSplit does not accept a `test_fraction` keyword.
# NOTE(review): presumably the intended keyword is `test_size` (and `n_iter`
# rather than `n_iterations`) -- confirm against the ShuffleSplit
# documentation for the installed scikit-learn version.
cv = ShuffleSplit(n=len(X), n_iterations=1, test_fraction=0.3, indices=True, random_state=0)

train_errors = []
test_errors = []

scores = []
# NOTE(review): nothing in the visible code ever appends to pr_scores,
# precisions, recalls or thresholds; pr_scores is only read when building
# `summary` near the end.
pr_scores = defaultdict(list)
precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list)

# Per-label ROC results, one list entry appended per evaluated split.
roc_scores = defaultdict(list)
tprs = defaultdict(list)
fprs = defaultdict(list)

# NOTE(review): clfs is appended to below but never read in this snippet.
clfs = [] # for the median

cms = []

# Each item yielded by cv is unpacked into train and test, which are used
# to index X and Y.
for train, test in cv:
X_train, y_train = X[train], Y[train]
X_test, y_test = X[test], Y[test]

clf = LogisticRegression()
clf.fit(X_train, y_train)
clfs.append(clf)

train_score = clf.score(X_train, y_train)
test_score = clf.score(X_test, y_test)
scores.append(test_score)

# Errors are stored as 1 - score.
train_errors.append(1 - train_score)
test_errors.append(1 - test_score)

y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cms.append(cm)

# One-vs-rest ROC: the test labels are turned into a 0/1 vector for the
# current label and compared against that label's probability column.
for label in labels:
y_label_test = np.asarray(y_test == label, dtype=int)
proba = clf.predict_proba(X_test)
# NOTE(review): the label value itself is used as the column index, which
# assumes labels are the integers 0..k-1 -- confirm against the caller.
proba_label = proba[:, label]

fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
roc_scores[label].append(auc(fpr, tpr))
tprs[label].append(tpr)
fprs[label].append(fpr)

if plot:
for label in labels:
scores_to_sort = roc_scores[label]
# Picks the position of the middle AUC score after sorting.
# NOTE(review): `/ 2` is integer division only under Python 2; under
# Python 3 it yields a float, which cannot be used as an index.
median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
desc = "%s %s" % (name, genre_list[label])
plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])

# NOTE(review): pr_scores is never filled in the visible code, so
# all_pr_scores is built from an empty dict; `summary` is only consumed by
# the commented-out print below.
all_pr_scores = np.asarray(pr_scores.values()).flatten()
summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))

#print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

#save the trained model to disk
# Only `clf` (the classifier assigned most recently in the loop) is dumped,
# not the whole clfs list.
joblib.dump(clf, 'saved_model/model_ceps.pkl')

return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)

Answer

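The problem is the parameter names. The traceback complains about test_fraction, which is not among the accepted parameters listed below; the corresponding parameter is test_size. Likewise, the parameter n_iterations is named n_iter, as explained in the ShuffleSplit documentation. Change the parameter names and the error will cease to exist.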

You can pass the following parameters to sklearn.cross_validation.ShuffleSplit, assuming scikit-learn 0.15:

n
n_iter
test_size
train_size
indices
random_state
n_iterations