Mona Jalal Mona Jalal - 1 month ago 20
Python Question

ValueError: too many values to unpack dealing with csv file

I have the following code:

1 import gensim
2 import nltk
3 from gensim.models import word2vec
4 from nltk.corpus import stopwords
5 from nltk.corpus import wordnet
6 import logging
7 import re
8 import itertools
9 import glob
10 from collections import defaultdict
11 import csv
12 from nltk.stem.wordnet import WordNetLemmatizer
13 import os
14 import os.path
15
16 stopwords = nltk.corpus.stopwords.words('english')
17
18 path = "/home/mona/computer_vision/imgur/tiny_comments/*.txt"
19 files = glob.glob(path)
20 csv_file_complete = open("tiny_graph.csv", "wb")
21 stat_csv_file = open("tiny_stat.csv", "r")
22 csv_reader = csv.reader(stat_csv_file)
23 lemmatizer = WordNetLemmatizer()
24 list_of_rows = []
25
26 with open('swear_words_uniq.txt') as swear_words_file:
27 swear_words = swear_words_file.read()
28 swear_words = re.sub("[^a-zA-Z]", ' ', swear_words).lower().split()
29 swear_words_file.close()
30
31
32 for file1, file2 in itertools.combinations(files, 2):
33 with open(file1) as f1:
34 f1_text = f1.read()
35 f1_text = re.sub(r'^https?:\/\/.*[\r\n]*', '',f1_text, flags=re.MULTILINE)
36 f1_words = re.sub("[^a-zA-Z]", ' ', f1_text).lower().split()
37 lemmatized_f1_words = [str(lemmatizer.lemmatize(w, wordnet.VERB)) for w in f1_words if w not in stopwords]
38 cleaned_f1_words = [w for w in lemmatized_f1_words if w not in swear_words and len(w) > 2]
39 f1.close()
40 with open(file2) as f2:
41 f2_text = f2.read()
42 f2_words = re.sub("[^a-zA-Z]", ' ', f2_text).lower().split()
43 lemmatized_f2_words = [str(lemmatizer.lemmatize(w, wordnet.VERB)) for w in f2_words if w not in stopwords]
44 cleaned_f2_words = [w for w in lemmatized_f2_words if w not in swear_words and len(w) > 2]
45 f2.close()
46 f1_head, f1_tail = os.path.split(file1)
47 f2_head, f2_tail = os.path.split(file2)
48 tail_to_numbers = {ftail: fnum for fnum, ftail in csv_reader}
49 stat_csv_file.seek(0)
50 try:
51 file1_file_number = tail_to_numbers[f1_tail]
52 file2_file_number = tail_to_numbers[f2_tail]
53 except KeyError as e:
54 print(e)
55 continue
56 else:
57 row_complete = [file1_file_number.strip(), file2_file_number.strip()]
58 list_of_rows.append(row_complete)
59 print(len(list_of_rows))
60 a_complete = csv.writer(csv_file_complete, delimiter=',')
61 for row in list_of_rows:
62 print(row)
63 a_complete.writerow(row)
64
65 csv_file_complete.close()


And I get this error:

$ python test_tiny.py
Traceback (most recent call last):
File "test_tiny.py", line 48, in <module>
tail_to_numbers = {ftail: fnum for fnum, ftail in csv_reader}
File "test_tiny.py", line 48, in <dictcomp>
tail_to_numbers = {ftail: fnum for fnum, ftail in csv_reader}
ValueError: too many values to unpack


The first 5 lines of tiny_stat.csv looks like this:

$ head -5 tiny_stat.csv
1,002qtwH.txt,bed,47%,dog,55%
2,0066Z9W.txt,person,57%
3,0082B22.txt,
4,008BdUz.txt,train,68%
5,00c5Zm8.txt,train,61%,car,59%,car,58%


in tiny_graph.csv, I want to create lines like this:
1, 80

where 1 and 80 are the file numbers of the two opened text files, looked up from tiny_stat.csv.

Answer

The rows of tiny_stat.csv have more than two fields (e.g. 1,002qtwH.txt,bed,47%,dog,55% has six), so unpacking each row into exactly two names fails with "ValueError: too many values to unpack". Index the row instead of unpacking it:

Change {ftail: fnum for fnum, ftail in csv_reader}

To {row[1]: row[0] for row in csv_reader}

Comments