HalfPintBoy HalfPintBoy - 6 months ago 31
Python Question

Populating python matrix

I'm doing the splitting of the words from the text file in python. I've receive the number of row (c) and a dictionary (word_positions) with index. Then I create a zero matrix (c, index). Here is the code:

from collections import defaultdict
import re
import numpy as np


f = open('/Users/Half_Pint_Boy/Desktop/sentenses.txt', 'r')

for line in f:
c = c + 1

word_positions = {}

with open('/Users/Half_Pint_Boy/Desktop/sentenses.txt', 'r') as f:
index = 0
for word in re.findall(r'[a-z]+', f.read().lower()):
if word not in word_positions:
word_positions[word] = index
index += 1


My question: How can I populate the matrix to be able to get this:
matrix[c,index] = count
, where
- is the number of row,
-the indexed position and
-the number of counted words in a row


Try next:

import re
import numpy as np
from itertools import chain

text = open('/Users/Half_Pint_Boy/Desktop/sentenses.txt')

text_list = text.readlines()


for i in range(len(text_list)):

text_niz = []

for i in range(len(text_list)):
    text_niz.append(text_list[i].lower()) # перевел к нижнему регистру

slovo = []

for j in range(len(text_niz)):
    slovo.append(re.split('[^a-z]', text_niz[j])) # токенизация

for e in range(len(slovo)):

    while slovo[e].count('') != 0:
        slovo[e].remove('') # удалил пустые слова

slovo_list = list(chain(*slovo))
print (slovo_list) # составил список слов

slovo_list=list(set(slovo_list)) # удалил повторяющиеся

s = []

for i in range(len(slovo)):
    for j in range(len(slovo_list)):
        s.append(slovo[i].count(slovo_list[j])) # посчитал количество слов в каждом предложении

matr = np.array(s) # матрица вхождений слов в предложения
d = matr.reshape((c, x)) # преобразовал в матрицу 22*254