HelloWorld4382 - 1 year ago 58
Python Question

I have two files: one containing over 200 tweets, and another containing keywords and their values. A typical tweet looks like this (I have also provided my code below):

``````[41.923916200000001, -88.777469199999999] 6 2011-08-28 19:24:18 My life is a moviee. ( only the number in brackets and the words after the time are relevant)
``````

and the keywords look something like

``````love,10
like,5
best,10
hate,1
``````

I use the two numbers at the beginning of the tweet to determine which region the tweet was made in (shown below in my code). For each individual tweet (each line in the file), depending on the number of keywords in the tweet, I add them, divided by the total of the values associated with them (per tweet), which gives me the score. My question is: how would I be able to total the scores for all the tweets in a region and divide that by the number of tweets in that region? Below, where I put happynessTweetScore, is how I calculated the score for the individual tweets in the file (each line) that actually contain keywords. For this part, I'm not sure how to add up all the values by region and divide them by the number of tweets in that region. Should I add the scores to a list per region and then sum each list? I don't know.
I started it like this:

``````def score(tweet):
# First sketch of a per-tweet score: walks the items of `tweet` and, for each
# one found in the module-level `sentiments`, adds its value to `total_value`.
total = 0
total_value = 0
for word in tweet:
if word in sentiments:
total_value += sentiments[word]
# NOTE(review): `total_count` is never initialised (only `total` is), and the
# snippet ends without returning anything.
total_count += 1
``````

But I don't know how to use something like this to total the scores for all of the tweets in each region individually and divide that by the number of tweets in that region.

I divided the tweets into four regions (latitude, long) using these values (rectangle) ALL THE WAY at the bottom of the code:

``````p1 = (49.189787, -67.444574)  # (lat, long): upper latitude bound, upper longitude bound of `eastern`
p2 = (24.660845, -67.444574)  # lower latitude bound, same longitude as p1
p3 = (49.189787, -87.518395)  # longitude shared by `eastern` and `central`
p4 = (24.660845, -87.518395)
p5 = (49.189787, -101.998892)  # longitude shared by `central` and `mountain`
p6 = (24.660845, -101.998892)
p7 = (49.189787, -115.236428)  # longitude shared by `mountain` and `pacific`
p8 = (24.660845, -115.236428)
p9 = (49.189787, -125.242264)  # lower longitude bound of `pacific`
p10 = (24.660845, -125.242264)

from collections import Counter
# Ask for the keyword file; quit quietly if it cannot be opened.
keyW_Path = input("Enter file named keywords: ")
try:
    keyFile = open(keyW_Path, "r")
except IOError:
    exit()

# Calculating Sentiment Values
# Each line is expected to look like "love,10" (keyword, comma, integer value).
# The file the user named is the one that is read, and it is read only once.
sentiments = {}
with keyFile:
    for line in keyFile:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines such as a trailing newline
        word, value = line.rsplit(",", 1)
        sentiments[word] = int(value)

# Kept from the earlier draft: every keyword registered with a count of 0.
keywords = dict.fromkeys(sentiments, 0)

# Ask for the tweet file; quit quietly if it cannot be opened.
tweet_path = input("Enter file named tweets: ")
try:
    tweetFile = open(tweet_path, "r")
except IOError:
    exit()
with tweetFile:
    tweet_lines = tweetFile.readlines()

# `values` ends up holding the keyword counts of the last tweet line read
# (an empty Counter when there are no tweets); `total()` reads this name.
values = Counter()
for line in tweet_lines:
    values = Counter(word for word in line.split() if word in sentiments)

keyW = ["love", "like", "best", "hate", "lol", "better", "worst", "good", "happy", "haha", "please", "great", "bad", "save", "saved", "pretty", "greatest", 'excited', 'tired', 'thanks', 'amazing', 'glad', 'ruined', 'negative', 'loving', 'sorry', 'hurt', 'alone', 'sad', 'positive', 'regrets', 'God']

# Copy every tweet that mentions one of the words in keyW (substring match)
# to newfile.txt, which the region loop further down reads back.
with open('newfile.txt', 'w') as newfile:
    for line in tweet_lines:
        if any(word in line for word in keyW):
            newfile.write(line)
def score(tweet, keywords=None):
    """Count how many items of *tweet* are sentiment keywords.

    Args:
        tweet: Iterable of words, e.g. the tokens of one tweet line.
        keywords: Container of keywords to match against. Defaults to the
            module-level ``sentiments`` mapping.

    Returns:
        The number of items of *tweet* found in *keywords* (0 if none).
    """
    if keywords is None:
        keywords = sentiments
    # The count is returned so callers can accumulate it with ``+=``.
    return sum(1 for word in tweet if word in keywords)
def total(score, known=None):
    """Count how many items of *score* appear in *known*.

    Args:
        score: Iterable of items to check.
        known: Container to test membership against. Defaults to the
            module-level ``values`` counter.

    Returns:
        The number of items of *score* present in *known* (0 if none).
    """
    if known is None:
        known = values
    # A local named ``sum`` would shadow the builtin, so use ``matches``.
    matches = 0
    for number in score:
        if number in known:
            matches += 1
    return matches
#Classifying the regions
class Region:
    """Rectangle in latitude/longitude space.

    Each range is an indexable pair: element 0 is the inclusive lower bound
    and element 1 the exclusive upper bound.
    """

    def __init__(self, lat_range, long_range):
        """Store the latitude and longitude ranges of the rectangle."""
        self.lat_range = lat_range
        self.long_range = long_range

    def contains(self, lat, long):
        """Return True when (lat, long) lies inside the rectangle.

        Lower bounds are inclusive and upper bounds exclusive, so a point on
        a boundary shared by two adjacent regions belongs to only one.
        """
        return (self.lat_range[0] <= lat < self.lat_range[1]
                and self.long_range[0] <= long < self.long_range[1])
# The four regions share latitude bounds and tile the longitude axis east to west.
eastern = Region((24.660845, 49.189787), (-87.518395, -67.444574))
central = Region((24.660845, 49.189787), (-101.998892, -87.518395))
mountain = Region((24.660845, 49.189787), (-115.236428, -101.998892))
pacific = Region((24.660845, 49.189787), (-125.242264, -115.236428))

# Running score total per region.
eastScore = 0
centralScore = 0
pacificScore = 0
mountainScore = 0
happyScoreE = 0

# Number of tweets that fell in each region, needed to turn totals into averages.
eastCount = 0
centralCount = 0
mountainCount = 0
pacificCount = 0

with open('newfile.txt') as tweets:
    for line in tweets:
        line = line.split(" ")
        try:
            lat = float(line[0][1:-1])  # strip the leading "[" and the trailing ","
            long = float(line[1][:-1])  # strip the trailing "]"
        except (IndexError, ValueError):
            continue  # not a "[lat, long] ..." line; skip it
        if eastern.contains(lat, long):
            eastScore += score(line)
            eastCount += 1
        elif central.contains(lat, long):
            centralScore += score(line)
            centralCount += 1
        elif mountain.contains(lat, long):
            mountainScore += score(line)
            mountainCount += 1
        elif pacific.contains(lat, long):
            pacificScore += score(line)
            pacificCount += 1

# Average score per tweet for each region; 0 when a region received no tweets.
eastAverage = eastScore / eastCount if eastCount else 0
centralAverage = centralScore / centralCount if centralCount else 0
mountainAverage = mountainScore / mountainCount if mountainCount else 0
pacificAverage = pacificScore / pacificCount if pacificCount else 0
``````

Let's say, as you described, we have a file containing data like:

``````love,10
movie,5
``````

First of all, create the dictionary from the file.

``````kw_to_score = {}
kw_file = 'keywords.txt'
with open(kw_file, 'r') as kwf:
    # Each line looks like "love,10": keyword, comma, integer score.
    for line in kwf:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines such as a trailing newline
        word, value = line.split(',')
        kw_to_score[word] = int(value)
``````

Once that is done, we need to create the score function:

``````def score(tweet, keywords):
# Returns a pair for one tweet: (sum of the values of the keywords found,
# number of keyword occurrences found).
# `tweet` must be a string, since it is split here; `keywords` maps word -> value.
score = 0
count = 0
for word in tweet.split(): # split on any whitespace
if word in keywords:
score += keywords[word]
count += 1
return score, count
``````

After that, continue..

``````class Region:
# A latitude/longitude rectangle that also carries running totals for averaging.
def __init__(self, lat_range, long_range):
# Each range is an indexable pair; contains() treats [0] as the inclusive
# lower bound and [1] as the exclusive upper bound.
self.lat_range = lat_range
self.long_range = long_range
self.score = 0 # running total of scores added by the caller
self.quantity = 0 # running count used as the divisor for the average
def contains(self, lat, long):
# True when lower <= value < upper holds for both latitude and longitude.
return self.lat_range[0] <= lat and lat < self.lat_range[1] and\
self.long_range[0] <= long and long < self.long_range[1]

eastern = Region((24.660845, 49.189787), (-87.518395, -67.444574))
central = Region((24.660845, 49.189787), (-101.998892, -87.518395))
mountain = Region((24.660845, 49.189787), (-115.236428, -101.998892))
pacific = Region((24.660845, 49.189787), (-125.242264, -115.236428))

with open('newfile.txt') as tweets:
    for line in tweets:
        fields = line.split(" ")
        try:
            lat = float(fields[0][1:-1])  # strip the leading "[" and the trailing ","
            long = float(fields[1][:-1])  # strip the trailing "]"
        except (IndexError, ValueError):
            continue  # not a "[lat, long] ..." line; skip it
        for region in (eastern, central, mountain, pacific):
            if region.contains(lat, long):
                # score() splits the text itself, so it is given the raw line
                # rather than the token list (a list has no .split()).
                region_score, count = score(line, kw_to_score)  # pass the extra dict with keywords mapped to score
                region.score += region_score
                region.quantity += count
                break  # the longitude ranges are half-open and do not overlap
``````

Then all you need to do is just go for:

``````print(eastern.score / eastern.quantity) # Average for the eastern region: accumulated score divided by accumulated quantity; raises ZeroDivisionError while quantity is still 0.
``````
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download