CodeRabbit CodeRabbit - 5 months ago 57
Python Question

How to handle the KeyError in Python?

I am doing some data analysis in Python, using Twitter data to count the number of tweets from each country. This is the code I am using:

import json
import pandas as pd
import matplotlib.pyplot as plt

# Load the dump: each line of the file is parsed as one JSON document and
# every document that parses is collected in tweets_data.
tweets_data=[]
with open('/home/surya/tweet.txt','r') as f:
for line in f:
try:
tweet= json.loads(line)
tweets_data.append(tweet)
except:
# Bare except: a line that fails above is dropped without any report.
continue

tweet_table= pd.DataFrame()
# The `!= None` guard only covers a "place" key whose value is null. A parsed
# record that has no "place" key at all makes tweet["place"] raise KeyError
# before the comparison is ever evaluated.
tweet_table['country']= map(lambda tweet: tweet["place"]["country"] if tweet["place"] != None else None, tweets_data)

# Count how many times each country value occurs.
tweets_by_country = tweet_table['country'].value_counts()

# Bar chart of the first five entries of the counts.
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Countries', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 5 countries', fontsize=15, fontweight='bold')
tweets_by_country[:5].plot(ax=ax, kind='bar', color='blue')


This was generating an error

KeyError "place"


So I modified the code into something like this :

import json
import pandas as pd
import matplotlib.pyplot as plt
tweets_data=[]

# keyCheck returns arr[key] when key is among arr.keys(), otherwise default.
# It calls arr.keys(), so arr has to provide that method: passing a list
# raises AttributeError.
def keyCheck(key,arr,default):
if key in arr.keys():
return arr[key]
else:
return default

# Load the dump: each line is parsed as one JSON document; lines that fail
# are skipped silently by the bare except.
with open('/home/surya/tweet.txt','r') as f:
for line in f:
try:
tweet= json.loads(line)
tweets_data.append(tweet)
except:
continue

tweet_table= pd.DataFrame()
# For a missing key keyCheck yields "#default", and "#default" != None is
# true, so tweet["place"] is still indexed directly when the key is absent.
# Only a "place" key that is present with a null value reaches the else branch.
tweet_table['country']= map(lambda tweet: tweet["place"]["country"] if keyCheck("place",tweet,"#default") != None else None, tweets_data)

# Count how many times each country value occurs.
tweets_by_country = tweet_table['country'].value_counts()

# Bar chart of the first five entries of the counts.
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Countries', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 5 countries', fontsize=15, fontweight='bold')
tweets_by_country[:5].plot(ax=ax, kind='bar', color='blue')


But this resulted in the error

AttributeError: list object has no attribute "keys"


The format of my data is:

{"created_at":"Thu Jun 16 13:15:13 +0000 2016","id":743431739238932480,"id_str":"743431739238932480","text":"I fucking hate Ramsey #ENGWAL #EURO2016 https:\/\/t.co\/wkFqOu8iwf","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":343618050,"id_str":"343618050","name":"SamuEars","screen_name":"S88Griff","location":"Derbados","url":null,"description":"27 years old, @RocesterFC1876 footballer, genuine, chilled out, opinionated, but most of all, wind up merchant","protected":false,"verified":false,"followers_count":496,"friends_count":272,"listed_count":1,"favourites_count":1915,"statuses_count":5505,"created_at":"Wed Jul 27 20:53:02 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/636136111191031809\/aQyj3bgK_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/636136111191031809\/aQyj3bgK_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/343618050\/1409857726","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":{"id":"232163114ebb8671","url":"https:\/\/api.twitter.com\/1.1\/geo\/id\/232163114ebb8671.json","plac
e_type":"city","name":"Etwall","full_name":"Etwall, England","country_code":"GB","country":"United Kingdom","bounding_box":{"type":"Polygon","coordinates":[[[-1.608732,52.874969],[-1.608732,52.887677],[-1.594409,52.887677],[-1.594409,52.874969]]]},"attributes":{}},"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"ENGWAL","indices":[22,29]},{"text":"EURO2016","indices":[30,39]}],"urls":[],"user_mentions":[],"symbols":[],"media":[{"id":743431733853433856,"id_str":"743431733853433856","indices":[40,63],"media_url":"http:\/\/pbs.twimg.com\/media\/ClEzORsWMAAlcQ3.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/ClEzORsWMAAlcQ3.jpg","url":"https:\/\/t.co\/wkFqOu8iwf","display_url":"pic.twitter.com\/wkFqOu8iwf","expanded_url":"http:\/\/twitter.com\/S88Griff\/status\/743431739238932480\/photo\/1","type":"photo","sizes":{"small":{"w":680,"h":517,"resize":"fit"},"medium":{"w":1178,"h":896,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1178,"h":896,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":743431733853433856,"id_str":"743431733853433856","indices":[40,63],"media_url":"http:\/\/pbs.twimg.com\/media\/ClEzORsWMAAlcQ3.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/ClEzORsWMAAlcQ3.jpg","url":"https:\/\/t.co\/wkFqOu8iwf","display_url":"pic.twitter.com\/wkFqOu8iwf","expanded_url":"http:\/\/twitter.com\/S88Griff\/status\/743431739238932480\/photo\/1","type":"photo","sizes":{"small":{"w":680,"h":517,"resize":"fit"},"medium":{"w":1178,"h":896,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1178,"h":896,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1466082913585"}


The problem seems to be that the "place" key is missing from some of the records in my tweet.txt file.

Can someone suggest a solution to it or please point out where exactly I am going wrong?

EDIT

I just updated the code into this

import json
import pandas as pd
import matplotlib.pyplot as mp

# Load the dump: each line is parsed as one JSON document; lines that fail
# are skipped silently by the bare except.
tweets_data=[]
with open('/home/surya/tweet.txt','r') as f:
for line in f:
try:
tweet= json.loads(line)
tweets_data.append(tweet)
except:
continue

tweet_table= pd.DataFrame()
# Python chains comparison operators, so
#   'place' in tweet is not None
# means ('place' in tweet) and (tweet is not None), and
#   'country' in tweet['place'] is not None
# means ('country' in tweet['place']) and (tweet['place'] is not None).
# The membership test runs first, so a null "place" evaluates
# 'country' in None, which raises TypeError.
tweet_table['country'] = map(lambda tweet: tweet['place']['country'] if 'place' in tweet is not None and 'country' in tweet['place'] is not None else None, tweets_data)

# Count how many times each country value occurs.
tweets_by_country = tweet_table['country'].value_counts()

# NOTE: this version imports matplotlib.pyplot as `mp`, so the name `plt`
# used below is not bound by the imports shown with this snippet.
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Countries', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=15)
ax.set_title('Top 5 countries', fontsize=15, fontweight='bold')
tweets_by_country[:5].plot(ax=ax, kind='bar', color='blue')


And now I get this error

TypeError: argument of type 'NoneType' is not iterable

Answer
import json
import pandas as pd
import matplotlib.pyplot as plt


# Parse the file as one JSON document per line. A blank or malformed line is
# skipped instead of aborting the whole load; only the parse error itself is
# caught, so unrelated failures still surface.
tweet_data = []
with open("testfile.txt", "r") as f:
    for line in f:
        try:
            tweet_data.append(json.loads(line))
        except ValueError:
            continue

tweet_table = pd.DataFrame()

# Keep a country only when the record really has one. A record may not be a
# JSON object at all, may lack the "place" key entirely, may hold null for it,
# or may carry a place without a "country" -- .get() covers the missing-key
# cases that plain indexing would turn into a KeyError.
countries = []
for tweet in tweet_data:
    place = tweet.get('place') if isinstance(tweet, dict) else None
    if isinstance(place, dict) and place.get('country'):
        countries.append(place['country'])
tweet_table['country'] = countries

# Count how many times each country occurs.
tweets_by_country = tweet_table['country'].value_counts()

# Bar chart of the first five entries of the counts.
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Countries', fontsize=15)
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Top 5 countries', fontsize=15, fontweight='bold')
tweets_by_country[:5].plot(ax=ax, kind='bar', color='blue')

plt.show()

Okay, I found the problem: the tweet JSON always contains "place" as a key, but its value is null when the tweet has no place attached, so you just have to make sure place isn't null before you try to access country. This is working for me, but I'm only getting the UK because the data only has one tweet with a country.