Sitz Blogz Sitz Blogz - 6 months ago 83
Python Question

Pandas Seaborn Swarmplot doesn't plot

I am trying to plot a seaborn swarmplot where the second column (freq) holds the values and the third column (class) holds the categories to group by. The input and the code are given below.
Input

tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational


Code:

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the white-grid theme before anything is drawn.
sns.set(color_codes=True, style="whitegrid")

# The file has no header row: read it raw, then label the three columns.
column_labels = ['keyword', 'freq', 'class']
df = pd.read_csv('input.csv', header=None)
df.columns = column_labels

# One swarm per class on the x axis, frequency on the y axis.
ax = sns.swarmplot(data=df, x="class", y="freq")

plt.show()


The code neither produces a plot nor raises an error. Any suggestions on how to fix or optimize the code?

Answer

I think you first need read_csv, then create a new column class by concatenating class1 and class2 (using fillna for the missing second word), and finally strip the whitespace:

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io

# Inline sample of the whitespace-separated input, so the snippet runs
# without an external file.
temp=u"""tweetcricscore 51 high active
tweetcricscore 46 event based
tweetcricscore 12 event based
tweetcricscore 46 event based
tweetcricscore 1 viewers 
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# after testing, replace io.StringIO(temp) with the filename
df = pd.read_csv(io.StringIO(temp),
                 # raw string so the regex backslash is not treated as a
                 # (invalid) string escape; separator is arbitrary whitespace
                 sep=r"\s+",
                 names=['keyword','freq','class1','class2'])  # set new col names
# The whitespace separator splits a two-word class across class1/class2.
# Join the halves back together; one-word classes have NaN in class2, so
# replace it with '' and strip the trailing space the join leaves behind.
df['class'] = (df['class1'] + ' ' + df['class2'].fillna('')).str.strip()
# Function-call form prints the same on Python 2 and is valid on Python 3.
print(df)
           keyword  freq       class1  class2        class
0   tweetcricscore    51         high  active  high active
1   tweetcricscore    46        event   based  event based
2   tweetcricscore    12        event   based  event based
3   tweetcricscore    46        event   based  event based
4   tweetcricscore     1      viewers     NaN      viewers
5   tweetcricscore   178      viewers     NaN      viewers
6   tweetcricscore    46  situational     NaN  situational
7   tweetcricscore    23  situational     NaN  situational
8   tweetcricscore     1  situational     NaN  situational
9   tweetcricscore     8  situational     NaN  situational
10  tweetcricscore    56  situational     NaN  situational

# White-grid theme, then one swarm of freq values per rebuilt class label.
sns.set(color_codes=True, style="whitegrid")
ax = sns.swarmplot(data=df, x="class", y="freq")
plt.show()

graph

Solution if the class column does not contain whitespace:

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io

# Inline sample where every class is a single token, so splitting on
# whitespace yields exactly three fields per row.
temp=u"""tweetcricscore 51 highactive
tweetcricscore 46 eventbased
tweetcricscore 12 eventbased
tweetcricscore 46 eventbased
tweetcricscore 1 viewers 
tweetcricscore 178 viewers
tweetcricscore 46 situational
tweetcricscore 23 situational
tweetcricscore 1 situational
tweetcricscore 8 situational
tweetcricscore 56 situational"""
# after testing, replace io.StringIO(temp) with the filename
df = pd.read_csv(io.StringIO(temp),
                 # raw string so the regex backslash is not treated as a
                 # (invalid) string escape; separator is arbitrary whitespace
                 sep=r"\s+",
                 names=['keyword','freq','class'])  # set new col names
# Function-call form prints the same on Python 2 and is valid on Python 3.
print(df)
           keyword  freq        class
0   tweetcricscore    51   highactive
1   tweetcricscore    46   eventbased
2   tweetcricscore    12   eventbased
3   tweetcricscore    46   eventbased
4   tweetcricscore     1      viewers
5   tweetcricscore   178      viewers
6   tweetcricscore    46  situational
7   tweetcricscore    23  situational
8   tweetcricscore     1  situational
9   tweetcricscore     8  situational
10  tweetcricscore    56  situational

# White-grid theme, then one swarm of freq values per single-token class.
sns.set(color_codes=True, style="whitegrid")
ax = sns.swarmplot(data=df, x="class", y="freq")
plt.show()

graph1

EDIT2:

If the separator is a comma (`,`), use:

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import io

# Inline sample of the comma-separated input from the question; the class
# field may contain spaces because only ',' separates fields here.
temp=u"""tweetcricscore,51,high active
tweetcricscore,46,event based
tweetcricscore,12,event based
tweetcricscore,46,event based
tweetcricscore,1,viewers
tweetcricscore,178,viewers
tweetcricscore,46,situational
tweetcricscore,23,situational
tweetcricscore,1,situational
tweetcricscore,8,situational
tweetcricscore,56,situational"""
# after testing, replace io.StringIO(temp) with the filename
# Passing names= labels the columns; the default ',' separator is used.
df = pd.read_csv(io.StringIO(temp), names=['keyword','freq','class'])
# Function-call form prints the same on Python 2 and is valid on Python 3.
print(df)
           keyword  freq        class
0   tweetcricscore    51  high active
1   tweetcricscore    46  event based
2   tweetcricscore    12  event based
3   tweetcricscore    46  event based
4   tweetcricscore     1      viewers
5   tweetcricscore   178      viewers
6   tweetcricscore    46  situational
7   tweetcricscore    23  situational
8   tweetcricscore     1  situational
9   tweetcricscore     8  situational
10  tweetcricscore    56  situational

# White-grid theme, then one swarm of freq values per class label.
sns.set(color_codes=True, style="whitegrid")
ax = sns.swarmplot(data=df, x="class", y="freq")
plt.show()