John John - 1 month ago 14
Python Question

Turning a table into a graph (BeautifulSoup in Python)

Is it possible (is there an easy way) to get a table out of a website and then turn it into a graph instead of a table?

Here is the code. It extracts the table from the web page into a table (a pandas DataFrame).

# Import the library used to query a website.
# urllib2 only exists on Python 2; on Python 3 the same urlopen() lives in
# urllib.request, so fall back to it under the same name.
try:
    import urllib2
except ImportError:
    import urllib.request as urllib2

# Import the Beautiful Soup functions to parse the data returned from the website
from bs4 import BeautifulSoup

# Specify the url
wiki = "https://en.wikipedia.org/wiki/List_of_state_and_union_territory_capitals_in_India"

# Query the website and return the response to the variable 'page'
page = urllib2.urlopen(wiki)
try:
    # Parse the html in the 'page' variable, and store it in Beautiful Soup
    # format. The parser is named explicitly so the result does not depend on
    # which optional parsers happen to be installed.
    soup = BeautifulSoup(page, 'html.parser')
finally:
    # The response has been read by the parser; release the connection.
    page.close()

# Every <table> element on the page
all_tables = soup.find_all('table')

# The one table whose class attribute matches the capitals list
right_table = soup.find('table', class_='wikitable sortable plainrowheaders')
# Bare expression: echoes the table when run in an interactive session
right_table





# Generate lists: one list per extracted column, filled row by row
A = []
B = []
C = []
D = []
E = []
F = []
G = []
for row in right_table.find_all("tr"):
    cells = row.find_all('td')
    states = row.find_all('th')  # To store second column data
    if len(cells) == 6:  # Only extract table body not heading
        # find(text=True) returns the first text node inside the cell
        A.append(cells[0].find(text=True))
        B.append(states[0].find(text=True))
        C.append(cells[1].find(text=True))
        D.append(cells[2].find(text=True))
        E.append(cells[3].find(text=True))
        F.append(cells[4].find(text=True))
        G.append(cells[5].find(text=True))

# pandas turns the per-column lists into a single data frame
import pandas as pd

# Start from the first column, then attach the remaining ones in order
df = pd.DataFrame(A, columns=['Number'])
for column_name, column_values in (
    ('State/UT', B),
    ('Admin_Capital', C),
    ('Legislative_Capital', D),
    ('Judiciary_Capital', E),
    ('Year_Capital', F),
    ('Former_Capital', G),
):
    df[column_name] = column_values
# Bare expression: echoes the frame when run in an interactive session
df

Answer

You can use read_html and select the second table with [1] (read_html returns a list of DataFrames, one for each table in the web page), then plot it with DataFrame.plot:

import matplotlib.pyplot as plt
import pandas as pd

# read_html returns one DataFrame per table on the page; [1] picks the second.
# header=0 uses the first row as column names, index_col=0 the first column
# as the row index.
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_state_and_union_territory_capitals_in_India', header=0, index_col=0)[1]
print(df)

year_col = 'Year capital was established'
# Some cells hold 2 values of year separated by whitespace. Keep the first
# one in every row (.str[0]) instead of patching individual row labels;
# cells with a single year are left unchanged by this.
df[year_col] = df[year_col].astype(str).str.split().str[0]
# Convert the years to numbers
df[year_col] = df[year_col].astype(int)
df.plot(x='Judiciary capitals', y=year_col)
plt.show()

graph