ak4zh ak4zh - 1 year ago 40
Python Question

Web data from table to CSV

This is the code:

import requests
from bs4 import BeautifulSoup
import re

# Endpoint of the vehicle-status lookup form that the script posts to.
url = 'https://olps.cgtransport.org/OLTP/Tax/VehicleStatus.aspx'

# Registration numbers to look up, one request per entry.
reg_number = ['CG04DS7961']

for i in reg_number:
    # The form takes the number in two fields: everything before the last
    # four characters, and the last four characters.
    reg1 = i[:-4]
    reg2 = i[-4:]

    # Form fields sent with the POST.
    # NOTE(review): the __VIEWSTATE*/__EVENTVALIDATION values are hard-coded
    # here; presumably they were copied from a captured form submission and
    # may need refreshing if the server rejects them - confirm.
    payload = {
        '__VIEWSTATEFIELDCOUNT': '3',
        '__VIEWSTATE': '5Rx7Jezv02wRDXtT58JN6uHfoZf2BCTkLyrML9D/7VLW1gz5HhU8sjA2R/7tOPruA/C5yDKTBJBtetPEAxUAPV6iDKZ9TrCt+JTtG9yZisuK5rgWRPQQ9iCqmEFBIGT9K/pVMPJVr2BE+S/S/wtmyTiZRL5zAnbBXZ+Z6xTQcmMj1VSq8vlwmx+0jsZpOHSu46nUZhurNclrV469rApFvORQTcnI2iyS4moLgwH6muz/umtBfTw31jzVsP/3R0u',
        '__VIEWSTATE1': 'pFQlf7Tpik2lCjknuojNbZw9FEYHiUYYGzxOYiwOGcSqt8nHzrZpJW8fGseyQWsG2+r12CzsbOEsxEyBh73/YHGDyK52IHBN1JLYgV45SkLp2jJqaDSbeSE6/3Xfibfd8PXX0SzoyztUTYb30K0Y9X1zTBKl6yP08Ui4I9Wuks7+4qRBDhOLedsrjBCrlWZLgUTIUgiye9UeIfQ/Q8sTR9NOM1N91b38x4+C7kaXhqn/ayrrVxJJm1uXE1ua48z',
        '__VIEWSTATE2': 'SYo3Su3gkp4339oFMeN+Q+/7XFFqlTTs4RAHi08VV252mno3weI5t9jg6ns4mhcrRQLa0bOM2Q/y/qEgkGPXoRxh1QBC/DyfGlLyVc/umb8WOdA1DDypkEt+oRRmI48fX1L6/scDrVZKUQWtF2Pm87WPQcYLP19h5vHXqGIvTHOIdoLzjC',
        '__VIEWSTATEGENERATOR': '34956357',
        '__EVENTVALIDATION': 'ygss/i7NxWFitcgCI9h84GSJJl8UM4sb1apUvzZIv1T1PL/JHswnbZ01G31EtP5I3zrr3rZRL0Hb6aAnrgkmqg7B70FsbNrF9hZ9eFjIGJKw7YBq+G+6hHXE1hYZu3i23uu0Lhdkm+S2An6ptxA+dW5P7+o=',
        'ctl00$ContentPlaceHolder1$txtregPart1': reg1,
        'ctl00$ContentPlaceHolder1$txtregPart2': reg2,
        'ctl00$ContentPlaceHolder1$btnshow': 'Search',
    }

    r = requests.post(url, data=payload)

    soup = BeautifulSoup(r.content, 'html5lib')

    # The result table is located by its element id; its text holds both the
    # labels ("Registration Number :") and the values.
    table = soup.find('table', attrs={'id': 'ctl00_ContentPlaceHolder1_tbPermit'})
    data = table.text
    # Lines of the table text that end in ':' (the labels). Collected but not
    # used by the print below.
    headers = re.findall('.+:', data)
    print(data)


Code Output:



Registration Number :


CG04DS7961


Registration Date :


20/09/2010




RTO Name :


RAIPUR


Tax Type :


LIFE TIME




Owner's Name :


PRATIK DEWANGAN


Father's Name :


.




Vehicle Class :


NON-TRANSPORT VEHICLE


Vehicle Sub Class :


MCYCLE MOTOR CYCLE




Vehicle Manufacturer :


TVS MOTORS LTD


Vehicle Model :


SCOOTYPEP+




Manufacturer Date :


9/2010


Seating Capacity :


2




UnLaden Weight :


95


Laden Weight :


0




Engine Number :


OG3FA2172150


Chassis Number :


MD626BG39A2F97895




Tax Paid Upto :





Tax Clearance Upto :







Insurance Upto :





Fitness Upto :


19/09/2025


The desired output is only the values, in CSV format. I don't need the headers.

I have already searched Stack Overflow for a solution.

However, what I found didn't work for me, because I can't use the Pandas module: I want to run this code on Pythonista, which does not support Pandas. The other posts also dealt with a table format different from the one on this specific website.

I just want the values as:

"CG04DS7961","20/09/2010","RAIPUR","LIFE TIME","PRATIK DEWANGAN","......."

Answer Source

Based on your code, you can find the specific elements and get their text with BeautifulSoup, append each text to a list of strings, and then write the list to a CSV file. Below is the amended code:

import csv  # writes the collected values as CSV rows
import re
import sys

import requests
from bs4 import BeautifulSoup

# Endpoint of the vehicle-status lookup form that the script posts to.
url = 'https://olps.cgtransport.org/OLTP/Tax/VehicleStatus.aspx'

# Registration numbers to look up, one request (and one CSV row) per entry.
reg_number = ['CG04DS7961']

# One list of values per registration number. The file is written once after
# the loop so that later numbers do not overwrite earlier ones.
rows = []

for number in reg_number:
    # The form takes the number in two fields: everything before the last
    # four characters, and the last four characters.
    reg1 = number[:-4]
    reg2 = number[-4:]

    # Form fields sent with the POST.
    # NOTE(review): the __VIEWSTATE*/__EVENTVALIDATION values are hard-coded
    # here; presumably they were copied from a captured form submission and
    # may need refreshing if the server rejects them - confirm.
    payload = {
        '__VIEWSTATEFIELDCOUNT': '3',
        '__VIEWSTATE': '5Rx7Jezv02wRDXtT58JN6uHfoZf2BCTkLyrML9D/7VLW1gz5HhU8sjA2R/7tOPruA/C5yDKTBJBtetPEAxUAPV6iDKZ9TrCt+JTtG9yZisuK5rgWRPQQ9iCqmEFBIGT9K/pVMPJVr2BE+S/S/wtmyTiZRL5zAnbBXZ+Z6xTQcmMj1VSq8vlwmx+0jsZpOHSu46nUZhurNclrV469rApFvORQTcnI2iyS4moLgwH6muz/umtBfTw31jzVsP/3R0u',
        '__VIEWSTATE1': 'pFQlf7Tpik2lCjknuojNbZw9FEYHiUYYGzxOYiwOGcSqt8nHzrZpJW8fGseyQWsG2+r12CzsbOEsxEyBh73/YHGDyK52IHBN1JLYgV45SkLp2jJqaDSbeSE6/3Xfibfd8PXX0SzoyztUTYb30K0Y9X1zTBKl6yP08Ui4I9Wuks7+4qRBDhOLedsrjBCrlWZLgUTIUgiye9UeIfQ/Q8sTR9NOM1N91b38x4+C7kaXhqn/ayrrVxJJm1uXE1ua48z',
        '__VIEWSTATE2': 'SYo3Su3gkp4339oFMeN+Q+/7XFFqlTTs4RAHi08VV252mno3weI5t9jg6ns4mhcrRQLa0bOM2Q/y/qEgkGPXoRxh1QBC/DyfGlLyVc/umb8WOdA1DDypkEt+oRRmI48fX1L6/scDrVZKUQWtF2Pm87WPQcYLP19h5vHXqGIvTHOIdoLzjC',
        '__VIEWSTATEGENERATOR': '34956357',
        '__EVENTVALIDATION': 'ygss/i7NxWFitcgCI9h84GSJJl8UM4sb1apUvzZIv1T1PL/JHswnbZ01G31EtP5I3zrr3rZRL0Hb6aAnrgkmqg7B70FsbNrF9hZ9eFjIGJKw7YBq+G+6hHXE1hYZu3i23uu0Lhdkm+S2An6ptxA+dW5P7+o=',
        'ctl00$ContentPlaceHolder1$txtregPart1': reg1,
        'ctl00$ContentPlaceHolder1$txtregPart2': reg2,
        'ctl00$ContentPlaceHolder1$btnshow': 'Search',
    }

    r = requests.post(url, data=payload)

    soup = BeautifulSoup(r.content, 'lxml')

    table = soup.find('table', attrs={'id': 'ctl00_ContentPlaceHolder1_tbPermit'})
    if table is None:
        # Without the result table there is nothing to extract; skip this
        # number instead of failing with an AttributeError below.
        print("No result table found for " + number)
        continue

    # Values found for this registration number, in page order.
    resultList = []
    # The value cells are the elements carrying align="left"; the value text
    # sits in a <font> child. Cells without one, or with empty text, are
    # skipped, so blank fields do not appear in the row.
    for cell in table.find_all(attrs={"align": "left"}):
        if cell.font is None:
            continue
        text = str(cell.font.text)
        if text != "":
            resultList.append(text)

    rows.append(resultList)
    print(resultList)  # show the values collected for this number

# Write the result to output.csv.
# The csv module wants a text file opened with newline='' on Python 3 and a
# binary file on Python 2.
if sys.version_info[0] >= 3:
    resultFile = open("output.csv", 'w', newline='')
else:
    resultFile = open("output.csv", 'wb')
with resultFile:
    # QUOTE_ALL wraps every value in double quotes ("CG04DS7961","20/09/2010",...).
    wr = csv.writer(resultFile, dialect='excel', quoting=csv.QUOTE_ALL)
    wr.writerows(rows)

The output of resultList is:

['CG04DS7961', '20/09/2010', 'RAIPUR', 'LIFE TIME', 'PRATIK DEWANGAN', '.', 'NON-TRANSPORT VEHICLE', 'MCYCLE MOTOR CYCLE', 'TVS MOTORS LTD', 'SCOOTYPEP+', '9/2010', '2', '95', '0', 'OG3FA2172150', 'MD626BG39A2F97895', '19/09/2025']
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download