nicksheen nicksheen - 23 days ago 7
Python Question

Python3: HTTP Error 302 while using urllib

I want to read the value of different stocks from websites. Therefore I wrote this tiny script, which reads the page source and then parses out the value:

stock_reader.py



#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from re import search
from urllib import request


def main():
    """Print the bid price found on each configured order-book page.

    Every URL in ``links`` is downloaded, the page is scanned line by line
    for the element marked ``id="bid"``, and the first number found on each
    such line is printed on its own line.

    Raises:
        urllib.error.HTTPError: If the server answers with an HTTP error
            status (this includes the redirect-loop 302 error).
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    # Two parallel lists: ticker names first, matching order-book URLs second.
    links = [
        [
            'CSG',
            'UBS',
        ],
        [
            'http://www.tradegate.de/orderbuch.php?isin=CH0012138530',
            'http://www.tradegate.de/orderbuch.php?isin=CH0244767585',
        ],
    ]

    for _name, url in zip(links[0], links[1]):
        # Close the HTTP response as soon as the body has been read.
        with request.urlopen(url) as response:
            htmltext = response.read().decode('utf-8')

        for line in htmltext.splitlines():
            if 'id="bid"' not in line:
                continue
            # The unescaped '.' matches any single character, so both '.'
            # and ',' are accepted as the decimal separator.
            m = search(r'\d+.\d+', line)
            if m is None:
                # The bid element is present but holds no number; skip it
                # instead of failing on a missing match object.
                continue
            print(m.group())


if __name__ == '__main__':
    main()


Sometimes it works, but sometimes this error is raised:

error message



Traceback (most recent call last):
File "./aktien_reader.py", line 39, in <module>
main()
File "./aktien_reader.py", line 30, in main
htmltext = request.urlopen(url).read().decode('utf-8')
File "/usr/lib/python3.3/urllib/request.py", line 160, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 696, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python3.3/urllib/request.py", line 479, in open
response = meth(req, response)
File "/usr/lib/python3.3/urllib/request.py", line 591, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.3/urllib/request.py", line 511, in error
result = self._call_chain(*args)
File "/usr/lib/python3.3/urllib/request.py", line 451, in _call_chain
result = func(*args)
File "/usr/lib/python3.3/urllib/request.py", line 686, in http_error_302
self.inf_msg + msg, headers, fp)
urllib.error.HTTPError: HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
The last 30x error message was:
Found


My question is: why is it happening and how can I avoid it?

Thanks in advance.

Answer Source

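This probably happens because the destination site uses cookies and redirects you if you don't send them back. By default, urllib does not store cookies, so every redirect is followed without the cookie the server has just set; the server redirects again, and urllib eventually gives up with the "infinite loop" error.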

You can use something like this:

import urllib.request
from http.cookiejar import CookieJar

url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"

# Browser-like request headers. 'Accept-Encoding' is deliberately not sent:
# urllib does not decompress gzip/deflate bodies itself, so asking for them
# could make read() return compressed bytes.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}
req = urllib.request.Request(url, None, headers)

# The cookie jar keeps cookies set by the server and sends them back on the
# following requests, including the ones made while following redirects.
cj = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
response = opener.open(req)
response.read()  # the page body, as bytes

This way, you support cookies and the website will let you fetch the page :-)

Another way would be to use the requests package, which is much simpler to use. In your case, it would look like this:

import requests

# Fetch the order-book page; requests keeps cookies across the redirects it
# follows within a single call, so no manual cookie handling is needed.
url = "http://www.tradegate.de/orderbuch.php?isin=CH0012138530"
accept_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
}
r = requests.get(url, timeout=15, headers=accept_headers)

# r.content is the raw response body (bytes).
print(r.content)