Jaffer Wilson Jaffer Wilson - 3 years ago 117
Python Question

Python3 code error while downloading the images

Here is the code that I am using to download images from the Google Images.:

def get_soup(url, header):
return BeautifulSoup(urllib.request.urlopen(urllib.request.Request(url, headers=header)), 'html.parser')


def get_images_for_term(text, width, height, number_of_images=3):
search_keywords = ''
collect_valid_urls = []
query = text
if not (height and width) == '1':
keyword_keys = ' high quality background imagesize:' + width + 'x' + height
else:
keyword_keys = ' high quality background'
query = query + keyword_keys
max_images = number_of_images

try:
if not ((height == width) and (height and width) == '1'):
search_keywords = text + '_' + height + 'by' + width

else:
search_keywords = text
os.makedirs(search_keywords)
except OSError as e:
if e.errno != 17:
raise
pass

query = query.split()
query = '+'.join(query)
url = "google Url"

header = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/43.0.2357.134 Safari/537.36"}
soup = get_soup(url, header)
ActualImages = []
for a in soup.find_all("div", {"class": "rg_meta"}):
link = json.loads(a.text)["ou"]

ActualImages.append(link)

Stop_Counter = 0
for i, img in enumerate(ActualImages):

try:
print(img)
req = urllib.request.Request(img, headers={'User-Agent': header})
raw_img = urllib.request.urlopen(req, None, 15).read()

print(raw_img[0])
if raw_img[0] != '<':
image = np.asarray(bytearray(raw_img), dtype="uint8")
image = cv2.imdecode(image, cv2.IMREAD_COLOR)
try:
blur_map, score, blurry = estimate_blur(image)
except IOError and AttributeError:
blurry = False
if not blurry:
collect_valid_urls.append(img)
Output_file = open(search_keywords + "/" + str(Stop_Counter + 1) + ".jpg", 'wb')

Output_file.write(raw_img)
Output_file.close()
Stop_Counter += 1
if Stop_Counter == max_images:
print("Done with downloading the images")
break
else:
print("Image is Blurry.")

except Exception as e:
print("could not load : " + img)

print(e)


get_images_for_term('cats', '200', '300')


I am getting the following error:

https://i.pinimg.com/736x/ec/65/86/ec658681dada104797b3f1f49026c7f1--cat-wallpaper-iphone-wallpaper.jpg
could not load : https://i.pinimg.com/736x/ec/65/86/ec658681dada104797b3f1f49026c7f1--cat-wallpaper-iphone-wallpaper.jpg
expected string or bytes-like object


Kindly, help me improve the code so that it will give me no errors again.

Answer Source

urlopen accepts a string url as parameter, you should pass your link directly to :

raw_img = urllib.request.urlopen(img, timeout=15).read()

or fix the request objet, without declaring the previous header inside a new dictionary for headers:

req = urllib.request.Request(img, headers=header)
raw_img = urllib.request.urlopen(req, None, 15).read()
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download