losee losee - 4 months ago 19
Python Question

Can't understand why I am getting a KeyError in my Django Python app

I know that a KeyError is raised when a dict object is indexed (using the form a = adict[key]) and the key is not in the dictionary. But my key is there. It's telling me that the 'embed' key is the problem.

Here's my code:

def scrape_and_store_vlad():
    """Scrape teaser entries from the 'entry-pos-1' and 'entry-pos-2' divs.

    Fetches the front page, builds one dict per matching div and keeps the
    first six dicts of each group.

    Returns:
        A list of dicts: the 'entry-pos-1' entries followed by the
        'entry-pos-2' entries. Entries from 'entry-pos-1' carry the keys
        'href', 'src', 'text', 'comments', 'name' and 'url'; entries from
        'entry-pos-2' carry those keys plus 'embed'.
    """
    url_two = 'http://www.example.net'
    name = 'vlad'

    front_page = requests.get(url_two, headers=headers)
    soup = BeautifulSoup(front_page.text, 'html5lib')

    def make_soup(url):
        """Return the <p> texts of the page's 'article-body' div as one string."""
        response = requests.get(url, headers=headers)
        parsed = BeautifulSoup(response.text, 'html5lib')
        article = parsed.find('div', {'class': 'article-body'})
        paragraph_texts = [p.text for p in article.find_all('p')]
        # String form of the list with the enclosing square brackets removed.
        return str(paragraph_texts).strip('[]')

    def build_entry(div, with_embed=False):
        """Build the entry dict for one teaser div."""
        link = url_two + div.a.get('href')
        entry = {
            'href': link,
            'src': url_two + div.a.img.get('data-original'),
            'text': div.find('p', 'entry-title').text,
            'comments': make_soup(link).replace("\\", ""),
            'name': name,
            'url': link,
        }
        if with_embed:
            entry['embed'] = link
        return entry

    # Every matching div is processed (one extra request each) before the
    # result is cut down to six entries.
    titles = soup.find_all('div', {'class': 'entry-pos-1'})
    entries = [build_entry(div) for div in titles][:6]

    # scraping from vlad part two
    titles_two = soup.find_all('div', {'class': 'entry-pos-2'})
    entries_two = [build_entry(div, with_embed=True) for div in titles_two][:6]

    return entries + entries_two


def panties():
    """Scrape the 'video' divs of the site and return the first three entries.

    Returns:
        A list of at most three dicts with the keys 'href', 'src', 'text',
        'comments', 'name', 'url' and 'embed'.
    """
    from lxml import html

    pan_url = 'http://www.example.net'
    name = 'pan videos'

    listing = requests.get(pan_url, headers=headers)
    soup = BeautifulSoup(listing.text, 'html5lib')
    video_row = soup.find_all('div', {'class': 'video'})

    def youtube_link(url):
        """Return every URL found in the fifth text/javascript <script> of the page."""
        page = requests.get(url, headers=headers)
        parsed = BeautifulSoup(page.text, 'html5lib')
        script_tags = parsed.find_all('script', {'type': 'text/javascript'})
        script_dicts = [dict(text=str(tag)) for tag in script_tags]

        # The pattern is run over the string form of the fifth dict.
        fifth = str(script_dicts[4])
        return re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', fifth)

    def embed(url):
        """Turn a 'watch?v=' URL into its 'embed/' form."""
        return url.replace("watch?v=", "embed/")

    def build_entry(div):
        """Build the entry dict for one video div."""
        # youtube_link is called twice per div, so the page is fetched twice.
        return {
            'href': div.a.get('href'),
            'src': youtube_link(div.a.get('href'))[1],
            'text': div.h4.text,
            'comments': div.h4.text,
            'name': name,
            'url': div.a.get('href'),
            'embed': embed(youtube_link(div.a.get('href'))[0]),
        }

    # All divs are processed before the list is trimmed to three entries.
    all_entries = [build_entry(div) for div in video_row]
    return all_entries[:3]


def save_the_scrapes():
    """Collect entries from all scrapers, shuffle them and save new ones as drafts.

    An entry is saved only when no Post with the same title is found by
    ``Post.objects.filter``. Each saved Post reads the entry keys 'text',
    'name', 'url', 'comments', 'src' and 'embed', in that order.

    Returns:
        The shuffled, combined list of scraped entries.
    """
    world_star_entries = scrape_and_store_world()
    vlad_entries = scrape_and_store_vlad()
    pan_entries = panties()
    mergence = world_star_entries + vlad_entries + pan_entries

    random.shuffle(mergence)

    # (Post attribute, entry key) pairs, copied in this order.
    field_sources = (
        ('title', 'text'),
        ('name', 'name'),
        ('url', 'url'),
        ('body', 'comments'),
        ('image_url', 'src'),
        ('video_path', 'embed'),
    )

    for entry in mergence:
        post = Post()
        post.title = entry['text']
        if Post.objects.filter(title=post.title):
            # A post with this title was found; skip the entry.
            continue

        for attribute, key in field_sources:
            setattr(post, attribute, entry[key])
        post.status = 'draft'
        post.save()

    return mergence


Before I added the embed key, everything worked fine. If anyone can spot my error, please let me know where I went wrong. Thanks.

Answer

The key is clearly not there, otherwise you would not get a KeyError.

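You do not set the key in the first list of entries built in your scrape_and_store_vlad function. Only entries_two gets an 'embed' key, so entry['embed'] fails for the dicts in entries: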

def scrape_and_store_vlad():

    # Excerpt from the question: these dicts are built without an 'embed'
    # key, so reading entry['embed'] in save_the_scrapes raises KeyError
    # for any entry that comes from this list.
    entries = [{'href': url_two + div.a.get('href'),
                'src': url_two + div.a.img.get('data-original'),
                'text': div.find('p', 'entry-title').text,
                'comments': make_soup(url_two + div.a.get('href')).replace("\\", ""),
                'name': name,
                'url': url_two + div.a.get('href')
                } for div in titles][:6]
Comments