Good practice when scraping fields such as the name, price, and links is to add error handling for each field being scraped — something like the code below:
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'}
BASE_URL = "https://www.n11.com/super-firsatlar"


def parse_products(html):
    """Parse the deals-page HTML and return a list of product dicts.

    Every field is extracted defensively: a missing tag yields None for
    that field instead of raising AttributeError, so one malformed
    listing cannot abort the whole scrape.  (The original code crashed
    on the first listing without an <h3>/<a>/<img>/<ins>, and — worse —
    carried the *previous* listing's old_price forward whenever a
    listing had no 'oldPrice' tag, because the variable was only
    assigned inside a conditional.)
    """
    soup = BeautifulSoup(html, 'html.parser')
    products = []
    for item in soup.find_all('li', attrs={"class": "column"}):
        title_tag = item.find('h3')
        title = title_tag.get_text(strip=True) if title_tag else None

        # urljoin resolves relative, root-relative and absolute hrefs
        # correctly; naive BASE_URL + href produced broken URLs like
        # ".../super-firsatlar/some/path" or ".../super-firsatlarhttps://...".
        link_tag = item.find('a')
        link = (urljoin(BASE_URL, link_tag['href'])
                if link_tag and link_tag.has_attr('href') else None)

        img_tag = item.find('img')
        picture = (urljoin(BASE_URL, img_tag['src'])
                   if img_tag and img_tag.has_attr('src') else None)

        first_price = None
        new_tag = item.find('a', attrs={'class': 'newPrice'})
        if new_tag is not None:
            ins = new_tag.find('ins')
            if ins is not None:
                # [:10] mirrors the original's truncation of long price text.
                first_price = ins.text[:10].strip() + " TL"

        # Reset per item: the original only assigned old_price inside the
        # conditional, so it was undefined for the first undiscounted
        # listing (NameError) and stale for later ones.
        old_price = None
        old_tag = item.find('a', attrs={'class': 'oldPrice'})
        if old_tag is not None:
            deleted = old_tag.find('del')
            if deleted is not None:
                old_price = deleted.get_text(strip=True)

        products.append({
            'title': title,
            'link': link,
            'picture': picture,
            'first_price': first_price,
            'old_price': old_price,
        })
    return products


def main():
    """Fetch the n11 "super deals" page and write parsed products to book.json."""
    r = requests.get(BASE_URL, headers=HEADERS)
    if r.status_code == 200:
        # encoding + ensure_ascii=False keep Turkish characters readable
        # in the output file instead of \uXXXX escapes.
        with open('book.json', 'w', encoding='utf-8') as f:
            json.dump(parse_products(r.text), f, indent=4, ensure_ascii=False)
    else:
        print(r.status_code)


if __name__ == "__main__":
    main()
Click here to find more related problems and solutions.