Scraping news articles from an archived link on a news website with BeautifulSoup (Python)

This should help you:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv

# Fetch the "most read" listing page, follow each entry's link to the article
# page, and write one row per entry to kompas.csv.
# NOTE(review): the listing URL is empty in this copy of the snippet (it looks
# like it was stripped from the page); fill it in before running, otherwise
# the request below cannot succeed.
kompas = requests.get('')
beautify = BeautifulSoup(kompas.content, 'html5lib')

# Filter by CSS class with class_=, matching the article lookup further down.
# The earlier form passed set literals such as {'class', 'most__count'},
# which is not an attribute mapping.
news = beautify.find_all('div', class_='most__list clearfix')
arti = []
for each in news:
    nu = each.find('div', class_='most__count').text
    title = each.find('h4', class_='most__title').text
    lnk = each.a.get('href')
    rcount = each.find('div', class_='most__read').text

    # Second request: download the article page itself to extract its body.
    r = requests.get(lnk)
    soup = BeautifulSoup(r.text, 'html5lib')
    body = soup.find('div', class_='read__content')
    # Some pages may lack the expected container; store an empty string
    # instead of failing on None.
    content = body.text.strip() if body is not None else ''

    # One CSV row per listing entry. The append call was missing, leaving the
    # key/value lines dangling and the result list empty.
    arti.append({
        'Top Number': nu,
        'Headline': title,
        'Link': lnk,
        'Most Read': rcount,
        'Content': content,
    })

df = pd.DataFrame(arti)
df.to_csv('kompas.csv', index=False)

Screenshot of csv file:

enter image description here

CLICK HERE to find more solutions to related problems.

Leave a Comment

Your email address will not be published.

Scroll to Top