Is there a limit to the number of records that can be scraped?

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
import pandas as pd

def main(req, num):
    """Fetch one listing page and extract (date, title, content) per post.

    Parameters
    ----------
    req : requests.Session
        Shared HTTP session used for the GET request.
    num : int
        1-based page number of the listing page to fetch.

    Returns
    -------
    list[tuple[str, str, str]] | bool
        One ``(date, title, content)`` tuple per post on the page, or
        ``False`` when an expected element is missing (selector returned
        ``None``), so the caller can skip this page.
    """
    # NOTE(review): the original URL was truncated in the source; restore
    # the real listing URL here (the caller iterates pages 1..672).
    r = req.get(f"https://example.com/page/{num}/")  # TODO: real site URL
    soup = BeautifulSoup(r.content, 'html.parser')
    try:
        # NOTE(review): the container selector was also lost in extraction;
        # "div.post" is a guess — confirm against the site's actual markup.
        data = [
            (
                x.select_one("span.updated").text,
                x.findAll("a")[1].text,
                x.select_one("div.entry-content").get_text(strip=True),
            )
            for x in soup.select("div.post")
        ]
        return data
    except AttributeError:
        # A select_one() returned None (missing element) -> .text raised;
        # signal the caller to skip this page rather than crash the pool.
        return False

# Fan out one request per listing page (pages 1..672) across a thread pool,
# then flatten the successful results into a single CSV file.
with ThreadPoolExecutor(max_workers=30) as executor:
    with requests.Session() as req:
        fs = [executor.submit(main, req, num) for num in range(1, 673)]
        allin = []
        for f in fs:
            result = f.result()
            if result:
                # main() returns a list of (date, title, content) tuples
                # per page, or False on failure — extend, don't append,
                # so allin stays a flat list of rows.
                allin.extend(result)
        df = pd.DataFrame.from_records(
            allin, columns=["Date", "Title", "Content"])
        df.to_csv("result.csv", index=False)

CLICK HERE to find out more related problems solutions.

Leave a Comment

Your email address will not be published.

Scroll to Top