import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs
import time
def create_soup(url, timeout=10):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.get(
        url,
        # A browser-like user agent replaces the requests default
        # (presumably because the site rejects non-browser clients).
        headers={"user-agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    response.raise_for_status()
    # "html" is a markup-type feature, not a specific parser name: bs4 picks
    # the best HTML parser that is installed in the current environment.
    return bs(response.text, "html")
def print_news(index, title, link):
    """Print one news entry as a numbered title line followed by its link.

    Args:
        index: Zero-based position of the entry; shown one-based.
        title: Headline text to display.
        link: URL of the article.
    """
    number = index + 1
    print(f"{number}. {title}")
    print(f" (링크 : {link})")
def scrape_it_news():
    """Print the headlines and links found on a Naver news list page.

    Downloads the list page, looks up every ``<li>`` inside the
    ``div.list_body.newsflash_body`` container, and prints each entry's
    title and link via ``print_news``, followed by one blank line.

    Raises:
        ValueError: If the list container is not present in the page
            (for example after a site layout change).
    """
    # NOTE(review): sid1=101 / sid2=259 looks like Naver's economy/finance
    # section rather than IT, despite the function name -- confirm.
    url = "https://news.naver.com/main/list.naver?mode=LS2D&mid=shm&sid1=101&sid2=259"
    soup = create_soup(url)

    container = soup.find("div", attrs={"class": "list_body newsflash_body"})
    if container is None:
        raise ValueError(
            "news list container 'list_body newsflash_body' not found at " + url
        )

    for index, news in enumerate(container.find_all("li")):
        # When the item contains an <img>, the title is taken from the second
        # <a> (presumably the first one wraps the thumbnail).
        a_idx = 1 if news.find("img") else 0
        anchors = news.find_all("a")
        if len(anchors) <= a_idx:
            # Not a regular news entry; skip instead of raising IndexError.
            continue

        a_tag = anchors[a_idx]
        link = a_tag.get("href")
        if link is None:
            # An anchor without a target has nothing to print as a link.
            continue

        title = a_tag.get_text().strip()
        print_news(index, title, link)
    print()
# Run the scraper once. There is no __main__ guard, so this call (and its
# network request) also executes whenever the file is imported.
scrape_it_news()
결과
앞으로 남은 해야 할 것