library(httr)
library(rvest)
library(wordcloud2)
library(tm)
library(KoNLP)
library(tidyverse)
# Scrape article links and headline titles from Naver News listing pages.
#
# Outputs (top-level objects):
#   urls         character vector of listing-page URLs to fetch
#   news_href    character vector of every "dt > a" href found on the pages
#   news_text    character vector of headline titles, one per matched item
#   df_new_href  data frame with a single column, `news_href`
urls <- paste0(
  "https://news.naver.com/main/list.naver",
  "?mode=LS2D&mid=shm&sid2=258&sid1=101&date=20220527&page=2"
)

# Number of <li> positions probed inside the headline list on each page.
headlines_per_page <- 10L

# Collect per-page results in lists and flatten once at the end, instead of
# growing vectors with c() on every iteration.
href_by_page <- vector("list", length(urls))
text_by_page <- vector("list", length(urls))

for (page_idx in seq_along(urls)) {
  get_url <- GET(urls[[page_idx]])
  my_html <- read_html(get_url)

  # Every anchor that is a direct child of a <dt> contributes its link.
  my_nodes <- html_nodes(my_html, "dt > a")
  href_by_page[[page_idx]] <- html_attr(my_nodes, "href")

  # Pull the title of each headline item individually. Previously the
  # per-item selector was built but never used, so the text of the whole
  # <ul> was appended once per iteration (the same blob repeated 10 times).
  page_titles <- lapply(seq_len(headlines_per_page), function(item_idx) {
    # NOTE(review): this targets the anchor in the second child <dt> of each
    # item; an item whose title <dt> is not the second child yields no match
    # and is skipped -- confirm against the live page markup.
    title_selector <- paste0(
      "#main_content > div.list_body.newsflash_body > ",
      "ul.type06_headline > li:nth-child(", item_idx, ") > dl > ",
      "dt:nth-child(2) > a"
    )
    html_text(html_nodes(my_html, title_selector), trim = TRUE)
  })
  # Positions with no match contribute character(0) and vanish here.
  text_by_page[[page_idx]] <- unlist(page_titles)
}

news_href <- unlist(href_by_page)
news_text <- unlist(text_by_page)

df_new_href <- data.frame(news_href)