import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
# Scrape the Apple Music "top-100-usa" playlist page and store one document per
# song row (title, rank, singer) in the `songs` collection of `dbsparta`.

# NOTE(review): the connection string embeds a username and password in the
# source file; consider loading it from configuration or the environment.
client = MongoClient('mongodb+srv://test:sparta@cluster0.mja2a.mongodb.net/Cluster0?retryWrites=true&w=majority')
db = client.dbsparta

# User-Agent header sent with the page request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

# The timeout keeps the script from blocking forever on an unresponsive server,
# and raise_for_status() prevents an HTTP error page from being parsed as if it
# were the playlist.
data = requests.get(
    'https://music.apple.com/us/playlist/top-100-usa/pl.606afcbb70264d2eb2b51d8dbcfa6a12',
    headers=headers,
    timeout=10,
)
data.raise_for_status()
soup = BeautifulSoup(data.text, 'html.parser')

# Every direct child <div> of the songs list; children that are not complete
# song rows are filtered out inside the loop.
songs = soup.select('#web-main > div.loading-inner > div > div.product-info > div > div.songs-list.typography-callout > div')

# CSS selectors evaluated relative to a single child of the songs list.
TITLE_SELECTOR = 'div.songs-list__col.songs-list__col--song.typography-body > div > div.songs-list-row__song-wrapper > div > div.songs-list-row__explicit-wrapper > div'
RANK_SELECTOR = 'div.songs-list__col.songs-list__col--song.typography-body > div > div.songs-list-row__rank'
SINGER_SELECTOR = 'div.songs-list__col.songs-list__col--song.typography-body > div > div.songs-list-row__song-wrapper > div > div.songs-list-row__by-line > span > a'

for song in songs:
    title = song.select_one(TITLE_SELECTOR)
    rank_tag = song.select_one(RANK_SELECTOR)
    singer_tag = song.select_one(SINGER_SELECTOR)

    # select_one() returns None when nothing matches; skip rows that lack any
    # of the three fields instead of failing with AttributeError on `.text`.
    if title is None or rank_tag is None or singer_tag is None:
        continue

    song_title = title.text
    song_rank = rank_tag.text
    song_singer = singer_tag.text

    doc = {
        'title': song_title,
        'rank': song_rank,
        'singer': song_singer,
    }
    db.songs.insert_one(doc)
# Beautiful Soup을 이용한 크롤링으로 US Top 100 노래/가수를 뽑아냈다!