# Fetch exchange-rate data via web crawling
import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import time
import random
import re
import os
# Proxy settings picked up from the environment. The values below are
# placeholders: replace them with the real proxy address.
os.environ.update({
    "HTTP_PROXY": "http://1",
    "HTTPS_PROXY": "https://1",
})

# Naver Finance detail page for the USD/KRW exchange rate.
rootUrl = 'https://finance.naver.com/marketindex/exchangeDetail.nhn?marketindexCd=FX_USDKRW'
def read_html(url, timeout=10):
    """Request ``url`` and return the response body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The decoded response body (``Response.text``). The HTTP status code
        is not checked, so an error page is returned like any other body.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.
    """
    # Placeholder value: replace it with a real User-Agent string before running.
    headers = {"user-agent": "User-Agent: # 필요한 정보 입력"}
    # NOTE(review): the page is requested with POST and with TLS certificate
    # verification disabled (verify=False). Both are kept from the original;
    # confirm they are still required (e.g. by the proxy in use).
    req = requests.post(url, headers=headers, verify=False, timeout=timeout)
    return req.text
# Daily quote pages on Naver Finance, keyed by a two-letter country key.
# HR : Hungary (Forint) => FX_HUFKRW
# MO : Macau (MOP) = Hong Kong => FX_HKDKRW (presumably HKD is used as a stand-in)
# NO : Norway (Kroner) => FX_NOKKRW
# SA : South Africa (ZAR) => FX_ZARKRW
# NOTE(review): in ISO 3166 "HR" is Croatia (Hungary is "HU") and "SA" is
# Saudi Arabia (South Africa is "ZA"); confirm the keys match whatever
# consumes this data.
_DAILY_QUOTE_URL = 'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd='
URL = {
    country: _DAILY_QUOTE_URL + code
    for country, code in (
        ('HR', 'FX_HUFKRW'),
        ('MO', 'FX_HKDKRW'),
        ('NO', 'FX_NOKKRW'),
        ('SA', 'FX_ZARKRW'),
    )
}
# Collect one [country key, date, value] entry per table row of every
# daily quote page. Dates are rewritten from "YYYY.MM.DD" to "YYYY-MM-DD";
# the value is the text of the row's first `td.num` cell.
lst = []
for k, u in URL.items():
    for i in range(1, 130):  # paging: pages 1 through 129
        fx = read_html(f'{u}&page={i}')
        fx_rows = BeautifulSoup(fx, 'html.parser').select('tbody > tr')
        for r in fx_rows:
            date = r.select_one('td.date')
            num = r.select_one('td.num')
            if date is None or num is None:
                # A row without a date or value cell would have raised
                # IndexError; skip it so one odd row does not abort the crawl.
                continue
            lst.append([k, date.text.replace('.', '-').strip(), num.text.strip()])