Web Crawling (BS)

고독한 키쓰차·2021년 7월 21일
0

Time Series

목록 보기
4/6

웹 크롤링으로 환율 데이터 따오기

import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')  # suppress ALL warnings (e.g. the InsecureRequestWarning caused by verify=False below)
import time
import random
import re

import os
# NOTE(review): "http://1" / "https://1" are placeholders — enter the real proxy address here.
os.environ["HTTP_PROXY"]="http://1" # enter proxy address
os.environ["HTTPS_PROXY"]="https://1"

# read_html
rootUrl = 'https://finance.naver.com/marketindex/exchangeDetail.nhn?marketindexCd=FX_USDKRW'

def read_html(url, timeout=10):
    """Fetch *url* through the configured proxy and return the raw HTML.

    Parameters
    ----------
    url : str
        Full page URL to fetch (query string included).
    timeout : float, optional
        Seconds to wait for the response before raising
        ``requests.Timeout`` — prevents the crawl from hanging forever
        on a dead proxy. Defaults to 10.

    Returns
    -------
    str
        Decoded response body.
    """
    # A real User-Agent value: the original placeholder
    # ("User-Agent: # ...") was not a valid UA string and many sites
    # would reject it outright.
    headers = {"user-agent": "Mozilla/5.0 (compatible; fx-crawler/1.0)"}
    # GET, not POST: we are retrieving a page, not submitting a form.
    # verify=False is kept deliberately — the proxy configured at module
    # level breaks TLS verification; the resulting warnings are filtered
    # at the top of the file.
    req = requests.get(url, headers=headers, verify=False, timeout=timeout)
    return req.text
    
    
# Country code -> Naver market-index daily-quote URL for the KRW cross rate:
#   HR: Hungary (Forint)              -> FX_HUFKRW
#   MO: Macao (MOP), proxied via HKD  -> FX_HKDKRW
#   NO: Norway (Kroner)               -> FX_NOKKRW
#   SA: South Africa (ZAR)            -> FX_ZARKRW
_BASE = 'https://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd='
URL = {
    country: _BASE + fx_code
    for country, fx_code in [('HR', 'FX_HUFKRW'),
                             ('MO', 'FX_HKDKRW'),
                             ('NO', 'FX_NOKKRW'),
                             ('SA', 'FX_ZARKRW')]
}

# Crawl every daily-quote page for each currency and collect records of
# the form [country_code, 'YYYY-MM-DD', rate_string] into `lst`.
lst = []
for code, base_url in URL.items():
    for page in range(1, 130):  # the site paginates the daily quotes
        html = read_html(f'{base_url}&page={page}')
        rows = BeautifulSoup(html, 'html.parser').select('tbody > tr')

        for row in rows:
            date_cells = row.select('td.date')
            num_cells = row.select('td.num')
            # Guard against spacer/header rows with no data cells —
            # the original indexed [0] unconditionally and would raise
            # IndexError on such rows.
            if not date_cells or not num_cells:
                continue
            lst.append([code,
                        date_cells[0].text.replace('.', '-').strip(),
                        num_cells[0].text.strip()])

        # Polite randomized delay between requests so we don't hammer
        # the server (`time` and `random` are imported at the top for
        # exactly this purpose but were never used).
        time.sleep(random.uniform(0.5, 1.5))
profile
Data Scientist or Gourmet

0개의 댓글