import pandas as pd
import numpy as np
import time
import warnings
from selenium import webdriver
from selenium.webdriver.common.by import By
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook
warnings.simplefilter(action = 'ignore')
문제 1.
AWS RDS (MySQL) 에 프로젝트 관련 Database 를 생성하고, 접근 가능한 사용자 계정을 생성하세요.
import mysql.connector
mydb = mysql.connector.connect(
host = "host",
port = 3306,
user = "admin",
password = "비밀번호"
)
cursor = mydb.cursor(buffered = True)
sql = 'create database oneday default character set utf8mb4'
cursor.execute(sql)
cursor.execute("create user 'oneday'@'%' identified by '1234'")
cursor.execute("grant all on oneday.* to 'oneday'@'%'")
제출 1.
result1 = "show create database oneday"
cursor.execute(result1)
result1 = cursor.fetchall()
for i in result1:
print(i)
result2 = "show grants for 'oneday'@'%'"
cursor.execute(result2)
result2 = cursor.fetchall()
for i in result2:
print(i)
문제 2.
스타벅스 이디야 데이터를 저장할 테이블을 다음의 구조로 생성하세요. (PDF 파일 참고)
cursor.execute('use oneday')
cBrand = "create table COFFEE_BRAND(id int not null auto_increment primary key, name varchar(12))"
cursor.execute(cBrand)
cStore = "create table COFFEE_STORE(id int not null auto_increment primary key, brand int, name varchar(32) not null, gu_name varchar(5) not null, address varchar(128) not null, lat decimal(16,14) not null, lng decimal(17,14) not null, foreign key (brand) references COFFEE_BRAND(id))"
cursor.execute(cStore)
문제 3.
Python 코드로 COFFEE_BRAND 데이터를 다음과 같이 입력하고 확인하세요. (PDF 파일 참고)
제출 2.
제출 3.
result3 = 'desc COFFEE_BRAND'
cursor.execute(result3)
result3 = cursor.fetchall()
for i in result3:
print(i)
result4 = 'desc COFFEE_STORE'
cursor.execute(result4)
result4 = cursor.fetchall()
for i in result4:
print(i)
# 데이터 입력
conn = mysql.connector.connect(
host = "host",
port = 3306,
user = "admin",
password = "비밀번호",
database = "oneday"
)
cursor = conn.cursor(buffered = True)
cursor.execute("insert into COFFEE_BRAND values (1, 'STARBUCKS'), (2, 'EDIYA')")
conn.commit()
result5 = "select * from COFFEE_BRAND"
cursor.execute(result5)
result5 = cursor.fetchall()
for i in result5:
print(i)
문제 4.
스타벅스 페이지에 접근하는 코드에서 팝업창이 없는 경우, 팝업창을 닫는 코드에서 에러가 발생합니다. 예외처리 해서 에러
메시지를 출력하고 실행이 중단되지 않도록 수정해주세요.
from selenium.common.exceptions import NoSuchElementException
try:
driver.find_element(By.CSS_SELECTOR, '.holiday_notice_close a').click()
except NoSuchElementException as e:
print(e)
문제 5.
Python 코드로 스타벅스 페이지에서 데이터를 가져올때, COFFEE_STORE 테이블에 바로 입력하도록 수정하세요.
url = "https://www.starbucks.co.kr/store/store_map.do"
driver = webdriver.Chrome()
driver.get(url)
# 지역검색 -> 서울 -> 서울 전체
driver.find_element(By.CSS_SELECTOR, '#container > div > form > fieldset > div > section > article.find_store_cont > article > header.loca_search > h3 > a').click()
time.sleep(0.5)
driver.find_element(By.CSS_SELECTOR, '#container > div > form > fieldset > div > section > article.find_store_cont > article > article:nth-child(4) > div.loca_step1 > div.loca_step1_cont > ul > li:nth-child(1) > a').click()
time.sleep(0.5)
xpath = '//*[@id="mCSB_2_container"]/ul/li[1]/a'
tag = driver.find_element(By.XPATH, xpath)
tag.click()
time.sleep(0.5)
soup = BeautifulSoup(driver.page_source, "html.parser")
seoul_list = driver.find_elements(By.CSS_SELECTOR, '#mCSB_3_container ul li')
soup.select_one(f'#mCSB_3_container > ul > li:nth-child(100) > p').text[:-9]
cursor = conn.cursor(buffered=True)
sql = "insert into COFFEE_STORE (brand, name, gu_name, address, lat, lng) values (1, %s, %s, %s, %s, %s)"
cnt = 1
for content in tqdm_notebook(seoul_list):
name = content.get_attribute('data-name')
address = soup.select_one(f'#mCSB_3_container > ul > li:nth-child({cnt}) > p').text[:-9]
lat = content.get_attribute('data-lat')
lng = content.get_attribute('data-long')
gu_name = address.split()[1] if address else ''
cnt += 1
cursor.execute(sql, (name, gu_name, address, lat, lng))
conn.commit()
# driver.close()
count_query = 'SELECT COUNT(*) FROM COFFEE_STORE where brand = 1;'
cursor.execute(count_query)
record_count = cursor.fetchone()[0]
record_count
check = "select * from COFFEE_STORE where brand = 1 limit 10"
cursor.execute(check)
result = cursor.fetchall()
for i in result:
print(i)
문제 6.
Python 코드로 이디야 페이지에서 데이터를 가져올때, COFFEE_STORE 테이블에 바로 입력하도록 수정하세요.
url_ediya = 'https://www.ediya.com/contents/find_store.html'
driver = webdriver.Chrome()
driver.get(url_ediya)
driver.find_element(By.CSS_SELECTOR, '#contentWrap > div.contents > div > div.store_search_pop > ul > li:nth-child(2) > a').click()
cursor = conn.cursor(buffered = True)
gu_list = []
cursor.execute("select distinct(gu_name) from COFFEE_STORE")
result = cursor.fetchall()
for row in result:
gu_list.append(str('서울 ') + row[0])
# 구 검색
gu = driver.find_element(By.XPATH, '//*[@id="keyword"]')
gu.clear()
gu.send_keys(gu_list[0])
time.sleep(0.5)
driver.find_element(By.XPATH, '//*[@id="keyword_div"]/form/button').click()
ediya = driver.page_source
soup_ediya = BeautifulSoup(ediya, "html.parser")
contents = soup_ediya.select('#placesList li')
import googlemaps
gmaps_key = "gmaps_key"
gmaps = googlemaps.Client(key = gmaps_key)
# 전체 데이터 수집
driver = webdriver.Chrome()
driver.get('https://www.ediya.com/contents/find_store.html')
driver.find_element(By.CSS_SELECTOR, '#contentWrap > div.contents > div > div.store_search_pop > ul > li:nth-child(2) > a').click()
sql = "INSERT INTO COFFEE_STORE (brand, name,gu_name, address, lat, lng) VALUES (2, %s, %s, %s, %s, %s)"
for gu in tqdm_notebook(gu_list):
# 검색어 입력
keyword = driver.find_element(By.CSS_SELECTOR, '#keyword')
keyword.clear()
keyword.send_keys(gu)
# 검색 클릭
driver.find_element(By.CSS_SELECTOR, '#keyword_div > form > button').click()
time.sleep(1) # 검색하고 로딩 기다리기
html = driver.page_source
soup_ed = BeautifulSoup(html, 'html.parser')
contents = soup_ed.select('#placesList li')
for content in contents:
name = content.select_one('dt').text
address = content.select_one('dd').text
gu_name = address.split(' ')[1]
print(f'{name}--{address}--{gu_name}')
tmp = gmaps.geocode(address, language ='ko')
lat = tmp[0].get("geometry")["location"]["lat"] if tmp else ''
lng = tmp[0].get("geometry")["location"]["lng"] if tmp else ''
cursor.execute(sql,(name, gu_name, address, lat, lng))
conn.commit()
driver.close()
count_query = 'SELECT COUNT(*) FROM COFFEE_STORE where brand = 2;'
cursor.execute(count_query)
record_count = cursor.fetchone()[0]
record_count
cursor.execute("select * from COFFEE_STORE where brand = 2 limit 10;")
result = cursor.fetchall()
for row in result:
print(row)
문제 7.
Python 코드에서 다음의 데이터를 쿼리를 사용하여 조회하세요.
strb = "select s.gu_name, count(s.brand) from COFFEE_BRAND as b, COFFEE_STORE as s where b.id = s.brand and b.name='STARBUCKS' group by s.gu_name order by count(s.brand) desc limit 5"
cursor.execute(strb)
result = cursor.fetchall()
for row in result:
print(row)
edy = "select s.gu_name, count(s.brand) from COFFEE_BRAND as b, COFFEE_STORE as s where b.id = s.brand and b.name='EDIYA' group by s.gu_name order by count(s.brand) desc limit 5"
cursor.execute(edy)
result = cursor.fetchall()
for row in result:
print(row)
# 스타벅스
gu_each_st = "select gu_name, '스타벅스' as brand, count(brand) as count from COFFEE_STORE where brand = 1 group by gu_name"
cursor.execute(gu_each_st)
result = cursor.fetchall()
for row in result:
print(row)
# 이디야
gu_each_ed = "select gu_name, '이디야' as brand, count(brand) as count from COFFEE_STORE where brand = 2 group by gu_name"
cursor.execute(gu_each_ed)
result = cursor.fetchall()
for row in result:
print(row)
count = ("select s.gu_name, "
"sum(s.brand=1) as count1, "
"sum(s.brand=2) as count2 "
"from COFFEE_BRAND b, COFFEE_STORE s "
"where b.id = s.brand "
"group by s.gu_name "
"order by s.gu_name;")
cursor.execute(count)
result = cursor.fetchall()
# (구이름, 스타벅스 매장 개수, 이디야 매장 개수)
for row in result:
gu_name, count1, count2 = row
print(f'({gu_name}, {count1}, {count2})')
문제 8.
시각화 프로젝트를 위하여 다음의 규칙으로 쿼리하여 CSV 파일로 저장합니다. (Python 코드로 작업)
final = ("select * "
"from (select s.id as s_id, b.name as s_brand, s.name as s_name, s.gu_name as s_gu, s.address as s_address, s.lat as s_lat, s.lng as s_lng "
"from COFFEE_STORE as s, COFFEE_BRAND as b "
"where b.id = s.brand and b.name like 'STARBUCKS') as st, "
"(select s.id as e_id, b.name as e_brand, s.name as e_name, s.gu_name as e_gu, s.address as e_address, s.lat as e_lat, s.lng as e_lng "
"from COFFEE_STORE as s, COFFEE_BRAND as b "
"where b.id = s.brand and b.name like 'EDIYA') as ed "
"order by st.s_id, ed.e_id")
cursor.execute(final)
result = cursor.fetchall()
df = pd.DataFrame(result)
df.columns = ['s_id', 's_brand', 's_name', 's_gu', 's_address', 's_lat', 's_lng',
'e_id', 'e_brand', 'e_name', 'e_gu', 'e_address', 'e_lat', 'e_lng']
df.to_csv('./starbucks_ediya.csv', index = False, encoding = "euc-kr")