SQL - mini test _ 유가 분석

jaam._.mini·2024년 1월 28일
0

하루 온종일 풀어본 SQL 유가 분석

ctrl+shift+i

import mysql.connector

conn = mysql.connector.connect(
    host = "",
    port = 3306,
    user = "oneday",
    password = "1234",
    database = "oneday"
)

cursor = conn.cursor(buffered=True)
# gas_brand
sql_b = "CREATE TABLE GAS_BRAND(" + \
            "id int not null auto_increment primary key, " + \
            "name varchar(16) not null)"

cursor.execute(sql_b)

# gas_station
sql_s = "CREATE TABLE GAS_STATION(" + \
            "id int auto_increment primary key, " +\
            "brand int not null, " +\
            "name varchar(64) not null, " +\
            "city char(2) not null, " +\
            "gu varchar(10) not null, " +\
            "address varchar(128) not null, " +\
            "gasoline int not null, " +\
            "diesel int not null, " +\
            "self boolean not null, " +\
            "car_wash boolean not null, " +\
            "charging_station boolean not null, " +\
            "car_maintenance boolean not null, " +\
            "convenience_store boolean not null, " +\
            "24_hours boolean not null, " +\
            "lat decimal(16,14) not null, " +\
            "lng decimal(17,14) not null, " +\
            "foreign key (brand) references GAS_BRAND(id));"

cursor.execute(sql_s)
queries1 = [
    (1, 'SK에너지'),
    (2, 'HD현대오일뱅크'),
    (3, 'GS칼텍스'),
    (4, 'S-OIL'),
    (5, '알뜰주유소'),
    (6, '자가상표')
]

query = "INSERT INTO GAS_BRAND VALUES (%s, %s)"
cursor.executemany(query, queries1)

conn.commit()
sql_result = "DESC GAS_STATION"
cursor.execute(sql_result)

result = cursor.fetchall()
for i in result:
    print(i)
sql_result = "SELECT * FROM GAS_BRAND"
cursor.execute(sql_result)

result = cursor.fetchall()
for i in result:
    print(i)
# 화폐단위 문자형 >>  숫자형 
def stringToInt(s):
    if s != '':
        s = s.replace(',', '')
        return int(s)
    else: 
        return None
        
stringToInt('1,000')
# 주유소 브랜드를 입력하면 GAS_BRAND 데이터를 참고하여 ID 반환
def getID(brand):
    sql_result = "SELECT * FROM GAS_BRAND"
    cursor.execute(sql_result)
    result = cursor.fetchall()
    for i in result:
        if i[1] == brand:
            return i[0]
        # 브랜드명이 '알뜰(ex)'인 경우 있음
        elif brand == '알뜰(ex)':
            return 5

getID('SK에너지')
# 주소를 입력받아 구 이름 반환
def getGu(add):
    addList = add.split()
    return addList[1]
    
getGu('서울시 강남구 헌릉로 730')
import googlemaps
gmaps_key = 'AIzaSyALyv5xMRzF_RJUIeJ84qh25GgNWoIJ8LM'
gmaps = googlemaps.Client(key = gmaps_key)

# 주소를 입력받아 위도, 경도 반환
def getLL(add):
    tmp = gmaps.geocode(add, language='ko')
    lat = tmp[0].get("geometry")["location"]["lat"]
    lng = tmp[0].get("geometry")["location"]["lng"]

    return lat, lng

getLL('서울시 강남구 헌릉로 730')
import time 
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook

# 오피넷 -> 구 정보 가져오기
url = 'https://www.opinet.co.kr/searRgSelect.do'
driver = webdriver.Chrome(executable_path='../driver/chromedriver.exe')
driver.get(url)
# 시/도
sido_list_raw = driver.find_element(By.ID, "SIDO_NM0")
sido_list = sido_list_raw.find_elements(By.TAG_NAME, "option")
# 서울 선택
seoul_select = sido_list[1].get_attribute("value")
sido_list_raw.send_keys(seoul_select)
# 구 리스트 만들기
gu_list_raw = driver.find_element(By.ID, "SIGUNGU_NM0")
gu_list = gu_list_raw.find_elements(By.TAG_NAME, "option")

gu_names = [option.get_attribute("value") for option in gu_list]
gu_names = gu_names[1:]
sql = "INSERT INTO GAS_STATION (brand, name, city, gu, address, gasoline, diesel, self, " +\
        "car_wash, charging_station, car_maintenance, convenience_store, 24_hours, lat, lng) " +\
        "VALUES (%s, %s, '서울', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
def check(data, tag):
    return 'off' not in data.select_one(tag)['src']
sqltmp = "ALTER TABLE GAS_STATION MODIFY diesel int NULL;"
cursor.execute(sqltmp)
conn.commit()
for gu in tqdm_notebook(gu_names):
    element = driver.find_element(By.ID, 'SIGUNGU_NM0')
    element.send_keys(gu)
    time.sleep(0.5)

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    # 검색할 주유소 개수
    cnt = int(driver.find_element(By.ID, 'totCnt').text)

    for i in range(1, cnt+1):

        # 각 주유소 클릭
        station = driver.find_element(By.CSS_SELECTOR, f'#body1 > tr:nth-child({i}) > td.rlist > a')
        station.click()

        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')

        data = soup.select('#os_dtail_info')[0]

        # brand
        brand = getID(data.select_one('#poll_div_nm').text)

        # name
        name = data.select_one('.header').text.strip()

        # address
        address = data.select_one('#rd_addr').text

        # gasoline
        gasoline = stringToInt(data.select_one('#b027_p').text)

        # diesel
        diesel = stringToInt(data.select_one('#d047_p').text)

        # self 
        slf = data.select_one('#SPAN_SELF_VLT_YN_ID')
        if type(slf.find('img')) == type(None):
            is_self = False
        else:
            is_self = True

        # car_wash
        car_wash = check(data, '#cwsh_yn')

        # charging_station
        charging_station = check(data, '#lpg_yn')

        # car_maintenance
        car_maintenance = check(data, '#maint_yn')

        # convenience_store
        convenience_store = check(data, '#cvs_yn')

        # 24_hours
        sel24 = check(data, '#sel24_yn')

        tmp = gmaps.geocode(address, language='ko')
        # lat
        lat = tmp[0].get('geometry')['location']['lat']

        # lng
        lng = tmp[0].get('geometry')['location']['lng']

        cursor.execute(sql, (brand, name, gu, address, gasoline, diesel, 
                            is_self, car_wash, charging_station, car_maintenance, convenience_store, sel24, lat, lng))
         
        conn.commit()
# 데이터 개수 확인
cursor.execute("select count(*) from GAS_STATION")
result = cursor.fetchall()
print(result[0])
# 데이터 상위 10개 출력
cursor.execute("select * from GAS_STATION limit 10")
result = cursor.fetchall()
for i in result:
    print(i)
import pandas as pd

sql = "select s.id, b.name 'brand', s.name, s.city, s.gu, s.address, s.gasoline, s.diesel, s.self, " +\
        "s.car_wash, s.charging_station, s.car_maintenance, s.convenience_store, s.24_hours, " +\
        "s.lat, s.lng " +\
        "from GAS_BRAND b, GAS_STATION s " +\
        "where b.id = s.brand ORDER BY s.id"

cursor.execute(sql)
result = cursor.fetchall()

columns = [i[0] for i in cursor.description]
df = pd.DataFrame(data=result, columns=columns)
df.to_csv("[DS]sql2_oilstation_ohjaemin.csv", index=False, encoding='euc-kr')
df = pd.read_csv("[DS]sql2_chasuhui.csv",  index_col=0, thousands=',', encoding='euc-kr')
df.head(10)
# 미왕빌딩 주소: 서울 강남구 강남대로 364
lat, lng = getLL('서울 강남구 강남대로 364')
lat, lng
# POINT(경도, 위도)
# SET @location = POINT(경도, 위도) : 기준이 되는 위치 설정
# ST_DISTANCE_SPHERE(POINT, POINT) : 두 좌표 간 거리(단위: m)

cursor.execute("SET @location = POINT(127.029340, 37.495599)")

cursor.execute("SELECT * FROM (SELECT s.id id, b.name brand, s.name name, address, \
    ST_DISTANCE_SPHERE(@location, POINT(lng, lat))/1000 distance \
    FROM GAS_BRAND b, GAS_STATION s WHERE b.id = s.brand) t \
    WHERE distance*1000 <= 1000")

result = cursor.fetchall()
for row in result:
    print(row)
cursor.execute("SELECT * FROM (SELECT s.id id, b.name brand, s.name name, address, \
    gasoline, self, 24_hours, convenience_store, \
    ST_DISTANCE_SPHERE(@location, POINT(lng, lat))/1000 distance \
    FROM GAS_BRAND b, GAS_STATION s \
    WHERE b.id = s.brand and self = 1 and 24_hours = 1 and convenience_store = 1 \
    ORDER BY distance LIMIT 10) t \
    ORDER BY gasoline")

result = cursor.fetchall()
for row in result:
    print(row)
cursor.execute("SELECT gu, b.name brand, avg(gasoline) avg_price \
    FROM GAS_BRAND b, GAS_STATION s \
    WHERE b.id = s.brand GROUP BY gu, brand ORDER BY avg_price")

result = cursor.fetchall()
for row in result:
    print(row)
conn.close()
driver.quit()
profile
비전공자의 데이터 공부법

0개의 댓글