๐Ÿ“ˆ์ˆ˜์š”์˜ˆ์ธก ๋ชจ๋ธ

๋‚˜ ์•ˆํ•ดยท2023๋…„ 3์›” 10์ผ
0

1. 1์ฐจ

1.1 ๐Ÿงพ์ „์ฒ˜๋ฆฌ

1.1.1 ๊ณต๊ณต๋ฐ์ดํ„ฐ์˜ ๋ฒ”์œ„๊ฐ€ ๋„ˆ๋ฌด ๋„“์–ด์„œ ์„œ๋น„์Šค ์ง€์—ญ์„ 2ํ˜ธ์„ ์— ํ•œ์ •ํ•˜๊ณ  ๊ทธ์— ๋”ฐ๋ฅธ ์ „์ฒ˜๋ฆฌ ์ง„ํ–‰

    # ์—ญ ์ด๋ฆ„์ˆœ์œผ๋กœ ์ •๋ ฌ
    data = self.passengers.sort_values(by='์—ญ๋ช…')  # = station_location.sort_values(by='์—ญ๋ช…')
    # ์‚ฌ์šฉํ•  ํ˜ธ์„ ์ธ 2ํ˜ธ์„ ๋งŒ ์ถ”์ถœ
    line2 = data[data["๋…ธ์„ ๋ช…"] == "2ํ˜ธ์„ "]
    print(line2)

1.1.2 y๊ฐ’ ์ƒ์„ฑ

  • ์›๋ณธ ๋ฐ์ดํ„ฐ์—์„œ๋Š” ์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜์™€ ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜๋งŒ ๋‚˜์™€์žˆ์–ด์„œ y๊ฐ’์œผ๋กœ ์‚ฌ์šฉํ•  ์ปฌ๋Ÿผ์ด ์—†๋Š” ์ƒํƒœ์ด๋ฏ€๋กœ ๊ฐ ์ผ์ž๋ณ„ ์Šนํ•˜์ฐจ ๋ณ€๋™์„ ๋‚˜ํƒ€๋‚ด๋Š” ์ปฌ๋Ÿผ์„ ์ถ”๊ฐ€

    # Build day-over-day % change lists for boarding/alighting counts.
    # Row 0 has no previous day, so its change is recorded as 0.
    for i, j in enumerate(data['์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜']):
        if i != 0:
            # (today - yesterday) / yesterday * 100, rounded to 2 decimals
            on_rate.append(round((data['์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜'][i] - data['์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜'][i-1])/data['์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜'][i-1]*100, 2))
            off_rate.append(round((data['ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜'][i] - data['ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜'][i-1])/data['ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜'][i-1]*100, 2))
        else:
            on_rate.append(0)
            off_rate.append(0)
    else:
        # for/else: this branch always runs once the loop finishes normally;
        # it is a no-op here and could be removed.
        pass
    # Wrap the two change lists as columns and append them to the data.
    df_on = pd.DataFrame(on_rate, columns=['์Šน์ฐจ๋ณ€๋™(%)'])
    df_off = pd.DataFrame(off_rate, columns=['ํ•˜์ฐจ๋ณ€๋™(%)'])
    df = pd.concat([data, df_on, df_off], axis=1)
    print(df)
    df.to_csv(path_or_buf=f"./data/rate/{k}์—ญ.csv", index=False)
    print(f"{k}์—ญ ์ €์žฅ ์™„๋ฃŒ")
    # The accumulator lists are shared across stations; empty them so the
    # next station starts fresh.
    on_rate.clear()
    off_rate.clear()

    1.1.2.1 ์ „์ฒ˜๋ฆฌ ๊ณผ์ •์— ์—ญ๋ณ„๋กœ csvํŒŒ์ผ์„ ๋ถ„๋ฆฌํ•˜๋Š” ๊ณผ์ • ์ถ”๊ฐ€

    # Load the merged, pre-sorted ridership data once.
    data = pd.read_csv('./data/sorted_subway.csv')
    meta = self.count_index()

    # Split into one CSV per station. A boolean mask selects exactly the rows
    # whose '์—ญ๋ช…' matches, in their original order — same result as the old
    # per-row Python scan, but one vectorized pass per station instead of
    # O(rows) comparisons in Python, and no shared row-accumulator list.
    for k in meta:
        df = data.loc[data['์—ญ๋ช…'] == k,
                      ['์‚ฌ์šฉ์ผ์ž', '์—ญ๋ช…', '์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜', 'ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜']]
        df.to_csv(path_or_buf=f"{k}์—ญ.csv", index=False)
        print(f"{k}์—ญ ์ €์žฅ ์™„๋ฃŒ")

    1.1.2.2 ์—ญ๋ณ„ csv๋กœ ๋ณ€๊ฒฝํ•œ ์ดํ›„ ์•„๋ž˜ ์ฝ”๋“œ๋กœ y๊ฐ’ ์ƒ์„ฑ

    # Build the y columns (day-over-day % change) for every station CSV.
    meta = self.count_index()

    for k in meta:
        data = pd.read_csv(f'./data/save/{k}์—ญ.csv')
        # pct_change computes (x[i] - x[i-1]) / x[i-1] per row; the first row
        # is NaN (no previous day), which fillna(0) maps to 0 — mirroring the
        # old loop's "first day = 0" rule. Vectorized, and no shared mutable
        # accumulator lists to clear between stations.
        on_change = (data['์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜'].pct_change() * 100).round(2).fillna(0)
        off_change = (data['ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜'].pct_change() * 100).round(2).fillna(0)
        df = pd.concat([data,
                        on_change.rename('์Šน์ฐจ๋ณ€๋™(%)'),
                        off_change.rename('ํ•˜์ฐจ๋ณ€๋™(%)')], axis=1)
        print(df)
        df.to_csv(path_or_buf=f"./data/rate/{k}์—ญ.csv", index=False)
        print(f"{k}์—ญ ์ €์žฅ ์™„๋ฃŒ")

    1.1.2.3 ๋ถ„ํ•  ๋ฐ y๊ฐ’ ์ƒ์„ฑ ๊ฒฐ๊ณผ

  • ์ „์ฒ˜๋ฆฌ ์ด์ „

  • ์„œ๋น„์Šค ๋Œ€์ƒ ์ง€์—ญ ์ถ•์†Œ

  • ๊ฐ ์—ญ๋ณ„ csv ์ƒ์„ฑ

  • y๊ฐ’์ธ ์Šนํ•˜์ฐจ ์Šน๊ฐ์ˆ˜ ๋ณ€๋™ ์ปฌ๋Ÿผ ์ถ”๊ฐ€

1.1.3 Min Max Scale

# Min-max scale every column of `data` into [0, 1] (fragment of a method body).
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
return numerator / (denominator + 1e-7) # add a tiny epsilon (1e-7) so a constant column cannot cause a divide-by-zero

# Usage: scale the three feature columns; the target stays unscaled.
df = pd.read_csv('./data/rate/๊ฐ•๋‚จ์—ญ.csv')
dfx = df[['์‚ฌ์šฉ์ผ์ž', '์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜', 'ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜']]
dfx = self.min_max_scaler(dfx)
dfy = df[['์Šน์ฐจ๋ณ€๋™(%)']]

print(f'dfx : {dfx.head()}')
print(f'0~1 : {dfx.describe()}')
print(f'y : {dfy}')

1.2 ๋ชจ๋ธ ์ƒ์„ฑ

  • ๊ณ„ํš : ๊ฐ ์—ญ๋ณ„๋กœ ์ผ๋ณ„ ์Šนํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜๋ฅผ ์˜ˆ์ธกํ•œ๋‹ค

    import gmaps
    import keras
    import pandas as pd
    import numpy as np
    from keras import Sequential
    from keras.layers import Dropout, LSTM, Dense
    from matplotlib import pyplot as plt
    
    from preprocess import Preprocess

class Forecast_model():
    """LSTM demand-forecast model.

    Trains one model per station (listed by Preprocess.count_index()),
    predicting the day-over-day % change in alighting passengers
    ('ํ•˜์ฐจ๋ณ€๋™(%)') from a sliding window of past daily features.
    """

    def __init__(self):
        # Markdown extraction had mangled this to `def init` — restored.
        pass

    def model(self, window_size=10):
        """Train and save one LSTM per station.

        window_size: number of past days fed to the LSTM per prediction
        (default 10 keeps the original hard-coded behavior).
        """
        meta = Preprocess()

        for k in meta.count_index():
            df = pd.read_csv(f'./data/rate/{k}์—ญ.csv')
            dfx = df[['์‚ฌ์šฉ์ผ์ž', '์Šน์ฐจ์ด์Šน๊ฐ์ˆ˜', 'ํ•˜์ฐจ์ด์Šน๊ฐ์ˆ˜']]
            dfx = meta.min_max_scaler(dfx)
            dfy = df[['ํ•˜์ฐจ๋ณ€๋™(%)']]

            print(f'1. min max ํ™•์ธ')
            print(dfx.describe())
            print('#####################################')
            print('2. ํ•˜์ฐจ ๋ณ€๋™')
            print(dfy)

            X = dfx.values.tolist()
            y = dfy.values.tolist()

            # Sliding windows: each sample is `window_size` consecutive days
            # of features; the target is the NEXT day's change (the target
            # day itself is not inside the window).
            data_X = []
            data_y = []
            for i in range(len(y) - window_size):
                data_X.append(X[i: i + window_size])
                data_y.append(y[i + window_size])
            if data_X:
                # Guard: the old unconditional print of the loop variables
                # raised NameError when the series was shorter than the window.
                print(data_X[-1], "->", data_y[-1])
                print(data_X[0])

            print('์ „์ฒด ๋ฐ์ดํ„ฐ์˜ ํฌ๊ธฐ :')
            print(len(data_X), len(data_y))

            # Chronological 70/30 train/test split — no shuffling, since the
            # samples are ordered in time.
            train_size = int(len(data_y) * 0.7)
            train_X = np.array(data_X[:train_size])
            train_y = np.array(data_y[:train_size])
            test_X = np.array(data_X[train_size:])
            test_y = np.array(data_y[train_size:])

            print('ํ›ˆ๋ จ ๋ฐ์ดํ„ฐ์˜ ํฌ๊ธฐ :', train_X.shape, train_y.shape)
            print('ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ์˜ ํฌ๊ธฐ :', test_X.shape, test_y.shape)

            # Two stacked LSTMs with light dropout; input is
            # (window_size, 3 features). NOTE(review): the 3 must track the
            # number of columns in dfx — a past ValueError was exactly this
            # shape mismatch (expected (None, 10, 4), found (None, 10, 3)).
            model = Sequential()
            model.add(LSTM(units=20, activation='relu', return_sequences=True,
                           input_shape=(window_size, 3)))
            model.add(Dropout(0.1))
            model.add(LSTM(units=20, activation='relu'))
            model.add(Dropout(0.1))
            model.add(Dense(units=1))
            model.summary()
            model.compile(optimizer='adam', loss='mean_squared_error')
            model.fit(train_X, train_y, epochs=70, batch_size=30)
            pred_y = model.predict(test_X)

            # Visual check: predicted vs. actual change on the held-out tail.
            plt.figure()
            plt.plot(test_y, color='red', label='floating off')
            plt.plot(pred_y, color='blue', label='pred off')
            plt.title(f'{k} floating population', family='Malgun Gothic')
            plt.xlabel('time')
            plt.ylabel('off passengers')
            plt.legend()
            plt.show()
            model.save(f'./model/{k}์—ญ_์˜ˆ์ธก.h5')

# Script entry point. Markdown extraction had stripped the dunder
# underscores ("if name == 'main'") — restored to the standard guard.
if __name__ == '__main__':
    Forecast_model().model()

![](https://velog.velcdn.com/images/boost_dev/post/d07df164-c58b-4597-a9de-4f0968413e13/image.png)

> ## ๋ณด์™„์‚ฌํ•ญ
๊ฐ ์—ญ๋ณ„๋กœ ๋ชจ๋ธ์„ ๋งŒ๋“ค๊ณ  ๊ฐ™์€ ๊ธฐ๊ฐ„๋™์•ˆ ์ง€ํ•˜์ฒ  ์Šน๊ฐ ๋ณ€๋™์ด ๊ฐ€์žฅ ์‹ฌํ•œ ์ง€์—ญ ์œ„์ฃผ๋กœ ์ง€๋„์— ๋‚˜ํƒ€๋‚ด๋Š”๊ฒŒ ๊ธฐ์กด์˜ ๊ณ„ํš์ด์—ˆ์œผ๋‚˜ ๋ชจ๋“  ์—ญ์ด ์š”์ผ์„ ๊ธฐ์ค€์œผ๋กœ ๊ฑฐ์˜ ์ผ์ •ํ•œ ์š”๋™์ธ๊ตฌ ๋ณ€ํ™”๋ฅผ ๋‚˜ํƒ€๋‚ด๊ธฐ ๋•Œ๋ฌธ์— ์ง€ํ•˜์ฒ ์„ ์ด์šฉํ•œ ์Šน๊ฐ ๋ณ€ํ™”๋ฅผ ํ†ตํ•ด ํ•™์Šตํ•œ ๋ชจ๋ธ์˜ ๋ฉ”๋ฆฌํŠธ๊ฐ€ ๋–จ์–ด์ง„๋‹ค. 
>
> ## ๋Œ€์•ˆ
๋ชจ๋“  ์—ญ์— ๋Œ€ํ•œ ์ˆ˜์š”์˜ˆ์ธก ํ›„ ๋น„๊ต๋ผ๋Š” ์ดˆ๋ฐ˜์˜ ๋ฐฉ์‹์„ ๋ฒ„๋ฆฌ๊ณ  ํ•˜๋‚˜์˜ ์—ญ์— ๋Œ€ํ•œ ์ž‘์—… ์šฐ์„  ์ง„ํ–‰
๊ฐ•๋‚จ ํ•˜๋‚˜๋งŒ ์ง‘์–ด์„œ ๋‚ ์”จ ๋“ฑ ๋‹ค๋ฅธ ๋ณ€์ˆ˜ ์ถ”๊ฐ€ (2023-02-27)

# 2. 2์ฐจ(๋‚ ์”จ ์ถ”๊ฐ€)
## 2.1 ์ „์ฒ˜๋ฆฌ
- ์ด์ „ ๋‹จ๊ณ„์—์„œ ์‚ฌ์šฉํ•œ ๋ฐ์ดํ„ฐ์…‹์—๋Š” ๋ณ€์ˆ˜๊ฐ€ ๋„ˆ๋ฌด ์ ๊ธฐ ๋•Œ๋ฌธ์— ํ•˜๋‚˜์˜ ์—ญ์„ ์ •ํ•ด์„œ ๊ฐ•์ˆ˜ ์—ฌ๋ถ€๋ฅผ ์ถ”๊ฐ€

import pandas as pd

class Weather():
    """Adds a rain-indicator column to the ๊ฐ•๋‚จ์—ญ rate CSV."""

    def _rain_flags(self, amounts):
        """Map daily precipitation amounts (mm) to binary flags.

        Returns 1 for days with any rainfall (> 0.0 mm), else 0.
        NOTE(review): the original loop appended the raw amount on BOTH
        branches, making the if/else dead code; the target column name
        '๊ฐ•์ˆ˜์—ฌ๋ถ€' (rain yes/no) indicates a binary flag was intended —
        confirm against downstream consumers.
        """
        return [1 if amount > 0.0 else 0 for amount in amounts]

    def adding_y(self):
        """Join the per-day rain flag onto the ๊ฐ•๋‚จ์—ญ rate data and save it."""
        data = pd.read_csv('./data/rate/๊ฐ•๋‚จ์—ญ.csv')
        rain = pd.DataFrame(pd.read_csv('./data/extremum_20230306105846.csv', encoding='euc-kr')['์ผ๊ฐ•์ˆ˜๋Ÿ‰(mm)'])
        # Sanity check: both inputs must cover the same number of days for the
        # column-wise concat below to line up.
        print(f'์›๋ณธ ์ธ๋ฑ์Šค ์ˆ˜ : {len(data.index)}')
        print(f"๊ฐ•์ˆ˜๋Ÿ‰ ์ธ๋ฑ์Šค ์ˆ˜ : {len(pd.read_csv('./data/extremum_20230306105846.csv', encoding='euc-kr').index)}")
        rain_data = rain.rename(columns={'์ผ๊ฐ•์ˆ˜๋Ÿ‰(mm)': '๊ฐ•์ˆ˜์—ฌ๋ถ€'})
        rain_add = pd.DataFrame(self._rain_flags(rain_data['๊ฐ•์ˆ˜์—ฌ๋ถ€']), columns=['๊ฐ•์ˆ˜์—ฌ๋ถ€'])
        df = pd.concat([data, rain_add], axis=1)
        df.to_csv('./data/rain/๊ฐ•๋‚จ์—ญ.csv')


## 2.2 ํ•™์Šต
### 2.2.1 ๊ฒฐ๊ณผ
- ์„œ๋น„์Šค ํ•ญ๋ชฉ์ธ ์šฐ์‚ฐ๊ณผ ๋น„์Šทํ•˜๊ฒŒ ๋‚ ์”จ์— ์˜ํ–ฅ์„ ๋ฐ›๋Š” ์ƒํ’ˆ์˜ ์ˆ˜์š” ๋ณ€ํ™”๋ฅผ ๋ณ€์ˆ˜๋กœ ๋‘˜ ํ•„์š”๊ฐ€ ์žˆ๋‹ค

> **๋‚ ์”จ ์ถ”๊ฐ€ ํ›„**
![](https://velog.velcdn.com/images/boost_dev/post/8fb60c5a-1f69-41c9-b507-755a7925bb40/image.png)
๐Ÿ˜… ๋‹ค์‹œ ์ƒ๊ฐํ•ด๋ณด๋‹ˆ ๋‚ ์”จ๊ฐ€ ์ง€ํ•˜์ฒ ์„ ์ด์šฉ๋Ÿ‰์— ์˜ํ–ฅ์„ ๋ฏธ์น˜์ง€ ์•Š๊ณ  ์ด๊ฑธ ๋ฝ‘์•„๋‚ด๋”๋ผ๋„ ์šฐ์‚ฐ ๋Œ€์—ฌ์™€ ๊ด€๋ จ์ด ์—†๋‹ค

---
# 3. 3์ฐจ(๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ์ •๋ณด ์ถ”๊ฐ€)
## 3.1 ์ „์ฒ˜๋ฆฌ
> ์šฐ์‚ฐ๊ณผ ์ˆ˜์š”๊ฐ€ ๋ฐ˜๋น„๋ก€ํ•˜๋Š” ๋”ฐ๋ฆ‰์ด์˜ ์ž๋ฃŒ๋ฅผ ๊ฐ€์ ธ์˜ค๊ณ  y๊ฐ’์„ ์Šนํ•˜์ฐจ ๋ณ€๋™๋ฅ ์ด ์•„๋‹ˆ๋ผ ๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ๋Ÿ‰์œผ๋กœ ๋ฐ”๊พธ๊ธฐ ์œ„ํ•ด ์ „์ฒ˜๋ฆฌ ๊ณผ์ •์„ ๊ฑฐ์นจ

### 3.1.1 ์„œ์šธ์—ญ ์Šนํ•˜์ฐจ์ •๋ณด ์ถ”์ถœ
- ์„œ์šธ์—ญ์˜ ์—ญ๋ณ„ ์Šนํ•˜์ฐจ ์ •๋ณด์™€ ๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ์†Œ ์œ„์น˜์ •๋ณด ๊ฐ„์˜ ์—ฐ๊ฒฐ์ด ๊ฐ€์žฅ ์ˆ˜์›”ํ•˜๊ธฐ ๋•Œ๋ฌธ์— ๊ฐ•๋‚จ์—ญ์„ ๋Œ€์ƒ์œผ๋กœ ํ•œ ์ด์ „ ์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๋ฅผ ๋ฒ„๋ฆฌ๊ณ  ์„œ์šธ์—ญ์œผ๋กœ ๋‹ค์‹œ ์ง„ํ–‰
      # Extract the ์„œ์šธ์—ญ (Line 1) rows and save them in date order.
      data = self.passengers.sort_values(by='์—ญ๋ช…')  # sort by station name
      data = data[data["๋…ธ์„ ๋ช…"] == "1ํ˜ธ์„ "]    # keep Line 1 only
      data = data[data["์—ญ๋ช…"] == "์„œ์šธ์—ญ"]    # keep ์„œ์šธ์—ญ only
      print(data.isnull().sum())              # null check (was `sorted_csv`, an undefined name -> NameError)
      sorted_data = data.sort_values(by='์‚ฌ์šฉ์ผ์ž')
      print(sorted_data)
      sorted_data.to_csv(f"{self.save_dir}/sorted_์„œ์šธ์—ญ.csv", index=False)
![](https://velog.velcdn.com/images/boost_dev/post/5d2772b1-1a19-434d-b9eb-c8564e49c3f1/image.png)

### 3.1.2 ๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ๋Ÿ‰ ์ถ”์ถœ
> 1๋…„์น˜ ๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ์ •๋ณด๊ฐ€ ํ•˜๋ฃจ๋ฅผ ๊ธฐ์ค€์œผ๋กœ ์ ๊ฒŒ๋Š” 5๋งŒ์—์„œ 20๋งŒ๊ฐœ์˜ ๋ฐ์ดํ„ฐ๊ฐ€ ๋ชจ์—ฌ์žˆ์–ด์„œ ์ „์ฒ˜๋ฆฌ๊ฐ€ ํ•„์ˆ˜
![](https://velog.velcdn.com/images/boost_dev/post/7c7c9a98-a0dd-4c31-9033-f82ede25c224/image.png)

- ์„œ์šธ์—ญ ์Šนํ•˜์ฐจ ์ •๋ณด์™€ ๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ๋Ÿ‰ join
  # Extract ์„œ์šธ์—ญ (Line 1) rows, sort chronologically, then join the daily
  # ๋”ฐ๋ฆ‰์ด (bike-share) rental counts column-wise.
  data = self.passengers.sort_values(by='์—ญ๋ช…')  # sort by station name
  data = data[data["๋…ธ์„ ๋ช…"] == "1ํ˜ธ์„ "]    # keep Line 1 only
  data = data[data["์—ญ๋ช…"] == "์„œ์šธ์—ญ"]    # keep ์„œ์šธ์—ญ only
  print(data.isnull().sum())              # null check (was `sorted_csv`, an undefined name -> NameError)
  sorted_data = data.sort_values(by='์‚ฌ์šฉ์ผ์ž')
  print(sorted_data)
  sorted_data.to_csv(f"{self.save_dir}/sorted_์„œ์šธ์—ญ.csv", index=False)
  ttareungi = pd.read_csv(f"{self.save_dir}/์„œ์šธ์—ญ_๋”ฐ๋ฆ‰์ด_๋Œ€์—ฌ๋Ÿ‰.csv")
  ttareungi = ttareungi['๋”ฐ๋ฆ‰์ด ๋Œ€์—ฌ๋Ÿ‰']
  # NOTE(review): concat aligns on index, and sorted_data keeps its original
  # row labels while ttareungi has a fresh RangeIndex — a reset_index on
  # sorted_data may be needed for row-wise alignment; confirm with the data.
  join_df = pd.concat([sorted_data, ttareungi], axis=1)
  join_df.to_csv(f"{self.save_dir}/ttareungi_์„œ์šธ์—ญ.csv", index=False)

- 365๊ฐœ์˜ csv ํŒŒ์ผ ์ค‘ ์ผ๋ถ€ ํŒŒ์ผ์˜ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ, ์ธ์ฝ”๋”ฉ ๋ฐฉ์‹์ด ์ผ์น˜ํ•˜์ง€ ์•Š์•„์„œ ์—๋Ÿฌ ๋ฐœ์ƒ
![](https://velog.velcdn.com/images/boost_dev/post/876b8d0a-e85f-4252-a11e-606e444c09dd/image.png)
>์•ž์˜ ์ฝ”๋“œ ์•ž์— ์•„๋ž˜ ์ฝ”๋“œ ์ถ”๊ฐ€
>```
        # Normalize the third column name of every daily tpss CSV. Some of the
        # 365 files were exported with different encodings, so fall back from
        # cp949 to utf8 on decode failure; the rename logic is identical for
        # both, so it is deduplicated below the try/except.
        dirs = os.listdir('.\\data\\tpss')
        for i in dirs:  # month folders
            dir = os.listdir(f'.\\data\\tpss\\{i}')
            for j in dir:  # daily CSV files
                try:
                    df = pd.read_csv(f'.\\data\\tpss\\{i}\\{j}', encoding='cp949')
                except UnicodeDecodeError:
                    # The original bound the exception `as df` and then
                    # reassigned it — legal but confusing; no binding needed.
                    df = pd.read_csv(f'.\\data\\tpss\\{i}\\{j}', encoding='utf8')
                if df.columns[2] != '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID':
                    df.rename(columns={df.columns[2]: '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID'}, inplace=True)
                    # index=False: the old call wrote the index, which round-
                    # trips into an 'Unnamed: 0' column and shifts columns[2]
                    # on the next run.
                    df.to_csv(f'.\\data\\tpss\\{i}\\{j}', index=False)
                print(f'{j}์ผ ์ข…๋ฃŒ')


## 3.2 ํ•™์Šต 
### 3.2.1 ๊ฒฐ๊ณผ
- epoch : 80
![](https://velog.velcdn.com/images/boost_dev/post/567d9f02-6383-4b39-ab7b-6c6e30c71be5/image.png)
- epoch : 800
![](https://velog.velcdn.com/images/boost_dev/post/465f6fed-ee14-409a-bf84-08963b0e02f9/image.png)

> ๋‚ ์”จ ๋ฐ์ดํ„ฐ๊ฐ€ ๋น ์ง„ ์ฑ„๋กœ ํ•™์Šตํ•ด์„œ ๊ทธ๋Ÿฐ๊ฑฐ๋ผ๊ณ  ์ƒ๊ฐ๋œ๋‹ค

---
# 4. 4์ฐจ(๊ธฐ์˜จ, ํ’์† ์ถ”๊ฐ€)
## 4.1 ์ „์ฒ˜๋ฆฌ
- 3์ฐจ ์‹œ๊ธฐ์˜ ๋ฐ์ดํ„ฐ์— ๊ธฐ์˜จ, ํ’์† ๋“ฑ ๋ณ€์ˆ˜ ์ถ”๊ฐ€
![](https://velog.velcdn.com/images/boost_dev/post/4faa5329-023b-40d8-a3aa-9e187b9d51e7/image.png)

## 4.2 ํ•™์Šต
### 4.2.1 ๊ฒฐ๊ณผ
- 3์ฐจ ์‹œ๋„ ๋•Œ๋ณด๋‹ค๋Š” loss๊ฐ€ ๊ฐ์†Œํ–ˆ๋‹ค
![](https://velog.velcdn.com/images/boost_dev/post/aa08603e-68bd-4081-8fe0-94cafba9d87a/image.png)


### 4.2.2 epoch, batch_size์— ๋”ฐ๋ฅธ ๊ฒฐ๊ณผ ๋น„๊ต
- 400,30
![](https://velog.velcdn.com/images/boost_dev/post/3b52bfce-0787-4997-9c9b-af10a3e0501e/image.png)

- 700,30
![](https://velog.velcdn.com/images/boost_dev/post/f24a2a06-d159-47ab-86c5-41c0c6144a56/image.png)

- 400,20
![](https://velog.velcdn.com/images/boost_dev/post/1b227b0a-86c8-45a1-ba30-0100cddd3ddf/image.png)
- 300, 20
![](https://velog.velcdn.com/images/boost_dev/post/095d4395-1ebf-4dcd-9001-d47374e3dff5/image.png)
- 500, 20
![](https://velog.velcdn.com/images/boost_dev/post/e443fdce-9a86-4a33-a56a-0174cc600f3f/image.png)
- 600, 12
![](https://velog.velcdn.com/images/boost_dev/post/d3d802ef-96e6-4cc7-8985-2ad9045b6d95/image.png)
- 800, 15
![](https://velog.velcdn.com/images/boost_dev/post/9c3f1390-29fa-46fd-b29d-9d1b4947e6da/image.png)

- 4000, 64
![](https://velog.velcdn.com/images/boost_dev/post/1bd08963-c708-4140-a60a-5450a3e1645a/image.png)


![](https://velog.velcdn.com/images/boost_dev/post/ff965f0a-9dde-499e-afc3-7257e8ff9e2c/image.png)
### 4.2.3 batch size
> ๊ฐ ์‚ฌ์ด์ฆˆ ํ…Œ์ŠคํŠธ์‹œ ์—ํฌํฌ๋Š” 1000	
      # Train with a 20% validation split so history carries val_* series.
      history = model.fit(train_X, train_y, batch_size=32, epochs=10, validation_split=0.2)

      # Loss curves: training vs. validation per epoch.
      plt.plot(history.history['loss'])
      plt.plot(history.history['val_loss'])
      plt.title('Model loss')
      plt.ylabel('Loss')
      plt.xlabel('Epoch')
      plt.legend(['Train', 'Validation'], loc='upper left')
      plt.show()

      # Accuracy curves.
      # NOTE(review): history only contains 'accuracy'/'val_accuracy' if the
      # model was compiled with metrics=['accuracy'] — the compile call shown
      # earlier in this post uses only loss, which would raise KeyError here;
      # confirm the compile arguments.
      plt.plot(history.history['accuracy'])
      plt.plot(history.history['val_accuracy'])
      plt.title('Model accuracy')
      plt.ylabel('Accuracy')
      plt.xlabel('Epoch')
      plt.legend(['Train', 'Validation'], loc='upper left')
- batch : 64
![](https://velog.velcdn.com/images/boost_dev/post/71615817-29d3-4820-8d3e-70c168ed92a3/image.png)
- batch : 30
![](https://velog.velcdn.com/images/boost_dev/post/9dbe1f09-b186-4fb1-8508-234356145144/image.png)

- batch : 20	
![](https://velog.velcdn.com/images/boost_dev/post/b37ae3aa-972d-4a92-861b-2d5d7bbf8080/image.png)
- batch : 15
![](https://velog.velcdn.com/images/boost_dev/post/ab06cc2c-1ff5-4657-9ff0-12d69854254d/image.png)

### 4.2.4 ํ•˜๋‚˜์”ฉ ์ฐพ์•„๋ณด๊ธฐ ํž˜๋“ค์–ด์กŒ๋‹ค ํ•œ ๋ฒˆ์— ์•Œ์•„๋ณด์ž
      # Compare training loss across several batch sizes in one run.
      # The original had an unrunnable Korean placeholder here; any list of
      # candidate sizes works.
      batch_sizes = [16, 32, 48, 64]
      losses = []
      # NOTE(review): each fit continues from the weights left by the previous
      # one — rebuild/re-initialize the model per batch size for a fair
      # comparison; confirm intent.
      for batch_size in batch_sizes:
          history = model.fit(train_X, train_y, epochs=10, batch_size=batch_size, verbose=0)
          losses.append(history.history['loss'])

      # One loss-per-epoch curve per batch size (x axis = epochs 1..10).
      for i in range(len(batch_sizes)):
          plt.plot(np.arange(1, 11), losses[i], label='batch_size=' + str(batch_sizes[i]))
      plt.title('Model loss by batch size')
      plt.ylabel('Loss')
      plt.xlabel('Epoch')
      plt.legend(loc='upper right')
      plt.show()
- 16 ๋‹จ์œ„๋กœ batch ์‚ฌ์ด์ฆˆ ๋น„๊ต
![](https://velog.velcdn.com/images/boost_dev/post/4a2197eb-f4a1-4a6c-b1a5-45522c3eac0b/image.png)

- 5 ๋‹จ์œ„๋กœ ๋น„๊ต
![](https://velog.velcdn.com/images/boost_dev/post/977179e2-71a9-4a1c-bfff-b83db24fc8d7/image.png)


---
# ?. ์—๋Ÿฌ๐Ÿ˜ณ
## ?.1 ํ•œ๊ธ€ ๊นจ์ง 
![](https://velog.velcdn.com/images/boost_dev/post/2b2e90fa-db6a-4953-9ab0-f86f403f96f5/image.png)

> ๋ฌธ์ œ ๋ฐœ์ƒ์ง€์ ์— 
```, family='Malgun Gothic'``` ์ถ”๊ฐ€

## ?.2 ValueError: Length of values (18200) does not match length of index (18250)

>#### **์›์ธ**
> - ์ด์ „ ์ผ์ž์™€ ๋น„๊ตํ•ด์„œ y๊ฐ’์„ ๋งŒ๋“ค๋ ค๊ณ  ํ–ˆ์œผ๋‚˜ i์™€ i+1์˜ ์—ญ์ด๋ฆ„์ด ๋‹ค๋ฅธ ์ง€์ ๋“ค์—์„œ๋Š” nall๊ฐ’์ด ๋ฐœ์ƒ
![](https://velog.velcdn.com/images/boost_dev/post/dda4dcac-1a53-4648-b2fc-35d14a00e5f7/image.png)
> - ์ •ํ™•ํžˆ ์—ญ์˜ ์ข…๋ฅ˜๋งŒํผ value์˜ ๊ธธ์ด๊ฐ€ ์ค„์–ด๋“ค์—ˆ๋‹ค๋Š” ๊ฒƒ์„ ํ™•์ธ
> ---
> #### **๊ณ„ํš ๋ณ€๊ฒฝ**
- ๋ชจ๋“  ์—ญ์„ ํ•œ ๋ฒˆ์— ๋น„๊ตํ•˜๊ธฐ๋ณด๋‹ค ๊ฐ ์—ญ๋ณ„ ์ด๋™์ธ๊ตฌ ๋ณ€๋™์œจ์„ ์˜ˆ์ธกํ•˜๊ณ  ์ด๋ฅผ ๋น„๊ตํ•˜๋Š” ๋ฐฉํ–ฅ์œผ๋กœ ๋ณ€๊ฒฝ
	- 2.1.2์—์„œ y๊ฐ’ ์ถ”๊ฐ€์‹œ ์ธ๋ฑ์Šค์˜ ๊ธธ์ด๊ฐ€ ๋งž์ง€ ์•Š์•„ ๋ฐœ์ƒํ•œ ์—๋Ÿฌ ํ•ด๊ฒฐ
	- ๋ชจ๋“  ์—ญ์„ ํ•œ ๋ฒˆ์— ๋น„๊ตํ•  ๊ฒฝ์šฐ ํšจ์œจ์ด ๋‚ฎ์Œ

## ?.3
![](https://velog.velcdn.com/images/boost_dev/post/c8ee11b3-f29c-41a9-a20c-5286ff7b4a09/image.png)
> ์›์ธ
365๊ฐœ์˜ csv ํŒŒ์ผ ์ค‘ ์ผ๋ถ€ ํŒŒ์ผ์˜ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ, ์ธ์ฝ”๋”ฉ ๋ฐฉ์‹์ด ์ผ์น˜ํ•˜์ง€ ์•Š์•„์„œ ์—๋Ÿฌ ๋ฐœ์ƒ

> ํ•ด๊ฒฐ
๋”ฐ๋กœ ์ •ํ•ด์ง„ ๋ฐฉ์‹์€ ์—†๊ณ  ์•„๋ž˜ ์ฝ”๋“œ๋ฅผ ํ†ตํ•ด ํ•ด๊ฒฐ

    dirs = os.listdir('.\\data\\tpss')
    for i in dirs:  # ์›”
        dir = os.listdir(f'.\\data\\tpss\\{i}')
        for j in dir:  # ์ผ
            try:
                df = pd.read_csv(f'.\\data\\tpss\\{i}\\{j}', encoding='cp949')
                if df.columns[2] != '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID':
                    df.rename(columns={df.columns[2]: '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID'}, inplace=True)
                    df.to_csv(f'.\\data\\tpss\\{i}\\{j}')
            except UnicodeDecodeError:
                df = pd.read_csv(f'.\\data\\tpss\\{i}\\{j}', encoding='utf8')
                if df.columns[2] != '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID':
                    df.rename(columns={df.columns[2]: '์‹œ์ž‘_๋Œ€์—ฌ์†Œ_ID'}, inplace=True)
                    df.to_csv(f'.\\data\\tpss\\{i}\\{j}')
            print(f'{j}์ผ ์ข…๋ฃŒ')


## ?.4 TypeError: numpy boolean subtract,
> ํ•ด๊ฒฐ
MinMaxScale ๊ณผ์ •์—์„œ String๊ฐ’์„ ๊ฐ€์ง€๋Š” ์ปฌ๋Ÿผ๊นŒ์ง€ ํฌํ•จ๋ผ์„œ ๋ฐœ์ƒํ•œ ๋ฌธ์ œ๋กœ int๊ฐ’๋งŒ ์‚ฌ์šฉ๋  ์ˆ˜ ์žˆ๋„๋ก ๋ฐ”๊ฟ”์ค€๋‹ค

        

0๊ฐœ์˜ ๋Œ“๊ธ€