ํฌ์ผ๋ชฌ
์ฌ๊ธฐ์๋ numpy, pandas๋ฅผ ํ์ฉํ ์ค ์์์ผ ํ๊ณ
pandas์ ๋ฌธ๋ฒ๊ณผ ๋ฉ์๋์ ๋ํด ์์์ผ ํ๊ณ
matplotib๋ฅผ ํ์ฉํด์ ๋ฐ์ดํฐ ์๊ฐํ๋ฅผ ํ ์ค ์๊ณ
๋ฐ์ดํฐ์
์ train/test๋ก ๋๋ ์ ํ์ต์ด๋ ๊ฒ์ฆ์ ํ ์ค ์์์ผ ํด
Exploratory Data Analysis : EDA(탐색적 데이터 분석)
- 포켓몬 데이터셋
- 포켓몬의 이름, 속성
- 스탯
데이터 구하기
- 몇개의 피쳐가 있는가
Import(라이브러리 가져오기)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
print('์๋ฃ')
์๋ฃ
๋ฐ์ดํฐ์
์ Pandas๋ก ๋ถ๋ฌ์ค๊ธฐ.
import os
# Load the Pokemon CSV (path is relative to the working directory) into a DataFrame.
csv_path = "./pokemon_eda/data/Pokemon.csv"
original_data = pd.read_csv(filepath_or_buffer=csv_path)
# Progress marker printed once the load has finished.
print('์=3')
์=3
import pandas as ํผ๋
๋ฐ์ดํฐํ์ผ๊ฒฝ๋ก = "./pokemon_eda/data/Pokemon.csv"
๋ชจ์
๋์๋ก๋ง๋คํ
์ดํฐํ๋ ์ = ํผ๋.read_csv(๋ฐ์ดํฐํ์ผ๊ฒฝ๋ก)
print('์๋ฃ')
์๋ฃ
# Work on an independent (deep) copy so original_data is never modified.
pokemon = original_data.copy(deep=True)
# Report (rows, columns), then preview the first five rows as the cell output.
print(pokemon.shape)
pokemon.head(n=5)
(800, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
1 |
Bulbasaur |
Grass |
Poison |
318 |
45 |
49 |
49 |
65 |
65 |
45 |
1 |
False |
1 |
2 |
Ivysaur |
Grass |
Poison |
405 |
60 |
62 |
63 |
80 |
80 |
60 |
1 |
False |
2 |
3 |
Venusaur |
Grass |
Poison |
525 |
80 |
82 |
83 |
100 |
100 |
80 |
1 |
False |
3 |
3 |
VenusaurMega Venusaur |
Grass |
Poison |
625 |
80 |
100 |
123 |
122 |
120 |
80 |
1 |
False |
4 |
4 |
Charmander |
Fire |
NaN |
309 |
39 |
52 |
43 |
60 |
50 |
65 |
1 |
False |
๋ณ์ใ
กํฌ์ผ๋ชฌ = ๋ชจ์
๋์๋ก๋ง๋คํ
์ดํฐํ๋ ์.copy()
print(๋ณ์ใ
กํฌ์ผ๋ชฌ.shape)
๋ณ์ใ
กํฌ์ผ๋ชฌ.head()
(800, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
1 |
Bulbasaur |
Grass |
Poison |
318 |
45 |
49 |
49 |
65 |
65 |
45 |
1 |
False |
1 |
2 |
Ivysaur |
Grass |
Poison |
405 |
60 |
62 |
63 |
80 |
80 |
60 |
1 |
False |
2 |
3 |
Venusaur |
Grass |
Poison |
525 |
80 |
82 |
83 |
100 |
100 |
80 |
1 |
False |
3 |
3 |
VenusaurMega Venusaur |
Grass |
Poison |
625 |
80 |
100 |
123 |
122 |
120 |
80 |
1 |
False |
4 |
4 |
Charmander |
Fire |
NaN |
309 |
39 |
52 |
43 |
60 |
50 |
65 |
1 |
False |
전설의 포켓몬인지 아닌지만 확인
# Select the legendary Pokemon and renumber the rows from 0.
# The "Legendary" column is used directly as the boolean mask instead of
# comparing it with `== True` (PEP 8: don't compare boolean values to True).
# NOTE(review): assumes "Legendary" has bool dtype, as read_csv infers for
# True/False columns -- confirm if the loader changes.
legendary = pokemon[pokemon["Legendary"]].reset_index(drop=True)
print(legendary.shape)
legendary.head()
(65, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
144 |
Articuno |
Ice |
Flying |
580 |
90 |
85 |
100 |
95 |
125 |
85 |
1 |
True |
1 |
145 |
Zapdos |
Electric |
Flying |
580 |
90 |
90 |
85 |
125 |
90 |
100 |
1 |
True |
2 |
146 |
Moltres |
Fire |
Flying |
580 |
90 |
100 |
90 |
125 |
85 |
90 |
1 |
True |
3 |
150 |
Mewtwo |
Psychic |
NaN |
680 |
106 |
110 |
90 |
154 |
90 |
130 |
1 |
True |
4 |
150 |
MewtwoMega Mewtwo X |
Psychic |
Fighting |
780 |
106 |
190 |
100 |
154 |
100 |
130 |
1 |
True |
๋ ์ ๋๋ฆฌ = ๋ณ์ใ
กํฌ์ผ๋ชฌ[๋ณ์ใ
กํฌ์ผ๋ชฌ["Legendary"] == True].reset_index(drop=True)
print(๋ ์ ๋๋ฆฌ.shape)
๋ ์ ๋๋ฆฌ.head()
(65, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
144 |
Articuno |
Ice |
Flying |
580 |
90 |
85 |
100 |
95 |
125 |
85 |
1 |
True |
1 |
145 |
Zapdos |
Electric |
Flying |
580 |
90 |
90 |
85 |
125 |
90 |
100 |
1 |
True |
2 |
146 |
Moltres |
Fire |
Flying |
580 |
90 |
100 |
90 |
125 |
85 |
90 |
1 |
True |
3 |
150 |
Mewtwo |
Psychic |
NaN |
680 |
106 |
110 |
90 |
154 |
90 |
130 |
1 |
True |
4 |
150 |
MewtwoMega Mewtwo X |
Psychic |
Fighting |
780 |
106 |
190 |
100 |
154 |
100 |
130 |
1 |
True |
# Select the non-legendary Pokemon and renumber the rows from 0.
# `~` negates the boolean mask element-wise, replacing the non-idiomatic
# `== False` comparison.
# NOTE(review): assumes "Legendary" has bool dtype (on an object-dtype column
# `~` would not act as a logical NOT) -- confirm if the loader changes.
ordinary = pokemon[~pokemon["Legendary"]].reset_index(drop=True)
print(ordinary.shape)
ordinary.head()
(735, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
1 |
Bulbasaur |
Grass |
Poison |
318 |
45 |
49 |
49 |
65 |
65 |
45 |
1 |
False |
1 |
2 |
Ivysaur |
Grass |
Poison |
405 |
60 |
62 |
63 |
80 |
80 |
60 |
1 |
False |
2 |
3 |
Venusaur |
Grass |
Poison |
525 |
80 |
82 |
83 |
100 |
100 |
80 |
1 |
False |
3 |
3 |
VenusaurMega Venusaur |
Grass |
Poison |
625 |
80 |
100 |
123 |
122 |
120 |
80 |
1 |
False |
4 |
4 |
Charmander |
Fire |
NaN |
309 |
39 |
52 |
43 |
60 |
50 |
65 |
1 |
False |
์ผ๋ฐ๋๋ฆฌ = ๋ณ์ใ
กํฌ์ผ๋ชฌ[๋ณ์ใ
กํฌ์ผ๋ชฌ["Legendary"] == False].reset_index(drop=True)
print(์ผ๋ฐ๋๋ฆฌ.shape)
์ผ๋ฐ๋๋ฆฌ.head()
(735, 13)
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
0 |
1 |
Bulbasaur |
Grass |
Poison |
318 |
45 |
49 |
49 |
65 |
65 |
45 |
1 |
False |
1 |
2 |
Ivysaur |
Grass |
Poison |
405 |
60 |
62 |
63 |
80 |
80 |
60 |
1 |
False |
2 |
3 |
Venusaur |
Grass |
Poison |
525 |
80 |
82 |
83 |
100 |
100 |
80 |
1 |
False |
3 |
3 |
VenusaurMega Venusaur |
Grass |
Poison |
625 |
80 |
100 |
123 |
122 |
120 |
80 |
1 |
False |
4 |
4 |
Charmander |
Fire |
NaN |
309 |
39 |
52 |
43 |
60 |
50 |
65 |
1 |
False |
결측치 확인하기
# Count missing values per column: flag each missing cell, then sum down the rows.
pokemon.isna().sum(axis=0)
# 0
Name 0
Type 1 0
Type 2 386
Total 0
HP 0
Attack 0
Defense 0
Sp. Atk 0
Sp. Def 0
Speed 0
Generation 0
Legendary 0
dtype: int64
๋ณ์ใ
กํฌ์ผ๋ชฌ.isnull().sum()
# 0
Name 0
Type 1 0
Type 2 386
Total 0
HP 0
Attack 0
Defense 0
Sp. Atk 0
Sp. Def 0
Speed 0
Generation 0
Legendary 0
dtype: int64
전체 컬럼 이해하기
# Print the number of columns (second entry of shape), then show their labels.
print(pokemon.shape[1])
pokemon.columns
13
Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
dtype='object')
print(len(๋ณ์ใ
กํฌ์ผ๋ชฌ.columns))
๋ณ์ใ
กํฌ์ผ๋ชฌ.columns
13
Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
dtype='object')
- # : 포켓몬 Id number. 성별이 다르지만 같은 포켓몬인 경우 등은 같은 #값을 가진다. int
- Name : 포켓몬 이름. 포켓몬 각각의 이름으로 저장되고, 800개의 포켓몬의 이름 데이터는 모두 다르다. (unique) str
- Type 1 : 첫 번째 속성. 속성을 하나만 가지는 경우 Type 1에 입력된다. str
- Type 2 : 두 번째 속성. 속성을 하나만 가지는 포켓몬의 경우 Type 2는 NaN(결측값)을 가진다. str
- Total : 전체 6가지 스탯의 총합. int
- HP : 포켓몬의 체력. int
- Attack : 물리 공격력. (scratch, punch 등) int
- Defense : 물리 공격에 대한 방어력. int
- Sp. Atk : 특수 공격력. (fire blast, bubble beam 등) int
- Sp. Def : 특수 공격에 대한 방어력. int
- Speed : 포켓몬 매치에 대해 어떤 포켓몬이 먼저 공격할지를 결정. (더 높은 포켓몬이 먼저 공격한다) int
- Generation : 포켓몬의 세대. 현재 데이터에는 6세대까지 있다. int
- Legendary : 전설의 포켓몬 여부. !! Target feature !! bool
# 컬럼 : ID numbers
# Number of distinct ID values in the "#" column.
# Series.nunique() replaces the hand-rolled len(set(...)); it skips NaN by
# default, which does not change the result here because the null-count
# check above reports 0 missing values for "#".
pokemon["#"].nunique()
721
len(set(๋ณ์ใ
กํฌ์ผ๋ชฌ["#"]))
721
800๋ณด๋ค ์์ 721์ ๊ฐ์ง๋ ๊ฒ์ผ๋ก ๋ณด์ ID๊ฐ ๊ฐ์ ๊ฒ์ด ์กด์ฌํจ์ ์ ์ ์์
# Show every row whose "#" (ID) equals 6, to inspect entries that share one ID.
pokemon.loc[pokemon["#"].eq(6)]
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
6 |
6 |
Charizard |
Fire |
Flying |
534 |
78 |
84 |
78 |
109 |
85 |
100 |
1 |
False |
7 |
6 |
CharizardMega Charizard X |
Fire |
Dragon |
634 |
78 |
130 |
111 |
130 |
85 |
100 |
1 |
False |
8 |
6 |
CharizardMega Charizard Y |
Fire |
Flying |
634 |
78 |
104 |
78 |
159 |
115 |
100 |
1 |
False |
๋ณ์ใ
กํฌ์ผ๋ชฌ[๋ณ์ใ
กํฌ์ผ๋ชฌ["#"] == 6]
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
6 |
6 |
Charizard |
Fire |
Flying |
534 |
78 |
84 |
78 |
109 |
85 |
100 |
1 |
False |
7 |
6 |
CharizardMega Charizard X |
Fire |
Dragon |
634 |
78 |
130 |
111 |
130 |
85 |
100 |
1 |
False |
8 |
6 |
CharizardMega Charizard Y |
Fire |
Flying |
634 |
78 |
104 |
78 |
159 |
115 |
100 |
1 |
False |
Name 컬럼 : 이름
- pokemon["Name"]을 집합(set)으로 만들어 준 후 길이(len)를 확인하면 중복이 사라지면서 유일한 이름 개수 확인 가능
# Number of distinct values in the "Name" column.
# Series.nunique() replaces the hand-rolled len(set(...)); it skips NaN by
# default, which does not change the result here because the null-count
# check above reports 0 missing values for "Name".
pokemon["Name"].nunique()
800
len(set(๋ณ์ใ
กํฌ์ผ๋ชฌ["Name"]))
800
유니크 하군
Type 1 & Type 2 : 포켓몬의 속성
데이터 직접 보기
# Display the rows labelled 6 and 10 with all of their columns.
pokemon.loc[[6, 10], :]
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
6 |
6 |
Charizard |
Fire |
Flying |
534 |
78 |
84 |
78 |
109 |
85 |
100 |
1 |
False |
10 |
8 |
Wartortle |
Water |
NaN |
405 |
59 |
63 |
80 |
65 |
80 |
58 |
1 |
False |
๋ณ์ใ
กํฌ์ผ๋ชฌ.loc[[6, 10]]
|
# |
Name |
Type 1 |
Type 2 |
Total |
HP |
Attack |
Defense |
Sp. Atk |
Sp. Def |
Speed |
Generation |
Legendary |
6 |
6 |
Charizard |
Fire |
Flying |
534 |
78 |
84 |
78 |
109 |
85 |
100 |
1 |
False |
10 |
8 |
Wartortle |
Water |
NaN |
405 |
59 |
63 |
80 |
65 |
80 |
58 |
1 |
False |
# Distinct-value counts for "Type 1" and "Type 2", shown as a tuple.
# dropna=False keeps NaN as one extra category, matching what len(set(...))
# reported, without the redundant list() wrapper and without relying on
# every NaN being the same Python object inside a set.
pokemon["Type 1"].nunique(dropna=False), pokemon["Type 2"].nunique(dropna=False)
(18, 19)
len(list(set(๋ณ์ใ
กํฌ์ผ๋ชฌ["Type 1"]))), len(list(set(๋ณ์ใ
กํฌ์ผ๋ชฌ["Type 2"])))
(18, 19)
타입2가 한가지 더 많음, 그게 뭘까?
어떻게 확인하지?
공통적인 건 빼고 다른것만 출력하자
파이썬 차집합 함수(set difference)
# Reference for Python's set difference() method. Kept as a comment because a
# bare URL is not valid Python and makes the cell raise SyntaxError.
# https://www.w3schools.com/python/ref_set_difference.asp
File "/var/folders/59/gjb3x8rx30s2cxwfl3zh2m040000gn/T/ipykernel_5431/1204803596.py", line 1
https://www.w3schools.com/python/ref_set_difference.asp
^
SyntaxError: invalid syntax