# Last week: categorical descriptive features
# Today: continuous descriptive features; with a continuous feature the same
# feature may be reused again at a different split threshold.
import math  # FIX: math.log2 is used below but the file never imported math

# Hand-worked entropy / information-gain calculations for a 7-row training
# set with a continuous descriptive feature (candidate thresholds: 750, 1350,
# 2250, 4175) plus the categorical features stream/slope. Each print is a
# weighted remainder H_rem = sum(|partition|/N * H(partition)).
print((-2 * 2/7 * math.log2(2/7)) - (3/7 * math.log2(3/7)))  # H of root: (2/7, 2/7, 3/7)
print(6/7 * (-1/6 * math.log2(1/6) - 1/2 * math.log2(1/2) - 1/3 * math.log2(1/3))) # threshold 750
print(2/7 * -math.log2(1/2) + 5/7 * (-1/5 * math.log2(1/5) -2*(2/5 * math.log2(2/5)))) # threshold 1350
print(3/7 * (-2/3*math.log2(2/3)-1/3*math.log2(1/3)) + 4/7*(-math.log2(1/2))) # threshold 2250
print(5/7 * (-2/5*math.log2(2/5)-3/5*math.log2(3/5))) # threshold 4175
# NOTE(review): "1/2*math.log2(1/4)" below looks like a typo for log2(1/2)
# (probabilities 1/2+1/2 with log2(1/4) don't match any distribution here) — confirm.
print(4/7 * (-1/2*math.log2(1/2)-1/2*math.log2(1/4)) + 3/7*(-2/3*math.log2(2/3)-1/3*math.log2(1/3))) # feature: stream
# NOTE(review): "2/5*math.log2(1/5)" below is presumably a typo for log2(2/5)
# (compare the 4175 line above, which uses -2/5*log2(2/5)) — confirm.
print(5/7*(-2/5*math.log2(1/5)-3/5*math.log2(3/5))) # feature: slope
# Second level of the tree (subset of 5 rows after the first split).
print(-2/5*math.log2(2/5)-3/5*math.log2(3/5))        # H of the 5-row subset
print(4/5*(-1/4*math.log2(1/4)-3/4*math.log2(3/4))) # threshold 750
print(2/5*(-math.log2(1/2)) + 3/5*(-1/3*math.log2(1/3)-2/3*math.log2(2/3))) # threshold 1350
print(3/5*(-2/3*math.log2(2/3)-1/3*math.log2(1/3))) # threshold 2250, feature stream
print(4/5*(-1/4*math.log2(1/4)-3/4*math.log2(3/4))) # feature: slope
# Third level (3-row subset).
print(-2/3*math.log2(2/3)-1/3*math.log2(1/3))        # H of the 3-row subset
print(2/3*-math.log2(1/2))                           # remainder of best split
#(dozed off in class here...)
#Regression trees: split quality is judged by the variance of the target
#feature values — between "season" and "work day", pick the feature whose
#post-split variance is smaller.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Decision-tree CLASSIFIER demo on the iris dataset.
# In ML the dataset is split into train and test parts; the test set checks
# how well training went. (Roughly 70-80% of ML work is data-related.)
# dir(iris) shows which fields the loaded dataset bundle contains.
iris = load_iris()
data, targets = iris.data, iris.target
print(data.shape, targets.shape, '\n')
# random_state pins the random shuffle so the same split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.2, random_state=11)
print(f"{type(X_train) = } / {X_train.shape = }")
print(f"{type(X_test) = } / {X_test.shape = }")
print(f"{type(y_train) = } / {y_train.shape = }")
print(f"{type(y_test) = } / {y_test.shape =}\n")
model = DecisionTreeClassifier()
# Dump the model's public attributes/methods (skip dunders).
for attr in dir(model):
    if not attr.startswith("__"):
        print(attr)
model.fit(X_train, y_train)
print("depth:", model.get_depth())
# FIX: get_n_leaves is a method — the original printed the bound-method
# object instead of calling it.
print("number of leaves", model.get_n_leaves())
# FIX: accuracy was computed but never shown (its print was commented out).
accuracy = model.score(X_test, y_test)
print(f"{accuracy = :.4f}")
# Splits use Gini impurity (the DecisionTreeClassifier default criterion).
import matplotlib.pyplot as plt
from sklearn import tree
plt.figure(figsize=(20, 15))
tree.plot_tree(model, class_names=iris.target_names,
               feature_names=iris.feature_names,
               impurity=True, filled=True,
               rounded=True)
plt.show()
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Decision-tree REGRESSOR demo on the iris dataset. NOTE(review): the integer
# class labels are used here as regression targets — fine as a mechanics demo,
# but not a meaningful regression problem.
iris = load_iris()
data, targets = iris.data, iris.target
print(data.shape, targets.shape, '\n')
# random_state pins the random shuffle so the same split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.2, random_state=11)
print(f"{type(X_train) = } / {X_train.shape = }")
print(f"{type(X_test) = } / {X_test.shape = }")
print(f"{type(y_train) = } / {y_train.shape = }")
print(f"{type(y_test) = } / {y_test.shape =}\n")
model = DecisionTreeRegressor()
# Dump the model's public attributes/methods (skip dunders).
for attr in dir(model):
    if not attr.startswith("__"):
        print(attr)
model.fit(X_train, y_train)
print("depth:", model.get_depth())
# FIX: get_n_leaves is a method — the original printed the bound-method
# object instead of calling it.
print("number of leaves", model.get_n_leaves())
# FIX: for a regressor, score() returns R^2, not accuracy — renamed the
# local and enabled the (previously commented-out) print.
r2 = model.score(X_test, y_test)
print(f"{r2 = :.4f}")
# Splits use squared error (the DecisionTreeRegressor default), not Gini.
import matplotlib.pyplot as plt
from sklearn import tree
plt.figure(figsize=(20, 15))
# class_names dropped: it applies only to classifiers and is ignored for a
# regression tree (leaves hold predicted values, not classes).
tree.plot_tree(model,
               feature_names=iris.feature_names,
               impurity=True, filled=True,
               rounded=True)
plt.show()