Data 분할

素人·2022년 1월 3일
0

Data

목록 보기
2/30
from sklearn.model_selection import train_test_split

# Hold out 20% of `train` as a test set (80% stays in train_set).
# The original split this assignment across two lines with the second line
# starting at `=`, which is a SyntaxError; it must be a single statement.
# NOTE(review): no random_state is passed, so the shuffled split differs on
# every run -- confirm whether reproducibility is wanted here.
train_set, test_set = train_test_split(train, train_size=0.8, shuffle=True)

# Report the resulting sizes, first together and then labelled.
print(train_set.shape, test_set.shape)
print('train: ', train_set.shape)
print('test: ', test_set.shape)

# Separate the test set into all-but-last columns and the last column.
# Presumably the last column holds the target label -- verify against the data.
test_X = test_set[test_set.columns[:-1]]
test_y = test_set[test_set.columns[-1]]

print(test_X.shape, test_y.shape)
from sklearn.model_selection import KFold

# 5-fold splitter with shuffling turned off.
kf = KFold(
    n_splits=5,
    shuffle=False,
)

# Each iteration yields a (train indices, validation indices) pair for one
# fold; print the validation indices first, then the training indices.
for train_idx, valid_idx in kf.split(train_set.index):
    print('validation set이 될 데이터 index \n', valid_idx, '\n')
    print('train set이 될 데이터 index \n', train_idx, '\n')
# Same 5-fold splitter, but with shuffling enabled; the fixed random_state
# keeps the shuffled folds the same across runs.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=777,
)

# Print the validation indices and then the training indices of every fold.
for train_idx, valid_idx in kf.split(train_set.index):
    print('validation set이 될 데이터 index \n', valid_idx, '\n')
    print('train set이 될 데이터 index \n', train_idx, '\n')
profile
매일 조금씩:)

0개의 댓글