고객사는 ## 은행입니다. 신용평가 업무를 인공지능으로 전환하고자 여러분에게 모델링을 의뢰하였습니다.
대출업무는
현장의 요구
신용평가 업무를 인공지능으로 전환
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.mosaicplot import mosaic
import scipy.stats as spst
# Load the credit dataset from its remote CSV location.
path = 'https://raw.githubusercontent.com/DA4BAM/dataset/master/credit_all.csv'
df = pd.read_csv(path)

# Recode Payment value 4 as 3 (original note: "credit grade 4 -> 3").
df.loc[df['Payment'].eq(4), 'Payment'] = 3

# Drop these three columns from the frame in place.
df.drop(columns=['Telephone', 'ForeignWorker', 'Dependents'], inplace=True)

# Preview the first rows (rendered when this is the last expression of a
# notebook cell).
df.head()
def univariate_fuction(target, df, bins=30):
    """Show summary statistics and three stacked plots for one column.

    Displays the transposed ``describe()`` table of the column, prints a
    separator line, then draws a histogram, a KDE curve and a box plot of
    the column in a single 8x16 figure and shows it.

    Args:
        target: Name of the column in ``df`` to analyse.
        df: DataFrame containing ``target``.
        bins: Number of histogram bins (default 30).

    Note:
        ``display`` is not imported in this file; it is assumed to be
        provided by the IPython/Jupyter session the code runs in.
    """
    display(df[[target]].describe().T)
    print('-' * 70)

    plt.figure(figsize=(8, 16))

    # Top panel: histogram.
    plt.subplot(3, 1, 1)
    sns.histplot(x=target, data=df, bins=bins)
    plt.title(f'histogram of {target}')
    plt.xlabel(target)

    # Middle panel: kernel density estimate.
    # The column is passed by keyword, like the histogram above, rather than
    # as a bare positional Series whose meaning depends on the seaborn
    # version's signature.
    plt.subplot(3, 1, 2)
    sns.kdeplot(x=target, data=df)
    plt.title(f'kdechart of {target}')
    plt.xlabel(target)

    # Bottom panel: box plot.
    # Assigning the column to x puts the values on the x-axis, which is the
    # axis labelled with the column name below.
    plt.subplot(3, 1, 3)
    sns.boxplot(x=target, data=df)
    plt.title(f'boxchart of {target}')
    plt.xlabel(target)
    plt.grid()  # grid is applied to the box-plot panel only, as before

    plt.show()
def univariate_function2(target, df):
    """Show frequency tables, a count plot and a pie chart for one column.

    Prints and displays the absolute and relative frequencies of the values
    in ``df[target]``, then draws a count plot and a pie chart of those
    frequencies in a single 8x16 figure and shows it.

    Args:
        target: Name of the column in ``df`` to analyse.
        df: DataFrame containing ``target``.

    Note:
        ``display`` is not imported in this file; it is assumed to be
        provided by the IPython/Jupyter session the code runs in.
    """
    # Computed once and reused for both the table and the pie chart.
    counts = df[target].value_counts()

    print(f'{target} 의 빈도')
    display(counts)
    print('-' * 70)

    print(f'{target} 의 빈도율')
    display(df[target].value_counts(normalize=True))
    print('-' * 70)

    plt.figure(figsize=(8, 16))

    # Top panel: bar chart of the counts per value.
    plt.subplot(2, 1, 1)
    sns.countplot(x=target, data=df)
    plt.title(f'countplot of {target}')
    plt.grid()

    # Bottom panel: share of each value as a percentage.
    plt.subplot(2, 1, 2)
    plt.pie(counts, labels=counts.index, autopct='%.2f%%')

    # Flush the figure here, as the other plotting helpers in this file do,
    # so it is rendered at the point of the call.
    plt.show()
def bivariate_function(var, target, df):
    """Plot the density of ``var`` split by ``target`` in two stacked panels.

    The upper panel draws one KDE curve per ``target`` value, each normalised
    independently. The lower panel draws the same densities stacked to fill
    the full height, with a red horizontal line at the mean of
    ``df[target]``.

    Args:
        var: Name of the column in ``df`` placed on the x-axis.
        target: Name of the column in ``df`` used as the hue grouping.
        df: DataFrame containing both columns.
    """
    _, (density_ax, fill_ax) = plt.subplots(2, 1, figsize=(8, 16))

    # Upper panel: per-group densities, each with its own normalisation.
    sns.kdeplot(data=df, x=var, hue=target, common_norm=False, ax=density_ax)
    density_ax.grid()

    # Lower panel: densities stacked to a filled proportion chart.
    sns.kdeplot(data=df, x=var, hue=target, multiple='fill', ax=fill_ax)
    overall_mean = df[target].mean()
    # Reference line at the overall mean of the target column.
    fill_ax.axhline(overall_mean, color='r')

    plt.show()
def bivariate_function2(var, target, data):
    """Cross-tabulate two columns, draw a mosaic plot and run a chi-square test.

    Prints the contingency table of ``data[var]`` against ``data[target]``,
    draws a mosaic plot of the two columns with a red horizontal line at the
    mean of ``data[target]``, and prints the result of
    ``scipy.stats.chi2_contingency`` on the table.

    Args:
        var: Name of the column used for the table rows and the first mosaic
            split.
        target: Name of the column used for the table columns and the second
            mosaic split.
        data: DataFrame containing both columns.
    """
    # Every lookup goes through the ``data`` argument; the previous body read
    # the module-level ``df`` and silently ignored what the caller passed in.
    table = pd.crosstab(data[var], data[target])
    print(table)

    mosaic(data, [var, target])
    # Reference line at the overall mean of the target column.
    plt.axhline(data[target].mean(), color='r')
    plt.show()

    print(spst.chi2_contingency(table))