transcription
Zillow EDA On Missing Values & Multicollinearity
dataTypeDf = pd.DataFrame(merged.dtypes.value_counts()).reset_index().rename(columns={"index":"variableType", 0:"count"})
fig, ax = plt.subplots()
fig.set_size_inches(20,5)
sn.barplot(data=dataTypeDf, x="variableType", y="count", ax=ax, color="#34495e")
ax.set(xlabel="Variable Type", ylabel="Count", title="Variable Count Across Datatype")
missingValueColumns = merged.columns[merged.isnull().any()].tolist()
msno.bar(merged[missingValueColumns], \
figsize=(20,8), color="#34495e", fontsize=12, labels=True,)
msno.matrix(merged[missingValueColumns],width_ratios=(10,1),\
figsize=(20,8),color=(0,0, 0),fontsize=12,sparkline=True,labels=True)
msno.heatmap(merged[missingValueColumns], figsize=(20,20))
TypeError: Expecting a sequence of strings for feature names, got: <class 'numpy.ndarray'>
dtrain = xgb.DMatrix(train_X, train_y, feature_names=train_X.columns.values)
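원인 분석: train_X.columns.values 는 numpy.ndarray 인데, 최신 버전의 xgboost 는 feature_names 로 문자열 리스트(list of str)만 허용한다.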
해결: .tolist() 를 붙여 컬럼 이름을 문자열 리스트로 변환해서 전달한다.
dtrain = xgb.DMatrix(train_X, train_y, feature_names=train_X.columns.values.tolist())
ValueError: could not convert string to float: 'LARS'
corrMatt = merged[topFeatures].corr()
numMerged = merged[topFeatures].select_dtypes(include=['number'])
corrMatt = numMerged.corr()
AttributeError: 'Series' object has no attribute 'ix'
이전 코드
merged["logerror"].ix[merged["logerror"] > ulimit] = ulimit
merged["logerror"].ix[merged["logerror"] < llimit] = llimit
merged.loc[merged["logerror"] > ulimit, "logerror"] = ulimit
merged.loc[merged["logerror"] < llimit, "logerror"] = llimit
mergedFiltered[col].ix[mergedFiltered[col]>ulimit] = ulimit
mergedFiltered[col].ix[mergedFiltered[col]<llimit] = llimit
mergedFiltered.loc[mergedFiltered[col] > ulimit, col] = ulimit
mergedFiltered.loc[mergedFiltered[col] < llimit, col] = llimit
sn.jointplot(x = mergedFiltered.calculatedfinishedsquarefeet.values,
y = mergedFiltered.logerror.values,
size = 10,
sn.jointplot(x=mergedFiltered.calculatedfinishedsquarefeet.values,
y=mergedFiltered.logerror.values,
height=10,
pylab 부분을 plt 로 변경
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection='3d')