import pandas as pd
# Build a small demo frame: A and B contain gaps, C is complete, and D holds
# no values at all.
df = pd.DataFrame(
    data={
        "A": [1, 2, None, 4],
        "B": [5, None, None, 8],
        "C": [9, 10, 11, 12],
        "D": [None] * 4,
    }
)
# Keep only the rows that have no missing value in any column. Because D is
# missing in every row, no row survives and the result is an empty frame.
df = df.dropna(axis=0, how="any")
output :
Empty DataFrame
Columns: [A, B, C, D]
Index: []
import pandas as pd
# Build a small demo frame: A and B contain gaps, C is complete, and D holds
# no values at all.
df = pd.DataFrame(
    data={
        "A": [1, 2, None, 4],
        "B": [5, None, None, 8],
        "C": [9, 10, 11, 12],
        "D": [None] * 4,
    }
)
# Substitute the default value 0 for every missing entry in every column.
df = df.fillna(value=0)
output :
A B C D
0 1.0 5.0 9 0
1 2.0 0.0 10 0
2 0.0 0.0 11 0
3 4.0 8.0 12 0
# Replace missing values in a single column with 0.
# ('col' stands in for the name of the column to fill; the sample frames
# built in this file have no column with that name.)
df['col'] = df['col'].fillna(0)
# Replace missing values in column B with that column's mean
# (Series.mean() skips missing values by default).
df['B'] = df['B'].fillna(df['B'].mean())
# Forward fill: carry the previous valid value down into each gap.
# DataFrame.ffill() replaces fillna(method='pad'), whose `method` argument
# is deprecated since pandas 2.1. It returns a new frame; df is unchanged.
df.ffill()
# Backward fill: pull the next valid value up into each gap.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill') and
# likewise returns a new frame.
df.bfill()
import pandas as pd
# Create a sample DataFrame with missing values. Columns A and B each have
# only three known values; column C has four.
df = pd.DataFrame({
    'A': [1, 2, None, 4, None],
    'B': [5, None, None, 8, 10],
    'C': [9, 10, 11, None, 13],
})
# Interpolate missing values using spline interpolation (requires SciPy).
# A spline of order k needs more than k known points in each column. With
# three known values in A and B, order=3 is rejected by the SciPy spline
# fit, so a quadratic spline (order=2) is the highest order this data allows.
# The trailing gap in A lies past the last known value and is therefore
# filled by extrapolating the spline.
df_interpolated = df.interpolate(method='spline', order=2)
# output (up to floating-point rounding):
#      A     B     C
# 0  1.0   5.0   9.0
# 1  2.0   5.5  10.0
# 2  3.0   6.5  11.0
# 3  4.0   8.0  12.0
# 4  5.0  10.0  13.0