# 2025.1.21 작성
# OS: Windows
# 개발환경: Google Colab
# 개발언어: Python
# 프레임워크: Pandas (데이터 분석에 주로 사용)
from collections.abc import ValuesView
import pandas as pd
# Load the AMZN price table and walk through some basic pandas operations.
# The code below references the Date, Open and Close columns of the file.
df1 = pd.read_csv("AMZN.csv")

# Bare expressions are notebook-style inspections; they have no effect when
# this file is run as a plain script.
df1.values
df1.Close
df1.Open

# Boolean flag: True on rows where Close is greater than Open.
df1["new1"] = df1.Close > df1.Open
df1.head()

# Share of True / False rows. normalize=True divides by the actual number of
# rows instead of a hard-coded 252, so the result is a proportion for a file
# of any length (and is unchanged when the file has exactly 252 rows).
df1.new1.value_counts(normalize=True)

# Write the frame, including the new flag column, to a second CSV file.
df1.to_csv("amzn1.csv")

df1[["Open", "Close"]]
df1.columns

# Drop the Date and new1 columns; the diff below runs on what remains.
df1.drop(["Date", "new1"], axis=1, inplace=True)
df1.head()

# Row-to-row difference of Close alone, then of every remaining column.
df1.Close.diff()
df2 = df1.diff()

df2.isnull().mean()  # fraction of missing values per column
df2.dropna(inplace=True)  # remove every row that contains at least one NaN
# Read weatherAUS.csv and discard every incomplete row.
df3 = pd.read_csv("weatherAUS.csv")

df3.isna().mean()  # fraction of missing values per column, before cleaning

# Same as the default dropna(): a row is removed if any of its values is NaN.
df3.dropna(axis=0, how="any", inplace=True)

df3.isna().mean()  # the same check again after cleaning
def func1(x1: float) -> str:
    """Bucket a number into one of four letter grades by fixed thresholds.

    Args:
        x1: The value to classify.

    Returns:
        "D" when x1 is at least 75, "C" when it is at least 50, "B" when it
        is at least 25, and "A" otherwise.
    """
    if x1 >= 75:
        return "D"
    if x1 >= 50:
        return "C"
    if x1 >= 25:
        return "B"
    # Everything below 25 lands here. NaN does as well, because every
    # comparison against NaN evaluates to False.
    return "A"
# Try func1 on a value that is below every threshold.
func1(-10)

# Run func1 on each MinTemp value (result is not stored).
df3["MinTemp"].apply(func1)

# Parse the Date column and keep the parsed values in a new Date1 column.
df3["Date1"] = pd.to_datetime(df3["Date"])

# Individual date/time components through the .dt accessor; these are
# notebook-style inspections and are not stored.
df3["Date1"].dt.time
df3["Date1"].dt.year
df3["Date1"].dt.month
df3["Date1"].dt.day
# Read AMZN.csv again and add a parsed copy of its Date column.
df4 = pd.read_csv("AMZN.csv")
df4["Date1"] = pd.to_datetime(df4["Date"])

# Keep only the float64 columns.
df5 = df4.select_dtypes(include=["float64"])

# DataFrame addition aligns on row and column labels: a label present in only
# one operand yields NaN in the result. Both expressions are inspections only.
df5[["Open", "High"]].iloc[0:10] + df5[["High", "Low"]].iloc[5:15]
df5.iloc[:10, :2] + df5.iloc[5:15, 1:3]

# Use the parsed dates as the index, then plot every column against it.
df5.index = df4["Date1"]
df5.plot()
# Read creditset2.csv once; the original parsed the same file twice in a row.
df14 = pd.read_csv("creditset2.csv")

# The three columns every correlation below is computed on, selected once
# instead of on every loop pass.
credit_cols = df14[["loan", "income", "age"]]

# Correlation matrix for rows with age strictly between 30 and 40.
credit_cols.query("age > 30 and age < 40").corr()

# Print the loan/income correlation for each ten-year age band.
# Both bounds are exclusive, so a row whose age is exactly 30, 40, 50 or 60
# falls into no band.
for i in [30, 40, 50]:
    j = i + 10
    band = credit_cols.query("age > @i and age < @j")
    print(band.corr().loc["loan", "income"])
import seaborn as sns # Seaborn 라이브러리 import
# Joint plot with loan on the x axis and income on the y axis.
X = df14["loan"]
y = df14["income"]
sns.jointplot(x=X, y=y)

# Pairwise plots across loan, income and age.
sns.pairplot(df14[["loan", "income", "age"]])
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np
# Fit a StandardScaler on df15, transform it, and wrap the resulting array
# back into a DataFrame that carries df15's column labels.
# NOTE(review): df15 is not defined anywhere in the visible code — confirm
# where it is created before running this.
scaler = StandardScaler()
scaled = scaler.fit_transform(df15)
df16 = pd.DataFrame(data=scaled, columns=df15.columns)