import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import koreanize_matplotlib
# IPython line magic (only valid inside Jupyter/IPython, not plain Python):
# ask the inline backend to render figures in 'retina' format.
%config InlineBackend.figure_format = 'retina'
# Tiny throwaway plot whose title is Korean text -- presumably a smoke test
# that the font set up by koreanize_matplotlib renders Hangul (confirm).
pd.Series([1,-1]).plot(title="한글", figsize=(5,1))

from glob import glob

# Find the source CSV in the working directory. glob() returns matches in
# arbitrary order, so sort them to make "the first match" deterministic.
file_name = sorted(glob("제주*.CSV"))
if not file_name:
    # Fail with an explicit message instead of an opaque IndexError below.
    raise FileNotFoundError(
        'No file matching "제주*.CSV" was found in the current directory.'
    )
file_name

# The file is read with the cp949 (Korean Windows) encoding.
df = pd.read_csv(file_name[0], encoding="cp949")
df.shape

# Peek at the first two rows.
df.head(n=2)

# Column dtypes and non-null counts.
df.info()

# Number of distinct values per column.
df.nunique()

# Number of missing values per column.
df.isna().sum()

# Remove the "데이터기준일자" column and confirm the new shape.
df = df.drop("데이터기준일자", axis="columns")
df.shape

# Convert the reference-month column to datetimes, then derive year and month.
# NOTE(review): assumes the raw values are in a form pd.to_datetime parses as
# calendar dates (e.g. "2019-01"); bare integers such as 201901 would be
# misread as epoch offsets -- confirm against the CSV.
df["기준년월"] = pd.to_datetime(df["기준년월"])
month_accessor = df["기준년월"].dt
df["기준연도"] = month_accessor.year
df["월"] = month_accessor.month
df.head(2)

# Drop the literal " 관광객" suffix from the tourist-type labels and trim
# any surrounding whitespace that remains.
tourist_type = df["관광객 유형"].str.replace(" 관광객", "", regex=False)
df["관광객 유형"] = tourist_type.str.strip()
df.head(2)

# Summary statistics for the numeric columns.
df.describe()

# Summary statistics (count / unique / top / freq) for the object columns.
df.describe(include=["object"])

# Pairwise correlation of the numeric columns only. The frame still holds
# object and datetime columns at this point, and on pandas >= 2.0 a bare
# df.corr() raises on them, so select the numeric columns explicitly
# (this also works on older pandas versions).
corr = df.select_dtypes(include="number").corr()
# Boolean mask covering the upper triangle (diagonal included) so each
# pair of columns is drawn only once in the heatmap.
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(figsize=(10, 10))
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", vmin=-1, vmax=1, mask=mask)

# Sum of "건당이용금액" for every (업종명, 성별) pair, drawn as a bar chart.
amount_by_sector_gender = df.groupby(["업종명", "성별"])["건당이용금액"].sum()
amount_by_sector_gender.plot(kind="bar", rot=20, figsize=(20, 5))

# Mean of "건당이용금액" per month; unstack() moves the "제주 중분류" level
# into columns so each category is drawn as its own line.
monthly_mean = df.groupby(["월", "제주 중분류"])["건당이용금액"].mean()
month_local = monthly_mean.unstack()
month_local.plot(figsize=(15, 4))
# Anchor the legend at the axes' upper-right corner coordinate (1, 1).
plt.legend(bbox_to_anchor=(1, 1))

# Number of rows per reference year.
_, year_ax = plt.subplots(figsize=(20, 4))
sns.countplot(data=df, x="기준연도", ax=year_ax)

# Number of rows per calendar month.
_, month_ax = plt.subplots(figsize=(20, 4))
sns.countplot(data=df, x="월", ax=month_ax)

# Sort rows by age band once and reuse the sorted frame for both plots below.
by_age_band = df.sort_values("연령대별")

# Row counts per age band, split by gender.
plt.figure(figsize=(15, 4))
sns.countplot(data=by_age_band, x="연령대별", hue="성별")

# NOTE(review): this figure is identical to the one above -- likely an
# accidental duplicate or a plot that was meant to use a different hue/column.
plt.figure(figsize=(15, 4))
sns.countplot(data=by_age_band, x="연령대별", hue="성별")

# 데이터 출처 (data source): https://www.data.go.kr/data/15046091/fileData.do