int(5)
import numpy as np
import pandas as pd
crime_raw_data = pd.read_csv("../data/02. crime_in_Seoul.csv", thousands=",", encoding="euc-kr") # thousands 숫자값을 문자로 인식할 수 있어서 설정
crime_raw_data.head()
crime_raw_data.info()
df.pivot_table(index="Name")
df.pivot_table(index=["Name", "Rep", "Manager"])
df.pivot_table(index=["Manager", "Rep"])
df.head()
df.pivot_table(index=["Manager", "Rep"], values="Price")
df.pivot_table(index=["Manager", "Rep"], values="Price", aggfunc=np.sum)
df.pivot_table(index=["Manager", "Rep"], values="Price", aggfunc=[np.sum, len])
df.head()
df.pivot_table(index=["Manager", "Rep"], values="Price", columns="Product", aggfunc=np.sum)
df.pivot_table(index=["Manager", "Rep"], values="Price", columns="Product", aggfunc=np.sum, fill_value=0)
df.pivot_table(index=["Manager", "Rep", "Product"], values=["Price", "Quantity"], aggfunc=np.sum, fill_value=0)
df.pivot_table(
index=["Manager", "Rep", "Product"],
values=["Price", "Quantity"],
aggfunc=[np.sum, np.mean],
fill_value=0,
margins=True) # 총계(All) 추가
crime_raw_data.head()
crime_station = crime_raw_data.pivot_table(
crime_raw_data,
index="구분",
columns=["죄종", "발생검거"],
aggfunc=[np.sum])
crime_station.head()
crime_station.columns # Multiindex
crime_station["sum", "건수", "강도", "검거"][:5]
crime_station.columns = crime_station.columns.droplevel([0, 1]) # 다중 컬럼에서 특정 컬럼 제거
crime_station.columns
crime_station.head()
crime_station.index
get_ipython().system("pip list")
for n in [1, 2, 3, 4]:
print("Number is", n)
for n in range(0, 10):
print(n ** 2)
[n ** 2 for n in range(0, 10)]
import googlemaps
gmaps_key = "AIzaSyC0rNHG0J4CCtvHUWhbw8pq4bu7v1UJBdo"
gmaps = googlemaps.Client(key=gmaps_key)
gmaps.geocode("서울영등포경찰서", language="ko") # 단순 테스트 코드
tmp = gmaps.geocode("서울영등포경찰서", language="ko")
len(tmp)
type(tmp[0].get("geometry")["location"])
print(tmp[0].get("geometry")["location"]["lat"])
print(tmp[0].get("geometry")["location"]["lng"])
tmp[0].get("formatted_address").split()[2]
crime_station.head()
for idx, rows in crime_station.iterrows():
station_name = "서울" + str(idx) + "경찰서"
tmp = gmaps.geocode(station_name, language="ko")
tmp[0].get("formatted_address")
tmp_gu = tmp[0].get("formatted_address")
lat = tmp[0].get("geometry")["location"]["lat"]
lng = tmp[0].get("geometry")["location"]["lng"]
crime_station.loc[idx, "lat"] = lat
crime_station.loc[idx, "lng"] = lng
crime_station.loc[idx, "구별"] = tmp_gu.split()[2]
print(count)
count = count + 1
crime_station.head()
crime_station.columns.get_level_values(0)[2] + crime_station.columns.get_level_values(1)[2]
len(crime_station.columns.get_level_values(0))
tmp = [
crime_station.columns.get_level_values(0)[n] + crime_station.columns.get_level_values(1)[n]
for n in range(0, len(crime_station.columns.get_level_values(0)))
]
tmp
tmp, len(tmp), len(crime_station.columns.get_level_values(0))
crime_station.columns = tmp
crime_station.head()
crime_station.to_csv("../data/02. crime_in_Seuol_raw.csv", sep=",", encoding="utf-8")
pd.read_csv("../data/02. crime_in_Seoul_raw.csv").head(2)
crime_anal_station = pd.read_csv(
"../data/02. crime_in_Seoul_raw.csv", index_col=0, encoding="utf-8") # index_col "구분"을 인덱스 컬럼으로 설정
crime_anal_station.head()
crime_anal_gu = pd.pivot_table(crime_anal_station, index="구별", aggfunc=np.sum)
del crime_anal_gu["lat"]
crime_anal_gu.drop("lng", axis=1, inplace=True)
crime_anal_gu.head()
crime_anal_gu["강도검거"] / crime_anal_gu["강도발생"]
crime_anal_gu[["강도검거", "살인검거"]].div(crime_anal_gu["강도발생"], axis=0).head(3)
num = ["강간검거", "강도검거", "살인검거", "절도검거", "폭력검거"]
den = ["강간발생", "강도발생", "살인발생", "절도발생", "폭력발생"]
crime_anal_gu[num].div(crime_anal_gu[den].values).head()
target = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]
num = ["강간검거", "강도검거", "살인검거", "절도검거", "폭력검거"]
den = ["강간발생", "강도발생", "살인발생", "절도발생", "폭력발생"]
crime_anal_gu[target] = crime_anal_gu[num].div(crime_anal_gu[den].values) * 100
crime_anal_gu.head()
del crime_anal_gu["강간검거"]
del crime_anal_gu["강도검거"]
crime_anal_gu.drop(["살인검거", "절도검거", "폭력검거"], axis=1, inplace=True)
crime_anal_gu.head()
crime_anal_gu[crime_anal_gu[target] > 100] = 100
crime_anal_gu.head()
crime_anal_gu.rename(columns={"강간발생": "강간", "강도발생": "강도", "살인발생": "살인", "절도발생": "절도", "폭력발생": "폭력"},
inplace=True)
crime_anal_gu.head()
crime_anal_gu.head()
crime_anal_gu["강도"] / crime_anal_gu["강도"].max()
col = ["살인", "강도", "강간", "절도", "폭력"]
crime_anal_norm = crime_anal_gu[col] / crime_anal_gu[col].max()
crime_anal_norm.head()
crime_anal_gu.head(1)
col2 = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]
crime_anal_norm[col2] = crime_anal_gu[col2]
crime_anal_norm.head()
result_CCTV = pd.read_csv("../data/01. CCTV_result.csv", index_col="구별", encoding="utf-8")
result_CCTV.head()
crime_anal_norm[["인구수", "CCTV"]] = result_CCTV[["인구수", "소계"]]
crime_anal_norm.head()
col = ["강간", "강도", "살인", "절도", "폭력"]
crime_anal_norm["범죄"] = np.mean(crime_anal_norm[col], axis=1) # axis=1 행, axis=0 열
import numpy as np
np.array([0.357143, 1.000000, 1.000000, 0.977118, 0.733773])
np.mean(np.array([0.357143, 1.000000, 1.000000, 0.977118, 0.733773]))
np.array(
[[0.357143, 1.000000, 1.000000, 0.977118, 0.733773],
[0.285714, 0.358974, 0.310078, 0.477799, 0.463880]]
)
np.mean(np.array(
[[0.357143, 1.000000, 1.000000, 0.977118, 0.733773],
[0.285714, 0.358974, 0.310078, 0.477799, 0.463880]]
), axis=1) # axis=1 행, axis=0 열
np.mean(np.array(
[[0.357143, 1.000000, 1.000000, 0.977118, 0.733773],
[0.285714, 0.358974, 0.310078, 0.477799, 0.463880]]
col = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]
crime_anal_norm["검거"] = np.mean(crime_anal_norm[col], axis=1) # axis=1 행을 따라서 연산하는 옵션
crime_anal_norm.head()
crime_anal_norm
crime_anal_norm = pd.read_csv("../data/02. crime_in_Seoul_final.csv", index_col=0, encoding="utf-8")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
plt.rcParams["axes.unicode_minus"] = False
rc("font", family="Malgun Gothic") # Windows: Malgun Gothic / Mac: Arial Unicode MS
get_ipython().run_line_magic("matplotlib", "inline")
np.linspace(0, 14, 100)
x = np.linspace(0, 14, 100)
y1 = np.sin(x)
y2 = 2 np.sin(x + 0.5)
y3 = 3 np.sin(x + 1.0)
y4 = 4 * np.sin(x + 1.5)
plt.figure(figsize=(10, 6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()
sns.set_style("white")
plt.figure(figsize=(10, 6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()
sns.set_style("dark")
plt.figure(figsize=(10, 6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()
sns.set_style("whitegrid")
plt.figure(figsize=(10, 6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()
sns.set_style("darkgrid")
plt.figure(figsize=(10, 6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()
plt.figure(figsize=(8, 6))
sns.boxplot(x=tips["total_bill"])
plt.show()
tips["day"].unique()
plt.figure(figsize=(8, 6))
sns.boxplot(x="day", y="total_bill", data=tips)
plt.show()
tips.head(2)
plt.figure(figsize=(8, 6))
sns.boxplot(x="day", y="total_bill", data=tips, hue="smoker", palette="Set1") # Set 1 ~ 3
plt.show()
plt.figure(figsize=(8, 6))
sns.swarmplot(x="day", y="total_bill", data=tips, color="0.5")
plt.show()
plt.figure(figsize=(8, 6))
sns.boxplot(x="day", y="total_bill", data=tips)
sns.swarmplot(x="day", y="total_bill", data=tips, color="0.25")
plt.show()
tips
sns.set_style("darkgrid")
sns.lmplot(x="total_bill", y="tip", data=tips, height=7) # size => height
plt.show()
tips
sns.set_style("darkgrid")
sns.lmplot(x="total_bill", y="tip", data=tips, height=7, hue="smoker")
plt.show()
flights = flights.pivot(index="month", columns="year", values="passengers")
flights.head()
plt.figure(figsize=(10, 8))
sns.heatmap(data=flights, annot=True, fmt="d") # annot=True 데이터 값 표시, fmt="d" 정수형 표현
plt.show()
plt.figure(figsize=(10, 8))
sns.heatmap(flights, annot=True, fmt="d", cmap="YlGnBu")
plt.show()
sns.set_style("ticks")
sns.pairplot(iris)
plt.show()
iris.head(2)
iris["species"].unique()
sns.pairplot(iris, hue="species")
plt.show()
sns.pairplot(iris,
x_vars=["sepal_width", "sepal_length"],
y_vars=["petal_width", "petal_length"])
plt.show()
sns.set_style("darkgrid")
sns.lmplot(
x="x",
y="y",
data=anscombe.query("dataset == 'II'"), # 2차식
order=1, # 차수에 따라 옵션 변경
ci=None,
height=7,
scatter_kws={"s": 80}) # ci 신뢰구간 선택
plt.show()
sns.set_style("darkgrid")
sns.lmplot(
x="x",
y="y",
data=anscombe.query("dataset == 'II'"),
order=2, # 차수에 따라 옵션 변경
ci=None,
height=7,
scatter_kws={"s": 80}) # ci 신뢰구간 선택
plt.show()
sns.set_style("darkgrid")
sns.lmplot(
x="x",
y="y",
data=anscombe.query("dataset == 'III'"), # 아웃라이어 있는 데이터
ci=None,
height=7,
scatter_kws={"s": 80}) # ci 신뢰구간 선택
plt.show()
sns.set_style("darkgrid")
sns.lmplot(
x="x",
y="y",
data=anscombe.query("dataset == 'III'"),
robust=True, # 원본에서 많이 떨어진 데이터는 없는 셈 친다
ci=None,
height=7,
scatter_kws={"s": 80}) # ci 신뢰구간 선택
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
plt.rcParams["axes.unicode_minus"] = False
get_ipython().run_line_magic("matplotlib", "inline")
rc("font", family="Malgun Gothic") # Windows: Malgun Gothic Mac: Arial Unicode MS
crime_anal_norm.head()
sns.pairplot(data=crime_anal_norm, vars=["살인", "강도", "폭력"], kind="reg", height=3);
crime_anal_norm.head(1)
def drawGraph():
sns.pairplot(
data=crime_anal_norm,
x_vars=["인구수", "CCTV"],
y_vars=["살인", "강도"],
kind="reg",
height=4
)
plt.show()
drawGraph()
def drawGraph():
sns.pairplot(
data=crime_anal_norm,
x_vars=["인구수", "CCTV"],
y_vars=["살인검거율", "폭력검거율"],
kind="reg",
height=4
)
plt.show()
drawGraph()
def drawGraph():
sns.pairplot(
data=crime_anal_norm,
x_vars=["인구수", "CCTV"],
y_vars=["절도검거율", "강도검거율"],
kind="reg",
height=4
)
plt.show()
drawGraph()
crime_anal_norm.head(3)
def drawGraph():
# 데이터 프레임 생성
target_col = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율", "검거"]
crime_anal_norm_sort = crime_anal_norm.sort_values(by="검거", ascending=False) # 내림차순
# 그래프 설정
plt.figure(figsize=(10, 10))
sns.heatmap(
data=crime_anal_norm_sort[target_col],
annot=True, # 데이터값 표현
fmt="f", # d: 정수, f: 실수
linewidths=0.5, # 간격설정
cmap="RdPu",
)
plt.title("범죄 검거 비율(정규화된 검거의 합으로 정렬")
plt.show()
drawGraph()
crime_anal_norm.head(1)
def drawGraph():
# 데이터 프레임 생성
target_col = ["살인", "강도", "강간", "절도", "폭력", "범죄"]
crime_anal_norm_sort = crime_anal_norm.sort_values(by="범죄", ascending=False) # 내림차순
# 그래프 설정
plt.figure(figsize=(10, 10))
sns.heatmap(
data=crime_anal_norm_sort[target_col],
annot=True, # 데이터값 표현
fmt="f", # 실수값으로 표현
linewidths=0.5, # 간격설정
cmap="RdPu",
)
plt.title("범죄 비율(정규화된 발생 건수로 정렬)")
plt.show()
drawGraph()
crime_anal_norm.to_csv("../data/02. crime_in_Seoul_final.csv", sep=",", encoding="utf-8")
import folium
import pandas as pd
import json
location: tuple or list, default None
Latitude and Longitude of Map (Northing, Easting).
m = folium.Map(location=[37.544564958079896, 127.05582307754338], zoom_start=14) # 0 ~ 18
m
m.save("./folium.html")
!ls
- "OpenStreetMap"
- "Mapbox Bright" (Limited levels of zoom for free tiles)
- "Mapbox Control Room" (Limited levels of zoom for free tiles)
- "Stamen" (Terrain, Toner, and Watercolor)
- "Cloudmade" (Must pass API key)
- "Mapbox" (Must pass API key)
- "CartoDB" (positron and dark_matter)
m = folium.Map(
location=[37.544564958079896, 127.05582307754338],
zoom_start=14,
tiles="OpenStreetMap"
) # 0 ~ 18
m
folium.Marker((37.54712311308356, 127.04721916917774)).add_to(m)
folium.Marker(
location=[37.544564958079896, 127.05582307754338],
popup="Subway"
).add_to(m)
folium.Marker(
location=[37.544564958079896, 127.05582307754338],
popup="Subway",
tooltip="성수역"
).add_to(m)
folium.Marker(
location=[37.54558642069953, 127.05729705810472],
popup="<a href='https://zero-base.co.kr/' target=_'blink'>제로베이스",
tooltip="Zerobase"
).add_to(m)
m
folium.Marker(
(37.54712311308356, 127.04721916917774),
icon=folium.Icon(color="black", icon='info-sign')
).add_to(m)
folium.Marker(
location=[37.544564958079896, 127.05582307754338],
popup="Subway",
tooltip="icon color",
icon=folium.Icon(
color="red",
icon_color="blue",
icon="cloud")
).add_to(m)
folium.Marker(
location=[37.54035903907497, 127.06913328776446], # 건대입구역
popup="건대입구역",
tooltip="Icon custom",
icon=folium.Icon(
color="purple",
icon_color="white",
icon="glyphicon glyphicon-cloud",
angle=50,
prefix="glyphicon") # glyphicon
).add_to(m)
m
m.add_child(folium.ClickForMarker(popup="ClickForMarker"))
m.add_child(folium.LatLngPopup())
m = folium.Map(
location=[37.55068861733562, 127.04420997492151],
zoom_start=14,
tiles="OpenStreetMap"
) # 0 ~ 18
folium.Circle(
location=[37.555243442409406, 127.04370422643919], # 한양대학교
radius=100,
fill=True,
color="#eb9e34",
fill_color="red",
popup="Circle Popup",
tooltip="Circle Tooltip"
).add_to(m)
folium.CircleMarker(
location=[37.54347089498245, 127.04439204503049], # 한양대학교
radius=100,
fill=True,
color="#34ebc6",
fill_color="#c634eb",
popup="CircleMarker Popup",
tooltip="CircleMarker Tooltip"
).add_to(m)
m
import json
state_data = pd.read_csv("../data/02. US_Unemployment_Oct2012.csv")
state_data.tail(2)
m = folium.Map([43, -102], zoom_start=3)
folium.Choropleth(
geo_data="../data/02. us-states.json", # 경계선 좌표값이 담긴 데이터
data=state_data, # Series or DataFrame
columns=["State", "Unemployment"], # DataFrame columns
key_on="feature.id",
fill_color="BuPu",
fill_opacity=0.5, # 0~1
line_opacity=0.2, # 0~1
legend_name="Unemployment rate (%)"
).add_to(m)
m
df.tail(2)
df.info()
df = df.dropna()
df.info()
df = df.reset_index(drop=True)
df.tail(2)
df.columns
df = df.rename(columns={"연번 ": "연번", "분류 ": "분류"})
df.연번[:10]
del df["연번"]
df.tail(2)
df.위도[0]
df.describe()
m = folium.Map(location=[37.50589466533131, 126.93450729567374], zoom_start=13)
for idx, rows in df.iterrows():
# location
lat, lng = rows.위도, rows.경도
# Marker
folium.Marker(
location=[lat, lng],
popup=rows.주소,
tooltip=rows.분류,
icon=folium.Icon(
icon="home",
color="lightred" if rows.세대수 >= 199 else "lightblue",
icon_color="darkred" if rows.세대수 >= 199 else "darkblue",
)
).add_to(m)
# CircleMarker
folium.Circle(
location=[lat, lng],
radius=rows.세대수 * 0.5,
fill=True,
color="pink" if rows.세대수 >= 518 else "green",
fill_color="pink" if rows.세대수 >= 518 else "green",
).add_to(m)
m
import json
crime_anal_norm = pd.read_csv(
"../data/02. crime_in_Seoul_final.csv", index_col=0, encoding="utf-8"
)
geo_path = "../data/02. skorea_municipalities_geo_simple.json"
geo_str = json.load(open(geo_path, encoding="utf-8"))
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")
my_map.choropleth(
geo_data=geo_str,
data=crime_anal_norm["살인"],
columns=[crime_anal_norm.index, crime_anal_norm["살인"]],
fill_color="PuRd",
key_on="feature.id",
fill_opacity=0.7,
line_opacity=0.2,
legend_name="정규화된 살인 발생 건수",
)
my_map
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")
my_map.choropleth(
geo_data=geo_str,
data=crime_anal_norm["강간"],
columns=[crime_anal_norm.index, crime_anal_norm["강간"]],
fill_color="PuRd",
key_on="feature.id",
fill_opacity=0.7,
line_opacity=0.2,
legend_name="정규화된 강간 발생 건수",
)
my_map
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")
my_map.choropleth(
geo_data=geo_str,
data=crime_anal_norm["범죄"],
columns=[crime_anal_norm.index, crime_anal_norm["범죄"]],
fill_color="PuRd",
key_on="feature.id",
fill_opacity=0.7,
line_opacity=0.2,
legend_name="정규화된 범죄 발생 건수",
)
my_map
tmp_criminal = crime_anal_norm["범죄"] / crime_anal_norm["인구수"] # 인구수 대비 범죄발생 비율
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")
my_map.choropleth(
geo_data=geo_str,
data=tmp_criminal,
columns=[crime_anal_norm.index, tmp_criminal],
fill_color="PuRd",
key_on="feature.id",
fill_opacity=0.7,
line_opacity=0.2,
legend_name="정규화된 범죄 발생 건수",
)
my_map
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11)
for idx, rows in crime_anal_station.iterrows():
folium.Marker([rows["lat"], rows["lng"]]).add_to(my_map)
my_map
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11)
for idx, rows in crime_anal_station.iterrows():
folium.CircleMarker(
[rows["lat"], rows["lng"]],
radius=rows["검거"] * 50,
popup=rows["구분"] + " : " + "%.2f" % rows["검거"],
color="#3186cc",
fill=True,
fill_color="#3186cc",
).add_to(my_map)
my_map
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11)
my_map.choropleth(
geo_data=geo_str,
data=crime_anal_norm["범죄"],
columns=[crime_anal_norm.index, crime_anal_norm["범죄"]],
fill_color="PuRd",
key_on="feature.id",
fill_opacity=0.7,
line_opacity=0.2,
)
for idx, rows in crime_anal_station.iterrows():
folium.CircleMarker(
[rows["lat"], rows["lng"]],
radius=rows["검거"] * 50,
popup=rows["구분"] + " : " + "%.2f" % rows["검거"],
color="#3186cc",
fill=True,
fill_color="#3186cc",
).add_to(my_map)
my_map
def drawGraph():
plt.figure(figsize=(10, 10))
sns.heatmap(crime_loc_norm_sort, annot=True, fmt="f", linewidths=0.5, cmap="RdPu")
plt.title("범죄와 발생 장소")
plt.show()
drawGraph()