from haversine import haversine
def dist(x):
    """Find the subway station nearest to one row's coordinates.

    Args:
        x: A row-like object (for example the Series handed over by
            ``DataFrame.apply(axis=1)``) providing the keys "위도"
            (latitude), "경도" (longitude) and "상호명" (business name).

    Returns:
        tuple: ``(station_name, distance)`` where ``distance`` is the
        smallest ``haversine`` result multiplied by 1000 (presumably
        kilometres converted to metres -- confirm against the unit
        ``haversine`` is configured with).

    Raises:
        ValueError: If no station in the module-level ``subway`` table
            yields a comparable distance (for example when it is empty).
    """
    res_point = (x["위도"], x["경도"])

    nearest_name = None
    nearest_dist = float("inf")
    # Iterate the three columns in lockstep instead of ``subway.loc[i, ...]``
    # so the scan does not depend on ``subway`` having a 0..n-1 index.
    for name, lat, lng in zip(subway["name"], subway["lat"], subway["lng"]):
        distance = haversine(res_point, (lat, lng))
        # Track the running minimum directly (no repeated min() over a
        # growing list); strict "<" keeps the first station on ties.
        if distance < nearest_dist:
            nearest_name = name
            nearest_dist = distance

    if nearest_dist == float("inf"):
        raise ValueError("subway contains no station with a usable distance")

    # Report the distance of the station that was actually selected, not the
    # distance of whichever station happened to be visited last.
    print("{} {} : {}".format(x["상호명"], nearest_name, nearest_dist))
    return nearest_name, nearest_dist * 1000
# Attach the (station, distance) pair that dist returns for every row.
# The intermediate frame is built on data_nowon's own index: assigning a
# DataFrame to columns aligns on index labels, so a fresh 0..n-1 index would
# mismatch rows (or produce NaN) whenever data_nowon is a filtered subset.
data_nowon[["역 명", "거리"]] = pd.DataFrame(
    list(data_nowon.apply(dist, axis=1)),
    index=data_nowon.index,
)
import numpy as np
def haversine_np(lon1, lat1, lon2, lat2):
    """Compute the haversine great-circle distance between two points.

    All four arguments are angles in degrees and may be scalars or NumPy
    arrays; the arithmetic is element-wise, so array inputs broadcast
    against each other.

    Args:
        lon1: Longitude of the first point(s).
        lat1: Latitude of the first point(s).
        lon2: Longitude of the second point(s).
        lat2: Latitude of the second point(s).

    Returns:
        The distance computed with a sphere radius of 6367 km, multiplied
        by 1000 (i.e. expressed in metres).
    """
    lam_a = np.radians(lon1)
    phi_a = np.radians(lat1)
    lam_b = np.radians(lon2)
    phi_b = np.radians(lat2)

    sin_half_dphi = np.sin((phi_b - phi_a) / 2.0)
    sin_half_dlam = np.sin((lam_b - lam_a) / 2.0)

    # Haversine of the central angle between the two points.
    hav = sin_half_dphi**2 + np.cos(phi_a) * np.cos(phi_b) * sin_half_dlam**2
    central_angle = 2 * np.arcsin(np.sqrt(hav))

    return 6367 * central_angle * 1000
def cal_dist(lat, lng):
    """Return the nearest subway station to a point and its distance.

    Args:
        lat: Latitude of the query point, in degrees.
        lng: Longitude of the query point, in degrees.

    Returns:
        pandas.Series: ``[station_name, distance]`` where ``distance`` is the
        value ``haversine_np`` produced for the closest row of the
        module-level ``subway`` table.
    """
    # haversine_np expects (lon, lat) ordering for both points; passing the
    # latitude first would swap the two angles inside the formula and yield
    # wrong distances.
    distances = haversine_np(
        lng, lat, subway["lng"].values, subway["lat"].values
    )
    nearest = np.argmin(distances)
    # argmin yields a position in the array, not an index label, so look the
    # name up positionally rather than through label-based .loc.
    station = subway["name"].iloc[nearest]
    return pd.Series([station, distances[nearest]])
# For every row, look up the nearest station and its distance with cal_dist
# and store the two results in the "역명" and "거리" columns.
data[["역명", "거리"]] = data.apply(
    lambda row: cal_dist(row["위도"], row["경도"]),
    axis=1,
)
# → 연산량이 많아질수록 벡터화(vectorization)의 효과가 더 커지는 것 같다.
#   (dist는 역마다 파이썬 루프를 돌지만, cal_dist는 모든 역까지의 거리를 NumPy 배열 연산 한 번으로 계산한다.)