[EDA] mini project 12 _ 세계 테러 분석

jaam._.mini·2024년 1월 28일
0

전처리


데이터 불러오기

import pandas as pd

# Read the Global Terrorism Database CSV with the latin-1 codec, then preview the first rows.
csv_path = './globalterrorismdb_0718dist.csv'
raw_data = pd.read_csv(csv_path, encoding='latin-1')
raw_data.head()

모든 컬럼 확인

# List every column name of the raw table as an array.
raw_data.columns.to_numpy()

컬럼_데이터 확인

# Look at the raw values stored in the 'summary' column.
raw_data.loc[:, 'summary'].to_numpy()

보기 좋게 컬럼명 변경

# Columns needed later: date, issue, country, deaths, injuries, region, attack type (pattern of terror).

# Work on a copy so raw_data itself is never modified.
terr_df = raw_data.copy()

# Replace the raw column names with shorter, readable ones (renamed in place).
terr_df.rename(columns={
    'eventid':'eventid', 'iyear':'Year','imonth':'Month','iday':"day",
    'country_txt':'Country','region_txt':'Region','provstate':'State','city':'City',
    'latitude':'lat',  'longitude':'lng',
    'targtype1_txt':'Targettype','attacktype1_txt':'Attacktype','weaptype1_txt':'Weapon',
    'nkill':'Kill','nwound':'Wound',
    'gname':'Group','summary':'Summary','motive':'Motive',
}, inplace=True)

# No reset_index() call here: without assigning its result it would have no effect on terr_df.
terr_df.tail(2)

사용할 컬럼으로 변경

# Keep only the renamed columns used by the analysis, in this order.
keep_columns = [
    'eventid', 'Year', 'Month', 'day',
    'Country', 'Region', 'State', 'City',
    'lat', 'lng',
    'Targettype', 'Attacktype', 'Weapon',
    'Kill', 'Wound', 'Group', 'Summary', 'Motive',
]
terr_df = terr_df[keep_columns]

비어 있는 데이터 확인 : isnull().sum()

# Number of missing values in each column.
terr_df.isna().sum()

데이터 타입 확인

# Print the per-column dtype and non-null count summary of the working table.
terr_df.info()




분석 시작!


# Check which distinct years occur in the Year column.
year = terr_df.Year.unique()
year
# Before plotting, check the number of records per year (NaN kept), ordered by year.
year_count = terr_df.Year.value_counts(dropna=False).sort_index()
year_count.iloc[:4]

연간 테러 발생 건수

import matplotlib.pyplot as plt
import seaborn as sns

# Attacks per year, ordered by year (dropna=False keeps a NaN bucket if one exists).
year_count = terr_df['Year'].value_counts(dropna=False).sort_index()
# Take the x values from the same Series as the bar heights so every bar is labelled with its own
# year. terr_df['Year'].unique() lists years in order of first appearance, which is only aligned
# with the sorted counts when the rows happen to be sorted by year.
year = year_count.index.to_numpy()


plt.figure(figsize=(12, 4))
ax = sns.barplot(x=year, y=year_count.values, palette='YlOrBr')
# Write the count above each bar, rotated so neighbouring labels do not overlap.
for p in ax.patches:
    # Heights are floats; format without decimals so the label reads as a count.
    ax.annotate(f'{p.get_height():.0f}', (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='baseline', fontsize=10, color='black', xytext=(0, 5),
                textcoords='offset points',rotation = 90)
plt.xlabel('Attack Year')
plt.xticks(rotation=50, fontsize=7)
plt.ylabel('Number of attacks cases')
plt.title('Attacks In Years', fontsize=15)
plt.show()

# Method 1
# # Which data to plot?
# year = terr_df['Year'].unique()
# year_count = terr_df['Year'].value_counts(dropna=False).sort_index()

# # Plot (the order of the calls matters)
# plt.figure(figsize=(12, 4))
# sns.barplot(x=year, y=year_count, palette='YlOrBr')
# plt.xlabel('Attack Year')
# plt.xticks(rotation=50, fontsize=7)
# plt.ylabel('Number of attacks cases')
# plt.title('Attacks In Years', fontsize=15)
# plt.show()


# Method 2
# # Use sns.countplot
# plt.figure(figsize = (15,5))
# sns.countplot(x='Year',data=terr_df)
# plt.xticks(rotation=90)
# plt.xlabel('year', fontsize=10)
# plt.ylabel('counts', fontsize=10)
# plt.title('Number of terrorist activites each year', fontsize=15)
# plt.show()

# Method 3
# sns.countplot plus the counts added as text labels
# plt.figure(figsize = (15,5))
# ax = sns.countplot(x='Year',data=terr_df)
# plt.xticks(rotation=90)
# # Adding annotations to the chart
# for p in ax.patches:
#     ax.annotate(f'{p.get_height()}', (p.get_x() + p.get_width() / 2., p.get_height()),
#                 ha='center', va='baseline', fontsize=10, color='black', xytext=(0, 5),
#                 textcoords='offset points',rotation = 90)
    
# plt.title('Attacks In Years')
# plt.show

테러발생 상위 10개국

# Ten countries with the most recorded attacks (value_counts sorts descending).
terr_rank_10 = terr_df['Country'].value_counts()[:10]
terr_rank_10
# Attack counts of those ten countries, one value per country.
# .unique() is deliberately not used: it would drop tied counts and leave fewer values than countries.
terr_counts = terr_rank_10.to_numpy()
terr_counts
# Country names in the same order as terr_counts.
terr_rank = terr_rank_10.index
terr_rank
# Vertical bar chart of the ten countries with the most recorded attacks.
top_10_country = terr_df.Country.value_counts().iloc[:10]
plt.figure(figsize=(12, 4))

# Remove 'Unknown'
# top_10_cities = top_10_cities[top_10_cities.index != 'Unknown']

sns.barplot(
    x=top_10_country.index,
    y=top_10_country.values,
    palette='rocket',
)
plt.xticks(rotation=30)
plt.ylabel('Counts')
plt.xlabel('Country')
plt.title('Most attacks Country Top10')

plt.show()

# Another way
# terr_rank = terr_df['Country'].value_counts()[:10].index
# terr_counts = terr_df.Country.value_counts()[:10].unique()

# plt.figure(figsize=(12, 4))
# sns.barplot(x=terr_rank, y=terr_counts, palette='YlOrBr_r')

# plt.xlabel('Countries')
# plt.xticks(fontsize=7)
# plt.ylabel('Count')
# plt.title('Most attacks Country Top10')
# plt.show()

# Same top-10 ranking drawn horizontally: counts on the x axis, country names on the y axis.
country_top10 = terr_df['Country'].value_counts().head(10)
fig, ax = plt.subplots(figsize=(12, 4))
ax = sns.barplot(x=country_top10.values, y=country_top10.index, palette='RdYlGn')
ax.set_title('Most attacks Country Top10')

# Build the top-10 countries as a DataFrame with columns ['Country', 'Terror_cases'].
cases_per_country = terr_df.groupby('Country').size().to_frame(name='Terror_cases')
terr_rank_10 = (
    cases_per_country
    .sort_values('Terror_cases', ascending=False)
    .head(10)
    .reset_index()
)
terr_rank_10

사상자 상위 10개국

# Attack counts of the ten countries with the most recorded attacks.
# value_counts() yields the number of incidents, so the column is labelled 'Attacks'
# (it does not contain the number of wounded).
coun_terror = terr_df['Country'].value_counts()[:10].to_frame()
coun_terror.columns = ['Attacks']
# Total deaths per country; sum() skips missing Kill values.
coun_kill = terr_df.groupby('Country')['Kill'].sum().to_frame()
# Left merge on the country index keeps only the ten ranked countries.
coun_terror.merge(coun_kill, left_index=True, right_index=True, how='left').plot.bar()
fig = plt.gcf()
fig.set_size_inches(12, 4)
plt.xticks(rotation=0)
plt.title('Attacks & Kill Country Top10')
plt.show()

지역별 테러 특성 확인

# Share of all recorded attacks per region, drawn as a ring (donut) chart.
resion_counts = terr_df.Region.value_counts()

fig = plt.figure(figsize=(12, 8))

# Wedge labels are turned off; the legend below names the regions instead.
pie_options = dict(labels=None, autopct='%.1f%%', startangle=90, textprops={'fontsize': 10})
plt.pie(resion_counts, **pie_options)

# A white disc drawn over the centre turns the pie into a ring.
centre_circle = plt.Circle((0, 0), 0.4, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

plt.axis('equal')
plt.title('Terrorist attack by Region')
plt.legend(resion_counts.index, loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

# Indexing reminders:
#   df.loc['row label':'row label', 'column label':'column label']
#   df.iloc[row number:row number, column number:column number]
#   .unstack(fill_value=0): turns the result into a DataFrame and sets missing combinations to 0
attacks_per_region_year = terr_df.groupby(['Region', 'Year']).size()
region_year_counts = attacks_per_region_year.unstack(fill_value=0)
region_year_counts.index
region_year_counts.columns
# Line chart: attacks per year, one line per region.
year_by_region = pd.crosstab(terr_df['Year'], terr_df['Region'])
year_by_region.plot(figsize=(12, 4))
plt.title('Terrorist Attack By Region', size=10)
plt.ylabel('counts')

# region_year_counts = terr_df.groupby(['Region', 'Year']).size().unstack(fill_value=0)

# plt.figure(figsize=(12, 4))

# for region in region_year_counts.index:
#     plt.plot(region_year_counts.columns, region_year_counts.loc[region], label=region)

# plt.title('Terrorist attack by Region')
# plt.xlabel('Year')
# plt.ylabel('counts')
# plt.legend(loc='upper left')
# plt.grid(True)

# plt.show()

# Attacks per region, bars ordered from the most to the fewest attacks.
region_order = terr_df['Region'].value_counts().index
plt.figure(figsize=(12, 4))
sns.countplot(x=terr_df['Region'], order=region_order)
plt.xlabel('region')
plt.xticks(rotation=30, fontsize=8)
plt.title('counts')
plt.show()

# Stacked horizontal bars: how the attack types are distributed within each region.
region_by_attacktype = pd.crosstab(terr_df['Region'], terr_df['Attacktype'])
region_by_attacktype.plot.barh(stacked=True)
fig = plt.gcf()
fig.set_size_inches(12, 7)
plt.show()

# Attack counts per region. value_counts() yields the number of incidents, so the column is
# labelled 'Attacks' (it does not contain the number of wounded).
coun_terror = terr_df['Region'].value_counts().to_frame()
coun_terror.columns = ['Attacks']
# Total deaths per region; sum() skips missing Kill values.
coun_kill = terr_df.groupby('Region')['Kill'].sum().to_frame()
# Join both measures on the region index and draw them side by side.
coun_terror.merge(coun_kill, left_index=True, right_index=True, how='left').plot.bar()
fig = plt.gcf()
fig.set_size_inches(18, 6)
plt.xticks(rotation=30, fontsize=8)
plt.show()

연도별 테러 양상 분석

import folium
import pandas as pd
import json
from folium.plugins import MarkerCluster 

# Select the incidents recorded in 2014.
terr_topYear = terr_df['Year'] == 2014
filterData = terr_df[terr_topYear]

# filterData.info()
# Pick the map fields by name so the result does not depend on the column order,
# then drop rows where the city or a coordinate is missing.
filterData_info = filterData[['City', 'lat', 'lng']].dropna()
filterData_list = filterData_info.values.tolist()

# World map; named terror_map so the builtin map() is not shadowed.
terror_map = folium.Map(location=[0, 30], tiles='CartoDB positron', zoom_start=2)

# One clustered marker per incident, with the city name as popup text.
markerCluster = MarkerCluster().add_to(terror_map)
for city, lat, lng in filterData_list:
    folium.Marker(location=[lat, lng], popup=city).add_to(markerCluster)
terror_map

# NOTE(review): terr_topRate was read here before any assignment in this file. It is assumed to be
# the 2012-2014 subset that later cells build under the same name — confirm.
terr_topRate = terr_df[terr_df['Year'].isin([2012, 2013, 2014])].copy()

# Attack counts per region next to total deaths per region.
# value_counts() yields the number of incidents, so the column is labelled 'Attacks'.
coun_terror = terr_topRate['Region'].value_counts().to_frame()
coun_terror.columns = ['Attacks']
coun_kill = terr_topRate.groupby('Region')['Kill'].sum().to_frame()
coun_terror.merge(coun_kill, left_index=True, right_index=True, how='left').plot.bar()
fig = plt.gcf()
fig.set_size_inches(18, 6)
plt.xticks(rotation=30, fontsize=8)
plt.show()

plt.figure(figsize=(12, 4))
# Keep the ranking in its own variable so terr_topRate remains a DataFrame.
top_10_country_recent = terr_topRate['Country'].value_counts().head(10)

# Remove 'Unknown'
# top_10_cities = top_10_cities[top_10_cities.index != 'Unknown']

sns.barplot(x=top_10_country_recent.index, y=top_10_country_recent.values, palette='rocket')
plt.title('Most attacks Country Top10')
plt.xlabel('Country')
plt.ylabel('Counts')
plt.xticks(rotation=30)

plt.show()

# Restrict the data to incidents from 2012, 2013 and 2014.
terr_topRate = terr_df.copy()
in_2012_to_2014 = terr_topRate['Year'].isin([2012, 2013, 2014])
terr_topRate = terr_topRate.loc[in_2012_to_2014]

# head() without an argument keeps the five most frequent weapon types.
top_5_weapon_types = terr_topRate.Weapon.value_counts().head()
plt.figure(figsize=(12, 4))

sns.barplot(
    x=top_5_weapon_types.index,
    y=top_5_weapon_types.values,
    palette='flare',
)
plt.xlabel('Weapon Types')
plt.ylabel('counts')
plt.title('Top 5 Most Used Weapon Types', fontsize=10)
plt.show()
![](https://velog.velcdn.com/images/jaam_mini/post/738a0a6a-6060-4ef8-a3a5-7035eb311324/image.png)

# Rebuild the 2012-2014 subset from the full table.
terr_topRate = terr_df.copy()
in_2012_to_2014 = terr_topRate['Year'].isin([2012, 2013, 2014])
terr_topRate = terr_topRate.loc[in_2012_to_2014]

# Attack counts of the ten most-attacked countries in that period.
coun_terror = terr_topRate.Country.value_counts().iloc[:10].to_frame()
coun_terror.columns = ['Attacks']
# Total deaths per country, joined onto the ten ranked countries.
coun_kill = terr_topRate.groupby('Country')['Kill'].sum().to_frame()
attacks_vs_kills = coun_terror.merge(coun_kill, left_index=True, right_index=True, how='left')
attacks_vs_kills.plot.bar()
fig = plt.gcf()
fig.set_size_inches(12, 4)
plt.title('Attacks & Killed (2012-2014)')
plt.show()

fig, ax = plt.subplots(figsize=(12, 4))
# Remove the 'Unknown' bucket by label instead of skipping position 0, so a real group is never
# discarded when 'Unknown' is not the most frequent entry. The nine most active named groups remain.
group_counts = terr_topRate['Group'].value_counts().drop('Unknown', errors='ignore').head(9)
ax = sns.barplot(x=group_counts.values, y=group_counts.index, palette='mako')
ax.set_title('Terrorist Groups with Highest Terror Attacks')

# Incidents in Iraq within the 2012-2014 subset.
df_Iraq = terr_topRate[terr_topRate['Country'] == 'Iraq']

fig, ax = plt.subplots(figsize=(12, 4))
# Five most active groups, with the 'Unknown' bucket removed by label rather than by position.
iraq_groups = df_Iraq['Group'].value_counts().drop('Unknown', errors='ignore').head(5)
ax = sns.barplot(x=iraq_groups.values, y=iraq_groups.index, palette='Blues')
ax.set_title('Terrorist Groups, Iraq (2012-2014)')

# NOTE(review): df_Iraq_Bag was never assigned in this file. The chart title says "by ISIL", so it
# is built here as the Iraq incidents attributed to ISIL; the group label below is assumed to be
# the spelling used in the dataset — confirm.
df_Iraq_Bag = df_Iraq[df_Iraq['Group'] == 'Islamic State of Iraq and the Levant (ISIL)']

plt.figure(figsize=(4, 2))
iraq_group_by_year = df_Iraq_Bag['Year'].value_counts()
sns.barplot(x=iraq_group_by_year.index, y=iraq_group_by_year.values, palette='viridis')
plt.title('Terror Attacks by ISIL, Iraq (2012-2014)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities in Iraq. The counts column is named explicitly: relying on the default
# name 'count' only works on pandas >= 2.0 and raises KeyError in sort_values on older versions.
iraq_city_counts = df_Iraq['City'].value_counts()[:10].to_frame(name='count')
iraq_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightblue')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Iraq(2012-2014)", fontsize=15)
plt.show()

# Five most frequent attack types in Iraq.
df_Iraq['Attacktype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='steelblue')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Attacktype")
plt.ylabel("Number of attack")
plt.title("Top 5 Attacktype, Iraq (2012-2014)",fontsize=15)
plt.show()

# Five most frequent target types in Iraq.
df_Iraq['Targettype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='steelblue')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Targettype")
plt.ylabel("count")
plt.title("Top 5 Targettype, Iraq (2012-2014)",fontsize=15)
plt.show()

# Incidents in Afghanistan within the 2012-2014 subset.
df_Af = terr_topRate[terr_topRate['Country'] == 'Afghanistan']

fig, ax = plt.subplots(figsize=(12, 4))
# Five most frequent Group values (no entry is excluded here).
af_groups = df_Af['Group'].value_counts()[:5]
ax = sns.barplot(x=af_groups.values, y=af_groups.index, palette='crest')
ax.set_title('Terrorist Groups, Afghanistan (2012-2014)')

# NOTE(review): df_Af_Ta was never assigned in this file. The chart title says "by Taliban", so it
# is built here as the Afghanistan incidents whose Group is 'Taliban' — confirm the label.
df_Af_Ta = df_Af[df_Af['Group'] == 'Taliban']

plt.figure(figsize=(4, 2))
af_group_by_year = df_Af_Ta['Year'].value_counts()
sns.barplot(x=af_group_by_year.index, y=af_group_by_year.values, palette='viridis')
plt.title('Terror Attacks by Taliban, Afghanistan (2012-2014)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ranks 2-11 of the city ranking: the most frequent entry is skipped
# (presumably an 'Unknown' city bucket — TODO confirm).
# The counts column is named explicitly: relying on the default name 'count' only works on
# pandas >= 2.0 and raises KeyError in sort_values on older versions.
af_city_counts = df_Af['City'].value_counts()[1:11].to_frame(name='count')
af_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='seagreen')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Afghanistan(2012-2014)", fontsize=15)
plt.show()

# Five most frequent attack types in Afghanistan.
df_Af['Attacktype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='cadetblue')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Attacktype")
plt.ylabel("Number of attack")
plt.title("Top 5 Attacktype, Afghanistan (2012-2014)",fontsize=15)
plt.show()

# Five most frequent target types in Afghanistan.
df_Af['Targettype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='cadetblue')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Targettype")
plt.ylabel("count")
plt.title("Top 5 Targettype, Afghanistan (2012-2014)",fontsize=15)
plt.show()

# Incidents in Pakistan within the 2012-2014 subset.
df_Pakistan = terr_topRate[terr_topRate['Country'] == 'Pakistan']

fig, ax = plt.subplots(figsize=(12, 4))
# Five most active groups, with the 'Unknown' bucket removed by label rather than by position.
pakistan_groups = df_Pakistan['Group'].value_counts().drop('Unknown', errors='ignore').head(5)
ax = sns.barplot(x=pakistan_groups.values, y=pakistan_groups.index, palette='YlOrBr')
ax.set_title('Terrorist Groups, Pakistan (2012-2014)')

# NOTE(review): df_Paki_TTP was never assigned in this file. The chart title says "by TTP", so it
# is built here as the Pakistan incidents attributed to TTP; the group label below is assumed to be
# the spelling used in the dataset — confirm.
df_Paki_TTP = df_Pakistan[df_Pakistan['Group'] == 'Tehrik-i-Taliban Pakistan (TTP)']

plt.figure(figsize=(4, 2))
pakistan_group_by_year = df_Paki_TTP['Year'].value_counts()
sns.barplot(x=pakistan_group_by_year.index, y=pakistan_group_by_year.values, palette='viridis')
plt.title('Terror Attacks by TTP, Pakistan (2012-2014)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities in Pakistan. The counts column is named explicitly: relying on the
# default name 'count' only works on pandas >= 2.0 and raises KeyError in sort_values on older versions.
pakistan_city_counts = df_Pakistan['City'].value_counts()[:10].to_frame(name='count')
pakistan_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='khaki')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Pakistan (2012-2014)", fontsize=15)
plt.show()

# Five most frequent attack types in Pakistan.
df_Pakistan['Attacktype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='darkkhaki')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Attacktype")
plt.ylabel("Number of attack")
plt.title("Top 5 Attacktype, Pakistan (2012-2014)",fontsize=15)
plt.show()

# Five most frequent target types in Pakistan.
df_Pakistan['Targettype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='darkkhaki')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Targettype")
plt.ylabel("count")
plt.title("Top 5 Targettype, Pakistan (2012-2014)",fontsize=15)
plt.show()

# Incidents in Nigeria within the 2012-2014 subset.
df_Nigeria = terr_topRate[terr_topRate['Country'] == 'Nigeria']

fig, ax = plt.subplots(figsize=(12, 4))
# Five most frequent Group values (no entry is excluded here).
nigeria_groups = df_Nigeria['Group'].value_counts()[:5]
ax = sns.barplot(x=nigeria_groups.values, y=nigeria_groups.index, palette='ch:start=.2,rot=-.3')
ax.set_title('Terrorist Groups, Nigeria (2012-2014)')

# NOTE(review): df_Ni_Bo was never assigned in this file. From its name it is assumed to be the
# Nigeria incidents whose Group is 'Boko Haram' — confirm the label.
df_Ni_Bo = df_Nigeria[df_Nigeria['Group'] == 'Boko Haram']

plt.figure(figsize=(4, 2))
nigeria_group_by_year = df_Ni_Bo['Year'].value_counts()
sns.barplot(x=nigeria_group_by_year.index, y=nigeria_group_by_year.values, palette='viridis')
# The title names the country and group actually plotted (it previously read "Taliban, Iraq").
plt.title('Terror Attacks by Boko Haram, Nigeria (2012-2014)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities in Nigeria. The counts column is named explicitly: relying on the
# default name 'count' only works on pandas >= 2.0 and raises KeyError in sort_values on older versions.
nigeria_city_counts = df_Nigeria['City'].value_counts()[:10].to_frame(name='count')
nigeria_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightslategray')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Nigeria (2012-2014)", fontsize=15)
plt.show()

# Five most frequent attack types in Nigeria.
df_Nigeria['Attacktype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='darkgrey')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Attacktype")
plt.ylabel("Number of attack")
plt.title("Top 5 Attacktype, Nigeria (2012-2014)",fontsize=15)
plt.show()

# Five most frequent target types in Nigeria.
df_Nigeria['Targettype'].value_counts()[:5].plot(kind='bar',figsize=(12, 4),color='darkgrey')
plt.xticks(rotation=0, fontsize=8)
plt.xlabel("Targettype")
plt.ylabel("count")
plt.title("Top 5 Targettype, Nigeria (2012-2014)",fontsize=15)
plt.show()

연도별(10년 단위) 특성 분석

terr_df.Year.unique()
# Split the incidents into decade subsets; the last subset only covers 2010 through 2017.
incident_year = terr_df['Year']
df_70s = terr_df.loc[incident_year.isin(list(range(1970, 1980)))]
df_80s = terr_df.loc[incident_year.isin(list(range(1980, 1990)))]
df_90s = terr_df.loc[incident_year.isin(list(range(1990, 2000)))]
df_00s = terr_df.loc[incident_year.isin(list(range(2000, 2010)))]
df_10s = terr_df.loc[incident_year.isin(list(range(2010, 2018)))]
# Line chart for the 1970s subset: attacks per year, one line per region.
region_lines_70s = pd.crosstab(df_70s['Year'], df_70s['Region'])
region_lines_70s.plot(figsize=(12, 4))
# plt.title('Terrorist Attack By Region')
plt.title('Terrorism By Region')
plt.ylabel('counts')

70년대

# Regions ranked by number of attacks in the 1970s subset (at most ten bars).
region_counts_70s = df_70s['Region'].value_counts().head(10)
plt.figure(figsize=(12, 4))
sns.barplot(x=region_counts_70s.values, y=region_counts_70s.index, palette='autumn')
plt.title('Top 10 Attacks in 70s', size=15)
plt.ylabel('Region')
plt.xlabel('Number of Attacks')

# Stacked horizontal bars per region for the 1970s subset: attack types, then weapons, then target
# types. Each chart is resized and shown before the next one is drawn.
# (A stray non-ASCII character after the last plt.show() call, which was a syntax error, is removed.)
for breakdown_column in ('Attacktype', 'Weapon', 'Targettype'):
    pd.crosstab(df_70s['Region'], df_70s[breakdown_column]).plot.barh(stacked=True)
    fig = plt.gcf()
    fig.set_size_inches(12, 7)
    plt.show()

# Total deaths and injuries per region in the 1970s subset, stacked in one bar per region.
casualty_columns = ['Kill', 'Wound']
attack_data = df_70s.groupby('Region')[casualty_columns].sum()
attack_data.plot(kind='bar', stacked=True, figsize=(12, 4))
plt.title('Kill & Wound in 70s')
plt.xlabel('Region')
plt.ylabel('Count')
plt.show()

80년대

# Regions ranked by number of attacks in the 1980s subset (at most ten bars).
region_counts_80s = df_80s['Region'].value_counts().head(10)
plt.figure(figsize=(12, 4))
sns.barplot(x=region_counts_80s.values, y=region_counts_80s.index, palette='autumn')
plt.title('Top 10 Attacks in 80s', size=15)
plt.ylabel('Region')
plt.xlabel('Number of Attacks')

# Stacked horizontal bars per region: first by attack type, then by weapon.
for breakdown_column in ('Attacktype', 'Weapon'):
    pd.crosstab(df_80s['Region'], df_80s[breakdown_column]).plot.barh(stacked=True)
    fig = plt.gcf()
    fig.set_size_inches(12, 7)
    plt.show()

# Five most used weapon types in the 1980s subset. The counts column is named explicitly: relying
# on the default name 'count' only works on pandas >= 2.0 and raises KeyError on older versions.
weapon_counts_80s = df_80s['Weapon'].value_counts()[:5].to_frame(name='count')
weapon_counts_80s.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightslategray')
plt.xlabel("Weapon")
plt.ylabel("Number of attack")
plt.title("Top 5 Weapons in 80s", fontsize=15)
plt.show()

# Ten most frequent target types in the 1980s subset.
target_counts_80s = df_80s['Targettype'].value_counts()[:10].to_frame(name='count')
target_counts_80s.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightslategray')
plt.xlabel("Targettype")
plt.ylabel("Number of attack")
plt.title("Top 10 Targettype in 80s", fontsize=15)
plt.show()

90년대

# Regions ranked by number of attacks in the 1990s subset (at most ten bars).
region_counts_90s = df_90s['Region'].value_counts().head(10)
plt.figure(figsize=(12, 4))
sns.barplot(x=region_counts_90s.values, y=region_counts_90s.index, palette='autumn')
plt.title('Top 10 Attacks in 90s', size=15)
plt.ylabel('Region')
plt.xlabel('Number of Attacks')

# Stacked horizontal bars: attack-type mix per region in the 1990s subset.
pd.crosstab(df_90s.Region,df_90s.Attacktype).plot.barh(stacked=True)
# Fetch the figure that was just created; without this, `fig` still refers to an earlier figure
# and the resize would not apply to this chart.
fig = plt.gcf()
fig.set_size_inches(12,4)
plt.show()

# Five most frequent attack types in the 1990s subset. The counts column is named explicitly:
# relying on the default name 'count' only works on pandas >= 2.0 and raises KeyError on older versions.
attacktype_counts_90s = df_90s['Attacktype'].value_counts()[:5].to_frame(name='count')
attacktype_counts_90s.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightslategray')
# The x axis shows attack types, so it is labelled accordingly (it previously read "Weapon").
plt.xlabel("Attacktype")
plt.ylabel("Number of attack")
plt.title("Top 5 Attacktype in 90s", fontsize=15)
plt.xticks(rotation=0)
plt.show()

# Ten most frequent target types in the 1990s subset.
target_counts_90s = df_90s['Targettype'].value_counts()[:10].to_frame(name='count')
target_counts_90s.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='lightslategray')
plt.xlabel("Targettype")
plt.ylabel("Number of attack")
plt.title("Top 10 Targettype in 90s", fontsize=15)
plt.xticks(rotation=30)
plt.show()

2000년대

# Regions ranked by number of attacks in the 2000s subset (at most ten bars).
region_counts_00s = df_00s['Region'].value_counts().head(10)
plt.figure(figsize=(12, 4))
sns.barplot(x=region_counts_00s.values, y=region_counts_00s.index, palette='autumn')
plt.title('Top 10 Attacks in 2000', size=15)
plt.ylabel('Region')
plt.xlabel('Number of Attacks')

# Total deaths and injuries per region in the 2000s subset, stacked in one bar per region.
attack_data = df_00s.groupby('Region')[['Kill', 'Wound']].sum()
attack_data.plot(kind='bar', stacked=True,figsize = (12,4))
plt.xlabel('Region')
plt.ylabel('Count')
# The data comes from df_00s, so the title names the 2000s (it previously read "90s").
plt.title('Kill & Wound in 2000s')
plt.xticks(rotation=30)
plt.show()

# 2000s incidents in the Middle East & North Africa region.
df_2000_MN = df_00s[df_00s['Region'] == 'Middle East & North Africa']

# Attacks per year in that region.
plt.figure(figsize=(12, 4))
mn_year_counts = df_2000_MN['Year'].value_counts()
sns.barplot(x=mn_year_counts.index, y=mn_year_counts.values, palette='viridis')
plt.title('Terror Attacks in Middle East & North Africa (2000s)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities of the region in 2008. The counts column is named explicitly: relying
# on the default name 'count' only works on pandas >= 2.0 and raises KeyError on older versions.
a = df_2000_MN[df_2000_MN['Year'] == 2008]
mn_city_counts = a['City'].value_counts()[:10].to_frame(name='count')
mn_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='cadetblue')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Middle East & North Africa (2008y)", fontsize=15)
plt.show()

# Incidents in Baghdad during 2008. Both conditions must hold, so they are combined with `&`;
# `|` would also include every 2008 incident elsewhere and every Baghdad incident of other years.
a = df_00s[(df_00s['City'] == 'Baghdad') & (df_00s['Year'] == 2008)]
plt.figure(figsize=(12, 4))
# Five most frequent target types among those incidents.
baghdad_targets = a['Targettype'].value_counts()[:5]
sns.barplot(x=baghdad_targets.index, y=baghdad_targets.values, palette='viridis')
plt.title('Targettype in Baghdad(Middle East & North Africa, 2008y)')
# The x axis shows target types, so it is labelled accordingly (it previously read 'Years').
plt.xlabel('Targettype')
plt.ylabel('Number of Attacks')
plt.show()

# 2000s incidents in the South Asia region.
df_2000_SA = df_00s[df_00s['Region'] == 'South Asia']

# Attacks per year in that region.
plt.figure(figsize=(12, 4))
sa_year_counts = df_2000_SA['Year'].value_counts()
sns.barplot(x=sa_year_counts.index, y=sa_year_counts.values, palette='viridis')
plt.title('Terror Attacks in South Asia (2000s)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities of the region in 2009. The counts column is named explicitly: relying
# on the default name 'count' only works on pandas >= 2.0 and raises KeyError on older versions.
b = df_2000_SA[df_2000_SA['Year'] == 2009]
sa_city_counts = b['City'].value_counts()[:10].to_frame(name='count')
sa_city_counts.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='cadetblue')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in South Asia (2009y)", fontsize=15)
plt.show()

# Incidents in Quetta during 2009. Both conditions must hold, so they are combined with `&`;
# `|` would also include every 2009 incident elsewhere and every Quetta incident of other years.
b = df_00s[(df_00s['City'] == 'Quetta') & (df_00s['Year'] == 2009)]
plt.figure(figsize=(12, 4))
# Five most frequent target types among those incidents.
quetta_targets = b['Targettype'].value_counts()[:5]
sns.barplot(x=quetta_targets.index, y=quetta_targets.values, palette='viridis')
plt.title('Targettype in Quetta(South Asia, 2009y)')
# The x axis shows target types, so it is labelled accordingly (it previously read 'Years').
plt.xlabel('Targettype')
plt.ylabel('Number of Attacks')
plt.show()

# Same chart for Peshawar during 2009, again requiring city AND year.
b = df_00s[(df_00s['City'] == 'Peshawar') & (df_00s['Year'] == 2009)]
plt.figure(figsize=(12, 4))
peshawar_targets = b['Targettype'].value_counts()[:5]
sns.barplot(x=peshawar_targets.index, y=peshawar_targets.values, palette='YlOrBr')
plt.title('Targettype in Peshawar(South Asia, 2009y)')
plt.xlabel('Targettype')
plt.ylabel('Number of Attacks')
plt.show()

2010년대

# Regions ranked by number of attacks in the 2010s subset (at most ten bars).
region_counts_10s = df_10s['Region'].value_counts().head(10)
plt.figure(figsize=(12, 4))
sns.barplot(x=region_counts_10s.values, y=region_counts_10s.index, palette='autumn')
plt.title('Top 10 Attacks in 2010s', size=15)
plt.ylabel('Region')
plt.xlabel('Number of Attacks')

# Total deaths and injuries per region in the 2010s subset, stacked in one bar per region.
casualty_columns = ['Kill', 'Wound']
attack_data = df_10s.groupby('Region')[casualty_columns].sum()
attack_data.plot(kind='bar', stacked=True, figsize=(12, 4))
plt.title('Kill & Wound in 2010s')
plt.xlabel('Region')
plt.ylabel('Count')
plt.xticks(rotation=90)
plt.show()

# 2010s incidents in the Middle East & North Africa region.
df_2010_MN = df_10s[df_10s['Region'] == 'Middle East & North Africa']

# Attacks per year in that region.
plt.figure(figsize=(12, 4))
mn_year_counts_10s = df_2010_MN['Year'].value_counts()
sns.barplot(x=mn_year_counts_10s.index, y=mn_year_counts_10s.values, palette='viridis')
plt.title('Terror Attacks in Middle East & North Africa (2010s)')
plt.xlabel('Years')
plt.ylabel('Number of Attacks')
plt.show()

# Ten most attacked cities of the region in 2014. The counts column is named explicitly: relying
# on the default name 'count' only works on pandas >= 2.0 and raises KeyError on older versions.
c = df_2010_MN[df_2010_MN['Year'] == 2014]
mn_city_counts_2014 = c['City'].value_counts()[:10].to_frame(name='count')
mn_city_counts_2014.sort_values('count', ascending=False).plot(kind='bar',figsize=(12,4),color='cadetblue')
plt.xlabel("City")
plt.ylabel("Number of attack")
plt.title("Top 10 most effected city in Middle East & North Africa (2014y)", fontsize=15)
plt.show()

# Incidents in Baghdad during 2014. Both conditions must hold, so they are combined with `&`;
# `|` would also include every 2014 incident elsewhere and every Baghdad incident of other years.
d = df_10s[(df_10s['City'] == 'Baghdad') & (df_10s['Year'] == 2014)]
plt.figure(figsize=(12, 4))
# Five most frequent target types among those incidents.
baghdad_targets_2014 = d['Targettype'].value_counts()[:5]
sns.barplot(x=baghdad_targets_2014.index, y=baghdad_targets_2014.values, palette='viridis')
plt.title('Targettype in Baghdad(Middle East & North Africa, 2014)')
# The x axis shows target types, so it is labelled accordingly (it previously read 'Years').
plt.xlabel('Targettype')
plt.ylabel('Number of Attacks')
plt.show()

한국 집계

#Preparing the data for analysis
Ko = terr_df[terr_df.Country == 'South Korea']
Ko_cities = Ko.groupby(by='City',as_index=False).count().sort_values(by='eventid',ascending=False).iloc[:5,]

Ko_kill_size = Ko['Kill'].sum() / len(Ko)
labels = ['Kill', 'Not Kill']

Ko_year = Ko.groupby(by='Year', as_index=False).sum().loc[:, ['Year', 'Kill']]

Iraq_weapon = Ko.groupby(by='Weapon',as_index=False).count().sort_values(by='eventid',ascending=False).iloc[:,:2]
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

# Plot 1 - Top 5 terrorism cities    
sns.barplot(x='eventid', y='City', data=Ko_cities, ci=None, ax=axs[0, 0],palette='summer')
axs[0, 0].set_title(f'Top 5 South Korea Cities With Most Terrorism Occurences')
axs[0, 0].set_ylabel('City')
axs[0, 0].set_xlabel('Victims')

# Plot 2 - Suicide Rate
center_circle = plt.Circle((0,0), 0.75, color='white')
axs[0, 1].pie((Ko_kill_size, 1-Ko_kill_size), labels=labels,colors=['crimson','green'] , autopct='%1.1f%%')
axs[0, 1].add_artist(center_circle)
axs[0, 1].set_title('South Korea Terrorism kill Rate')
axs[0, 0].set_ylabel('Victims')

# Plot 3 - Victims through the years
sns.lineplot(x='Year', y='Kill', data=Ko_year, ax=axs[1, 0],color='crimson')
axs[1, 0].set_xlim([1970, 2017])
axs[1, 0].set_title('South Korea Number of Victims Over Time')
axs[1, 0].set_ylabel('Victims')

# Plot 4 - Terrorism Weapons
sns.barplot(x='Weapon', y='eventid', data=Iraq_weapon, ci=None, ax=axs[1, 1],palette='summer')
axs[1, 1].set_xticklabels(axs[1, 1].get_xticklabels(), rotation=90)
axs[1, 1].set_xlabel('')
axs[1, 1].set_ylabel('Count')
axs[1, 1].set_title('South Korea Weapons Used in Attacks')

plt.suptitle('Terrorism Analysis in South Korea between 1970 and 2017', size=16)    
plt.subplots_adjust(top=0.90)
plt.show()

# Line chart for South Korea: attacks per year, one line per city.
korea_year_by_city = pd.crosstab(Ko['Year'], Ko['City'])
korea_year_by_city.plot(figsize=(12, 4))
# plt.title('Terrorist Attack By Region')
plt.title('Terrorism in Korea')
plt.ylabel('counts')

# Number of attacks per South Korean city.
plt.figure(figsize=(12, 4))
korea_city_counts = Ko['City'].value_counts()
sns.barplot(x=korea_city_counts.index, y=korea_city_counts.values, palette='viridis')
# NOTE(review): the title mentions "Student Radicals", but no Group filter is applied to Ko here —
# confirm whether a filter is missing or the title should change.
plt.title('Attacks by Student Radicals, Korea')
# The x axis shows cities, so it is labelled accordingly (it previously read 'Years').
plt.xlabel('City')
plt.ylabel('Number of Attacks')
plt.show()

# Horizontal version: attack counts on the x axis, South Korean cities on the y axis.
plt.figure(figsize=(12,4))
korea_city_totals = Ko['City'].value_counts()
sns.barplot(x=korea_city_totals.values, y=korea_city_totals.index, palette='autumn')
plt.xlabel('Number of Attacks')
# The y axis lists cities, so it is labelled accordingly (it previously read 'Region').
plt.ylabel('City')
plt.title('Attacks in Korea',size=15)

제로베이스 데이터 스쿨

profile
비전공자의 데이터 공부법

0개의 댓글