DynamoDB (Re)Create Table

ewillwin·2023년 1월 25일
0

TSMtech Record

목록 보기
34/39
  • 유실데이터를 최대한 복구하여 DB Table을 재구축
  • 읍면동단위 Table명: energy
  • 시군구단위 Table명: energy_sigungu
  • 시도단위 Table명: energy_sido
  • 세 Table 모두 On-Demand로 설정
  • IAM 권한은 Table 마다 설정하는게 아니라 따로 건들 필요 없음
  • Table 생성 후 모든 Lambda function에 생성한 Table을 다시 연결해줘야함

아래는 DB insert 코드

#-*-coding:utf-8 -*-
from urllib.request import urlopen
import requests as rq
from bs4 import BeautifulSoup as bs
import time
from datetime import datetime
import zipfile
import os
import boto3
from typing import final
from venv import create
import pandas as pd
import chunk
import warnings
from multiprocessing import Process
import bisect
import pandas as pd
from typing import final
from venv import create
import requests
from decimal import Decimal
import json
import shutil
import boto3
import json
from boto3.dynamodb.conditions import Key, Attr
from decimal import Decimal

def mul424(x):
    x = x * 424
    return x
def addr_truncate_sido(addr):
    new_addr = addr[:2] + '00000000'
    return new_addr
def addr_truncate_sigungu(addr):
    new_addr = addr[:5] + '00000'
    return new_addr

warnings.simplefilter(action='ignore', category=FutureWarning)
dynamodb = boto3.resource('dynamodb', region_name='ap-northeast-2', aws_access_key_id='비', aws_secret_access_key='밀')



print("db insert start")

df0 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/buildingenergy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count'],low_memory=False)
df0 = df0.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int'}, errors='ignore')
df0['gas'] = df0['gas'].astype('Int64')
df0 = df0.fillna(0)
df0['gas_co2'] = df0['gas'].apply(mul424)
df0['electricity_co2'] = df0['electricity'].apply(mul424)
df0['energy_sum'] = df0['gas'] + df0['electricity']
df0['co2_sum'] = df0['gas_co2'] + df0['electricity_co2']
df0 = df0.fillna(0)
df0 = df0.sort_values(by=['date','addr'])
df0.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv", mode='w', index=False, header=None)

table0 = dynamodb.Table('energy') ##### 읍면동단위 table #####
with table0.batch_writer() as batch:
    for index, row in df0.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))



df1 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
df1 = df1.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
df1['gas'] = df1['gas'].astype('Int64')
df1 = df1.fillna(0)

df1['addr'] = df1['addr'].apply(addr_truncate_sido)
df1 = df1.groupby(['date', 'addr'], as_index=False)['electricity', 'gas', 'count'].sum()
df1['count'] = df1['count'].astype('Int64')
df1['gas_co2'] = df1['gas'].apply(mul424)
df1['electricity_co2'] = df1['electricity'].apply(mul424)
df1['energy_sum'] = df1['gas'] + df1['electricity']
df1['co2_sum'] = df1['gas_co2'] + df1['electricity_co2']
df1 = df1.fillna(0)
df1 = df1.sort_values(by=['date','addr'])
df1.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy_sido.csv", mode='w', index=False, header=None)

table1 = dynamodb.Table('energy_sido') ##### 시도단위 table #####
with table1.batch_writer() as batch:
    for index, row in df1.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))



df2 = pd.read_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy.csv",sep =',',encoding='utf-8',names=['date','addr','electricity','gas','count','gas_co2','electricity_co2','energy_sum','co2_sum'],header=0,low_memory=False)
df2 = df2.astype({'date':'str','addr':'str','electricity':'int','gas':'int','count':'int','gas_co2':'int','electricity_co2':'int','energy_sum':'int','co2_sum':'int'}, errors='ignore')
df2['gas'] = df2['gas'].astype('Int64')
df2 = df2.fillna(0)

df2['addr'] = df2['addr'].apply(addr_truncate_sigungu)
df2 = df2.groupby(['date', 'addr'], as_index=False)['electricity', 'gas', 'count'].sum()
df2['count'] = df2['count'].astype('Int64')
df2['gas_co2'] = df2['gas'].apply(mul424)
df2['electricity_co2'] = df2['electricity'].apply(mul424)
df2['energy_sum'] = df2['gas'] + df2['electricity']
df2['co2_sum'] = df2['gas_co2'] + df2['electricity_co2']
df2 = df2.fillna(0)
df2 = df2.sort_values(by=['date','addr'])
df2.to_csv("C:/Users/TSM/OneDrive/바탕 화면/energy/energy_sigungu.csv", mode='w', index=False, header=None)

table2 = dynamodb.Table('energy_sigungu') ##### 시군구단위 table #####
with table2.batch_writer() as batch:
    for index, row in df2.iterrows():
        batch.put_item(json.loads(row.to_json(), parse_float=Decimal))

print("complete db insert")
profile
Software Engineer @ LG Electronics

0개의 댓글