Let's set up Jupyter Notebook, an interactive environment, in a conda virtual environment in VS Code, and connect it to BigQuery.
Prerequisites: a GCP project and a Google account linked to BigQuery
▼ Install pandas-gbq, the module required to use read_gbq
pip3 install --upgrade --force-reinstall pandas-gbq
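To check that the install succeeded, the package version can be printed; a quick sketch (the version shown will vary by machine):
python3 -c "import pandas_gbq; print(pandas_gbq.__version__)"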
▼ Run the read_gbq function in the Python interactive environment, open the link it prints to authenticate your Google account, and load the query results
import pandas as pd

# SQL to run (placeholder)
query = """QUERY"""
df = pd.read_gbq(query=query, project_id='PROJECT_ID', dialect='standard')
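pandas-gbq can also write a DataFrame back to BigQuery through DataFrame.to_gbq; a minimal sketch, where the dataset and table names are placeholders:

# Upload df to BigQuery with pandas-gbq (dataset/table names are placeholders)
df.to_gbq('DATASET.TABLE_NAME', project_id='PROJECT_ID', if_exists='append')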
( The method below also works )
from google.cloud import bigquery
# from google.oauth2 import service_account

# Enter your GCP project ID
project_id = 'PROJECT_ID'

# Authorization
client = bigquery.Client(project=project_id)
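With the client created, a query result can be pulled straight into a pandas DataFrame; a minimal sketch with a trivial test query:

# Run a test query and convert the result to a DataFrame
df = client.query("SELECT 1 AS test_col").to_dataframe()
print(df)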
Prerequisites
▼ Install the google-cloud-bigquery and google-cloud-bigquery-storage packages
pip3 install --upgrade --force-reinstall google-cloud-bigquery google-cloud-bigquery-storage
#conda install -c conda-forge google-cloud-bigquery
▼ Load the key stored in the JSON file and connect with the client
from google.cloud import bigquery
from google.oauth2 import service_account

# Enter the path of the SERVICE_ACCOUNT_KEY JSON file stored locally
credentials = service_account.Credentials.from_service_account_file(r'SERVICE_ACCOUNT_KEY.JSON')

# Enter your GCP project ID
project_id = 'PROJECT_ID'

# Authorization
client = bigquery.Client(credentials=credentials, project=project_id)

# QUERY: run the query (placeholder) and convert the result to a DataFrame
query_job = client.query("""BGQUERY""")
df = query_job.to_dataframe()
# LOAD: upload a DataFrame to a BigQuery table
PROJECT_ID = "project_id"
DATASET = "dataset"
TABLE_NAME = "table_name"
TABLE_ID = f"{PROJECT_ID}.{DATASET}.{TABLE_NAME}"

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("f1", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("f2", bigquery.enums.SqlTypeNames.DATE),
        bigquery.SchemaField("f3", bigquery.enums.SqlTypeNames.FLOAT)],
    write_disposition="WRITE_APPEND"  # or "WRITE_TRUNCATE" to overwrite
)

# Make an API request (dataframe is the DataFrame to upload)
job = client.load_table_from_dataframe(
    dataframe, TABLE_ID, job_config=job_config)

# Wait for the load job to finish
job.result()
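To confirm the load worked, the table metadata can be fetched and the row count checked; a minimal sketch using the TABLE_ID defined above:

# Fetch the table and print its row count after loading
table = client.get_table(TABLE_ID)
print(f"Loaded {table.num_rows} rows into {TABLE_ID}")

The full script, combining the interactive and local authentication cases, is below.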
from google.cloud import bigquery
# from google.oauth2 import service_account  # uncomment for CASE 2

## CASE 1: interactive environment
# Enter your GCP project ID
project_id = 'PROJECT_ID'
# Authorization
client = bigquery.Client(project=project_id)

## CASE 2: local environment
## Enter the path of the SERVICE_ACCOUNT_KEY JSON file stored locally
# credentials = service_account.Credentials.from_service_account_file(r'SERVICE_ACCOUNT_KEY.JSON')
## Enter your GCP project ID
# project_id = 'PROJECT_ID'
## Authorization
# client = bigquery.Client(credentials=credentials, project=project_id)

# QUERY: run the query (placeholder) and convert the result to a DataFrame
query_job = client.query("""BGQUERY""")
df = query_job.to_dataframe()

# LOAD: upload a DataFrame to a BigQuery table
PROJECT_ID = "project_id"
DATASET = "dataset"
TABLE_NAME = "table_name"
TABLE_ID = f"{PROJECT_ID}.{DATASET}.{TABLE_NAME}"

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("f1", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("f2", bigquery.enums.SqlTypeNames.DATE),
        bigquery.SchemaField("f3", bigquery.enums.SqlTypeNames.FLOAT)],
    write_disposition="WRITE_APPEND"  # or "WRITE_TRUNCATE" to overwrite
)

# Make an API request (dataframe is the DataFrame to upload)
job = client.load_table_from_dataframe(
    dataframe, TABLE_ID, job_config=job_config)

# Wait for the load job to finish
job.result()
▼ Check the Python and conda versions, and update conda itself
python --version
conda --version
conda update conda
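For reference, a minimal sketch of creating the conda virtual environment and registering it as a Jupyter kernel that VS Code can select; the environment name bigquery-env and the Python version are assumptions:

# Create and activate a conda environment (name and Python version are placeholders)
conda create -n bigquery-env python=3.9
conda activate bigquery-env

# Install ipykernel and register the environment as a Jupyter kernel for VS Code
pip install ipykernel
python -m ipykernel install --user --name bigquery-env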