누적 계산
- id별로 term sart를 오름차순 했을 때 첫 번째 term type값
- id별로 가장 작은 term start 날짜
- id별로 가장 작은 term start 날짜 + 10년
- legislators_term 테이블 JOIN
- B 테이블의 term start가 A 테이블의 first term과 10년 후 사이에 있는 날짜
- century, first type별 그루핑
SELECT
DATE_PART('century', A.first_term) AS century,
first_type,
COUNT(DISTINCT A.id_bioguide) AS cohort,
COUNT(B.term_start) AS terms
FROM (
SELECT
DISTINCT id_bioguide,
FIRST_VALUE(term_type) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_type,
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
MIN(term_start) OVER(PARTITION BY id_bioguide) + INTERVAL '10 years' AS first_plus_10
FROM legislators_terms
) AS A LEFT JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
AND B.term_start BETWEEN A.first_term AND A.first_plus_10
GROUP BY 1, 2;
- id 개수로 나누어 평균 계산
- CASE 구문을 이용해 피봇
SELECT
century,
MAX(CASE WHEN first_type = 'rep' THEN cohort END) AS rep_cohort,
MAX(CASE WHEN first_type = 'rep' THEN terms_per_leg END) AS avg_rep_terms,
MAX(CASE WHEN first_type = 'sen' THEN cohort END) AS sen_cohort,
MAX(CASE WHEN first_type = 'sen' THEN terms_per_leg END) AS avg_sen_terms
FROM (
SELECT
DATE_PART('century', A.first_term) AS century,
first_type,
COUNT(DISTINCT A.id_bioguide) AS cohort,
COUNT(B.term_start) AS terms,
ROUND(COUNT(B.term_start) * 1.0 / COUNT(DISTINCT A.id_bioguide), 3) AS terms_per_leg
FROM (
SELECT
DISTINCT id_bioguide,
FIRST_VALUE(term_type) OVER(PARTITION BY id_bioguide ORDER BY term_start ASC) AS first_type,
MIN(term_start) OVER(PARTITION BY id_bioguide) AS first_term,
MIN(term_start) OVER(PARTITION BY id_bioguide) + INTERVAL '10 years' AS first_plus_10
FROM legislators_terms
) AS A LEFT JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
AND B.term_start BETWEEN A.first_term AND A.first_plus_10
GROUP BY 1, 2
) AS result
GROUP BY 1