미국 의회 입법가 데이터셋을 사용한다.
여러번 임기한 사람들이 있다.
SELECT id_bioguide, COUNT(*)
FROM legislators_terms
GROUP BY id_bioguide;
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide;
SELECT
DATE_PART('year', AGE(B.term_start, A.first_term)) AS period,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained
FROM(
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A
JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
GROUP BY DATE_PART('year', AGE(B.term_start, A.first_term))
SELECT
period,
cohort_retained,
FIRST_VALUE(cohort_retained) OVER(ORDER BY period) AS cohort_size,
ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(ORDER BY period), 2) AS pct_retained
FROM(
SELECT
DATE_PART('year', AGE(B.term_start, A.first_term)) AS period,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained
FROM(
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A
JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
GROUP BY DATE_PART('year', AGE(B.term_start, A.first_term))
) AS sub;
WITH result AS (
SELECT
period,
cohort_retained,
FIRST_VALUE(cohort_retained) OVER(ORDER BY period) AS cohort_size,
ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(ORDER BY period), 2) AS pct_retained
FROM(
SELECT
DATE_PART('year', AGE(B.term_start, A.first_term)) AS period,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained
FROM(
SELECT id_bioguide, MIN(term_start) AS first_term
FROM legislators_terms
GROUP BY id_bioguide
) AS A
JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
GROUP BY DATE_PART('year', AGE(B.term_start, A.first_term))
) AS sub
)
SELECT
cohort_size,
MAX(CASE WHEN period=0 THEN pct_retained END) AS yr0,
MAX(CASE WHEN period=1 THEN pct_retained END) AS yr1,
MAX(CASE WHEN period=2 THEN pct_retained END) AS yr2,
MAX(CASE WHEN period=3 THEN pct_retained END) AS yr3,
MAX(CASE WHEN period=4 THEN pct_retained END) AS yr4
FROM result
GROUP BY cohort_size;