이전에는 각 id별 첫 번째 임기 시작 날짜를 기준으로 코호트를 정의했다. 이번에는 처음 날짜 대신 다른 날짜를 기준으로 코흐트를 정의한다.
SELECT
DISTINCT(id_bioguide),
term_type,
DATE('2000-01-01') AS first_term,
MIN(term_start) AS min_start
FROM legislators_terms
WHERE term_start <= '2000-12-31' AND term_end >= '2000-01-01'
GROUP BY 1, 2, 3
SELECT
A.term_type,
COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained
FROM
(
SELECT
DISTINCT(id_bioguide),
term_type,
DATE('2000-01-01') AS first_term,
MIN(term_start) AS min_start
FROM legislators_terms
WHERE term_start <= '2000-12-31' AND term_end >= '2000-01-01'
GROUP BY 1, 2, 3
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
AND C.month_name = 'December' AND C.day_of_month = 31 AND C.year >= 2000
GROUP BY 1, 2
SELECT
term_type,
period,
cohort_retained,
FIRST_VALUE(cohort_retained) OVER(PARTITION BY term_type ORDER BY period) AS cohort_size,
ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY term_type ORDER BY period), 2) AS pct_retained
FROM
(
SELECT
A.term_type,
COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
COUNT(DISTINCT(A.id_bioguide)) AS cohort_retained
FROM
(
SELECT
DISTINCT(id_bioguide),
term_type,
DATE('2000-01-01') AS first_term,
MIN(term_start) AS min_start
FROM legislators_terms
WHERE term_start <= '2000-12-31' AND term_end >= '2000-01-01'
GROUP BY 1, 2, 3
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
AND C.month_name = 'December' AND C.day_of_month = 31 AND C.year >= 2000
GROUP BY 1, 2
) AS result
ORDER BY 2, 1