리텐션 - 다른 테이블의 속성으로 코호트 분석

ddoddo·2023년 1월 17일
0

다른 테이블 속성으로 코호트 분석

legislators 테이블

SELECT * 
FROM legislators

성별

성별과 기간에 따른 코호트 숫자

앞서 진행한 JOIN 조건들과 동일하되, legislators 테이블을 추가로 JOIN 한다. 날짜와 state로 살펴본 것과 달리 성별에 따른 코호트 숫자를 알아본다.

SELECT
	D.gender,
	COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
	COUNT(DISTINCT A.id_bioguide) AS cohort_retained
FROM
(
	SELECT id_bioguide, MIN(term_start) AS first_term
	FROM legislators_terms
	GROUP BY id_bioguide
) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
	LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
		AND C.month_name = 'December' AND C.day_of_month = 31
	JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
GROUP BY 1, 2
ORDER BY 1, 2;

성별과 기간에 따른 코호트 비율

SELECT
	gender,
	period,
	cohort_retained,
	FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC) AS cohort_size,
	ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC), 2) AS pct_retained
FROM
(
	SELECT
		D.gender,
		COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
		COUNT(DISTINCT A.id_bioguide) AS cohort_retained
	FROM
	(
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		GROUP BY id_bioguide
	) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
			AND C.month_name = 'December' AND C.day_of_month = 31
		JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
	GROUP BY 1, 2
) AS sub
ORDER BY 2, 1;

A 테이블의 first_term이 1917-01-01부터 1999-12-31까지인 경우만 코호트 숫자와 비율을 계산한다.

SELECT
	gender,
	period,
	cohort_retained,
	FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC) AS cohort_size,
	ROUND(cohort_retained * 1.0 / FIRST_VALUE(cohort_retained) OVER(PARTITION BY gender ORDER BY period ASC), 2) AS pct_retained
FROM
(
	SELECT
		D.gender,
		COALESCE(DATE_PART('year', AGE(C.date, A.first_term)), 0) AS period,
		COUNT(DISTINCT A.id_bioguide) AS cohort_retained
	FROM
	(
		SELECT id_bioguide, MIN(term_start) AS first_term
		FROM legislators_terms
		GROUP BY id_bioguide
	) AS A JOIN legislators_terms AS B ON A.id_bioguide = B.id_bioguide
		LEFT JOIN date_dim AS C ON C.date BETWEEN B.term_start AND B.term_end
			AND C.month_name = 'December' AND C.day_of_month = 31
		JOIN legislators AS D ON A.id_bioguide = D.id_bioguide
	WHERE A.first_term BETWEEN '1917-01-01' AND '1999-12-31'
	GROUP BY 1, 2
) AS sub
ORDER BY 2, 1;

0개의 댓글