SQL HW #3

더기덕·2022년 10월 25일
0

1-a -> use CTEs?
1-c -> what is the hint for?
1-b -> 298 vs 303

1

a)

SELECT MS.user_id, MONTH(MS.step_date) AS month, DAY(MS.step_date) AS argmax 
FROM 
(SELECT TS.user_id, TS.step_date, total_steps, MAX(total_steps) OVER (PARTITION BY user_id, MONTH(TS.step_date)) AS max_steps
FROM 
(SELECT user_id, DATE_FORMAT(tstamp, '%Y-%m-%d') AS step_date, SUM(steps) AS total_steps 
FROM step 
GROUP BY user_id, DATE_FORMAT(tstamp, '%Y-%m-%d')
) AS TS
)AS MS
WHERE MS.total_steps = MS.max_steps;

b)

SELECT DISTINCT ACS.user_id, MIN(tstamp) OVER (PARTITION BY ACS.user_id, DATE_FORMAT(ACS.tstamp, '%m-%d')) AS reached_100_tstamp
FROM
(SELECT user_id, tstamp, SUM(steps) OVER (PARTITION BY user_id,DATE_FORMAT(tstamp,'%Y-%m-%d') ORDER BY tstamp) as accum_step
FROM step
WHERE steps != 0) AS ACS
WHERE ACS.accum_step >= 10000;

c)

SELECT user_id, tstamp, heartrate AS original_reading, AVG(heartrate) OVER (PARTITION BY user_id ORDER BY tstamp ASC ROWS BETWEEN 4 PRECEDING AND 4 FOLLOWING) AS smoothed_rate
FROM heartrate;

d)

SELECT  BT.bike_id, BT.end_station_name
FROM biketrip AS BT, 
(SELECT bike_id, MAX(end_date) as latest_return
FROM biketrip
WHERE DATE_FORMAT(end_date, '%Y-%m-%d')='2019-08-23'
GROUP BY bike_id) AS BMT
WHERE BMT.bike_id = BT.bike_id
AND BT.end_date = BMT.latest_return;

e)

WITH RECURSIVE cte (rc, bike_id_r, end_date_r, start_station_name_r, end_station_name_r) AS(
	SELECT 
		1, bike_id, end_date, start_station_name, end_station_name
	FROM biketrip 
    WHERE DATE_FORMAT(end_date, '%Y-%m-%d')='2019-08-23'
    UNION ALL
    SELECT 
		rc+1 AS rc, 
        L.bike_id,
        L.end_date,
        L.start_station_name,
        L.end_station_name
	FROM biketrip L 
    INNER JOIN cte R 
    ON L.bike_id = R.bike_id_r
    AND DATE_FORMAT(L.end_date, '%Y-%m-%d') = '2019-08-23'
    WHERE R.rc <= 20
    AND L.start_station_name = R.end_station_name_r
    AND L.end_date > R.end_date_r
) 
SELECT cte.bike_id_r AS bike_id, cte.end_station_name_r AS end_station_name
FROM cte,
(SELECT bike_id_r, MAX(rc) AS mrc FROM cte GROUP BY bike_id_r)  max_cte
WHERE 1=1
AND cte.rc=max_cte.mrc 
AND cte.bike_id_r= max_cte.bike_id_r;

f)

SELECT bike_id
FROM biketrip AS BT
WHERE DATE_FORMAT(end_date, '%Y-%m-%d')='2019-08-23'
GROUP BY BT.bike_id 
ORDER BY SUM(duration) DESC
LIMIT 1;

2

a)

1) No.
Duration is decided based on start_date, and end_date which are not the candidate key.

The non-trivial functional dependencies are

{start_station -> start_station_name,
start_station_name->start_station,
end_station -> end_station_name,
end_station_name -> end_station,
start_date, end_date -> duration,
start_date, duration -> end_date,
end_date, duration -> start_date}

2) Each Bracket represents one table

  • station 은 그냥 별도로 하나의 테이블만
    {start_station, start_station_name}
    {end_station, end_station_name}
    {bike_id, user_type, start_station, end_station, start_date, end_date}

{start_station, start_station_name}
{end_station, end_station_name}
{trip_id, bike_id, user_type, start_station, end_station}
{trip_id, start_date, end_date}

3)

b)

F = {A->B, C->A, C->B}

c)

->attnedee따지면 1NF 도 안 되는 것 같긴한데...
1NF ABC -> candidate key O
2NF ABC -> 따로따로는 식별 X O
3NF ABC -> O (ABC -> 관객 수) O
BCNF -> f not trivial but a is superkey O

0개의 댓글