Tarlen Tarlen - 3 months ago 17
SQL Question

Aggregated columns return zero

I want to aggregate statistics for each app in the table

apps


I have the following query, but for some reason all the results return
0


select
a.id,
'support' as domain,
'summary' as type,
90 as interval,
json_build_object(
'new', count(new),
'closed', count(closed_c),
'reply_rate', count(reply_rate),
'median_response', max(median_response.response_time)
) as data
from apps a
full join (
SELECT * from conversations c
WHERE c.started_at::date > (current_date - (90 || ' days')::interval)::date
) as new on new.app_id = a.id
full join (
SELECT * from conversations c
WHERE c.closed_at::date > (current_date - (90 || ' days')::interval)::date
) as closed_c on closed_c.app_id = a.id
full join (
SELECT * from conversations c
WHERE c.started_at::date > (current_date - (90 || ' days')::interval)::date AND c.first_response_at is not null
) as reply_rate on reply_rate.app_id = a.id
full join (
SELECT c.app_id, extract(epoch from (c.first_response_at - c.started_at)) as response_time, ntile(2) OVER (ORDER BY (c.first_response_at - c.started_at)) AS bucket FROM conversations c
WHERE c.started_at::date > (current_date - (90 || ' days')::interval)::date AND c.first_response_at is not null
) as median_response on median_response.app_id = a.id
where a.test = false
group by a.id

Answer

I can't tell exactly why everything is zero, but

#1: full join should be replaced by left join (due to the where a.test = false)

#2: as you access the same table four times with different conditions this can be probably replaced by a single Select using conditional aggregation.

Check if this returns the correct counts and then Left Join it to apps.

select
    app_id,
    sum(new),
    sum(closed_c),
    sum(reply_rate),
    max(case when bucket = 1 then response_time end)
from
 (
    SELECT app_id,

       1 as new,

       case when c.closed_at::date > (current_date - (90  || ' days')::interval)::date then 1 else 0 end as closed_c,

       case when c.first_response_at is not null then 1 else 0 end as reply_rate,

       extract(epoch from (c.first_response_at - c.started_at)) as response_time, 

       ntile(2) OVER (ORDER BY (c.first_response_at - c.started_at)) AS bucket
    FROM conversations c
    -- assuming that closed_at is always after started_at
    WHERE c.started_at::date > (current_date - (90  || ' days')::interval)::date 
 ) as dt
group by app_id