Işık Işık - 6 months ago 16
SQL Question

Repetitive entries in SQL JOIN query

I have three tables posts, comments, and users. posts table includes two types of posts, question and answer. Comments are made on questions and answers. My purpose is to get a single question with comments on it, answers on it and comments on that answers. Also I need username from the users table as author on each question, answer and comment I fetch. I am using Postgres 9.5, and making use of

json_agg()
function.

While example output I need should be something similar to the first following, I get repeated entries.

What is the thing I am missing here? Correct group by clauses may be. Or the subquery to gather the answers with their comments is not the way to do it. When I comment out the left join from posts on the comments table I get desired results without comments on questions. Also when I cancel the left join containing the subquery I get non-repetative results as I expect, and again that is not complete dataset I desire. These are things I have collected so far to solve my issue.

What I need:

[
{
"post_id": "10",
"created_at": "2016-05-10T00:16:54.469Z",
"post_type": "question",
"post_title": "qwerty",
"post_text": "asdasd asda sdasd",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": null,
"is_accepted": null,
"acceptor_id": null,
"answers": [
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 17
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"author": "isikfsc",
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 14
}
]
}
],
"comments": [
{
"id": 1,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"author": "isikfsc",
"parent_post_id": 10
}
]
}
]


My query is:

SELECT
q.*,
json_agg(ac.*) AS answers,
json_agg(c.*) AS comments --comments on posts of post_id questions
FROM posts q

LEFT JOIN
(
SELECT
a.*,
json_agg(c.*) AS comments -- comments on posts of post_id answers
FROM posts a
LEFT JOIN comments c
ON a.post_id = c.parent_post_id

GROUP BY a.post_id
) ac
ON q.post_id = ac.parent_post_id

LEFT JOIN comments c
ON q.post_id = c.parent_post_id

WHERE q.post_id = 10
GROUP BY q.post_id


What I get:

[
{
"post_id": "10",
"created_at": "2016-05-10T00:16:54.469Z",
"post_type": "question",
"post_title": "qwerty",
"post_text": "asdasd asda sdasd",
"post_author_id": 1,
"parent_post_id": null,
"is_accepted": null,
"acceptor_id": null,
"answers": [
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 17,
"created_at": "2016-05-10T04:58:56.350229",
"post_type": "answer",
"post_title": null,
"post_text": "222asda dasdad asdada",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 5,
"created_at": "2016-05-10T10:56:30.220128",
"text": "qweqwe",
"author_id": 1,
"parent_post_id": 17
},
{
"id": 8,
"created_at": "2016-05-10T11:00:00.182991",
"text": "sasasd",
"author_id": 1,
"parent_post_id": 17
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
},
{
"post_id": 14,
"created_at": "2016-05-10T04:19:19.005556",
"post_type": "answer",
"post_title": null,
"post_text": "asdasdasdasd",
"post_author_id": 1,
"parent_post_id": 10,
"is_accepted": null,
"acceptor_id": null,
"comments": [
{
"id": 2,
"created_at": "2016-05-10T05:25:34.671008",
"text": "qeqweqwe",
"author_id": 1,
"parent_post_id": 14
}
]
}
],
"comments": [
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 1,
"created_at": "2016-05-10T05:25:28.200327",
"text": "asadasdad",
"author_id": 1,
"parent_post_id": 10
},
{
"id": 4,
"created_at": "2016-05-10T10:25:23.381177",
"text": "werwer",
"author_id": 1,
"parent_post_id": 10
}
]
}
]

Answer

Grouping happens once all of the parties have been joined, so aggregates will depend on the resulting cardinality. Joining posts with answers AND comments causes a full join between them, duplicating all values. They need to be separated and performed individually, one way you can do it is the following:

SELECT
    q.*,
    (SELECT json_agg(ac.*)
     FROM (
       SELECT a.*, json_agg(c.*) AS comments
       FROM posts a
       LEFT JOIN comments c ON (a.post_id = c.parent_post_id)
       WHERE a.parent_post_id = q.post_id
       GROUP BY a.post_id
       ) ac
    ) AS answers,
    json_agg(c.*) AS comments --comments on posts of post_id questions
FROM posts q
LEFT JOIN comments c ON (q.post_id = c.parent_post_id)
WHERE q.post_id = 10
GROUP BY q.post_id;

Alternatively:

SELECT q.*, qa.answers, qc.comments
FROM posts q
LEFT JOIN (
  SELECT ac.parent_post_id, json_agg(ac.*) AS answers
  FROM (
    SELECT ac.*, json_agg(c.*) AS comments
    FROM posts ac
    LEFT JOIN comments c ON (c.parent_post_id = ac.post_id)
    GROUP BY ac.post_id
    ) ac
  GROUP BY ac.parent_post_id
  ) qa ON (qa.parent_post_id = q.post_id)
LEFT JOIN (
  SELECT c.parent_post_id, json_agg(c.*) AS comments
  FROM comments c
  GROUP BY c.parent_post_id
  ) qc ON (qc.parent_post_id = q.post_id)
WHERE q.post_id = 10;
Comments