Eiji Eiji - 6 months ago 226
JSON Question

Generate nested json with couting in Postgresql

I created a simple database (in latest stable postgresql), like this:

create table table_a(id int primary key not null, name char(10));
create table table_b(id int primary key not null, name char(10), parent_a_id int);
create table table_c(id int primary key not null, name char(10), parent_a_id int, parent_b_id int, parent_c_id int, c_number int);
create table table_d(id int primary key not null, name char(10), parent_c_id int, d_number int);


with some example data like this:

insert into table_a(id, name) values(1, "a");

insert into table_b(id, name, parent_a_id) values(1, "b", 1);

insert into table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(1, "c1", 1, 1, null, 1);
insert into table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(2, "c1.1", 1, 1, 1, 5);
insert into table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(3, "c1.1.1", 1, 1, 2, 2);
insert into table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(4, "c1.2", 1, 1, 1, 8);
insert into table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(5, "c2", 1, 1, null, 4);

insert into table_d(id, name, parent_c_id, d_number) values(1, "c1_d1", 1, 5);
insert into table_d(id, name, parent_c_id, d_number) values(2, "c1.1_d1", 2, 6);
insert into table_d(id, name, parent_c_id, d_number) values(3, "c1.1_d2", 2, 1);
insert into table_d(id, name, parent_c_id, d_number) values(4, "c1.1.1_d1", 3, 2);
insert into table_d(id, name, parent_c_id, d_number) values(5, "c2_d1", 5, 4);
insert into table_d(id, name, parent_c_id, d_number) values(6, "c2_d2", 5, 3);
insert into table_d(id, name, parent_c_id, d_number) values(7, "c2_d3", 5, 7);


Now I want to generate json like this: http://codebeautify.org/jsonviewer/cb9bc2a1

With relation rules:


  1. table_a has many table_b

  2. table_b has one table_a and has many table_c (select only where table_c_id is null)

  3. table_c has one table_a and has one table_b and has many table_c (children) and has one table_c (parent)



and couting rules:


  1. table_c has d_numbers_sum (sum of d_number in table_d and sum of d_numbers_sum in table_c relation )

  2. table_b has d_numbers_sum (sum of d_numbers_sum in table_c relation )

  3. table_a has d_numbers_sum (sum of d_numbers_sum in table_b relation )

  4. table_c has real_c_number (if has children_c then sum of real_c_number in table_c relation else c_number)

  5. table_b has real_c_number_sum (sum of real_c_number in table_c relation )

  6. table_a has real_c_number_sum (sum of real_c_number_sum in table_b relation )



Is it possible to generate that JSON with that rules in pure postgresql code?

Is it possible to generate shourtcat function for this like:

select * from my_shourtcat where id = ?;


or whitout id (generate json array):

select * from my_shourtcat;


Can you show me an example with description (how to generate nested json and couting), so I could use relations similar, but more complex that these in my app?

EDIT:

I wrote something interesting, but it's not 100% nested hash - here all leaf has own tree and result is an array of these trees I need to deep merge that array to create array of unique trees:

with recursive j as (
SELECT c.*, json '[]' children -- at max level, there are only leaves
FROM test.table_c c
WHERE (select count(1) from test.table_c where parent_c_id = c.id) = 0
UNION ALL
-- a little hack, because PostgreSQL doesn't like aggregated recursive terms
SELECT (c).*, array_to_json(array_agg(j)) children
FROM (
SELECT c, j
FROM j
JOIN test.table_c c ON j.parent_c_id = c.id
) v
GROUP BY v.c
)
SELECT json_agg(row_to_json(j)) json_tree FROM j WHERE parent_c_id is null;

Answer

The answer consists of two parts. First to rig up a basic json structure, and then to build up nested json objects from self-referencing column in table_c.

Both parts requires Postgres 9.5

The first code sets up a json object with most joins except for the nested children in table_c. The counting part is mostly left out.

In the second code example I wrote a "merge" function in pure plpgsql, which should solve the nested json problem. This solution requires only PG9.5 and no extensions, since plpgsql is built in.

As an alternative, I found one other solution that requires plv8 installed which does a deep merge in javascript ).

Creating nested json is not trivial to do in pure sql, where the challenge is to merge the separate json trees we can get from a recursive CTE.

Code example 1

Creating the query as a view makes it easy to reuse the query to either return a json array of all objects from table_a, or return only one object with a given id.

I made some small changes to the data model and data. The code for a self-contained example follows:

--TABLES
DROP SCHEMA IF EXISTS TEST CASCADE;
CREATE SCHEMA test;

-- Using text instead of char(10), to avoid padding. For most     databases text is the best choice. 
-- Postgresql uses the same implementation the hood (char vs text)
-- Source: https://www.depesz.com/2010/03/02/charx-vs-varcharx-vs-varchar-vs-text/

create table test.table_a(id int primary key not null, name text);
create table test.table_b(id int primary key not null, name text, parent_a_id int);
create table test.table_c(id int primary key not null, name text, parent_a_id int, parent_b_id int, parent_c_id int, c_number int);
create table test.table_d(id int primary key not null, name text, parent_c_id int, d_number int);

--DATA
insert into test.table_a(id, name) values(1, 'a');

-- Changed: parent_a_id=1 (instead of null)
insert into test.table_b(id, name, parent_a_id) values(1, 'b', 1);

insert into test.table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(1, 'c1', 1, 1, null, 1);
insert into test.table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(2, 'c1.1', 1, 1, 1, 5);
insert into test.table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(3, 'c1.1.1', 1, 1, 2, 2);
insert into test.table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(4, 'c1.2', 1, 1, 1, 8);
insert into test.table_c(id, name, parent_a_id, parent_b_id, parent_c_id, c_number) values(5, 'c2', 1, 1, null, 4);

insert into test.table_d(id, name, parent_c_id, d_number) values(1, 'c1_d1', 1, 5);
insert into test.table_d(id, name, parent_c_id, d_number) values(2, 'c1.1_d1', 2, 6);
insert into test.table_d(id, name, parent_c_id, d_number) values(3, 'c1.1_d2', 2, 1);
insert into test.table_d(id, name, parent_c_id, d_number) values(4, 'c1.1.1_d1', 3, 2);
insert into test.table_d(id, name, parent_c_id, d_number) values(5, 'c2_d1', 5, 4);
insert into test.table_d(id, name, parent_c_id, d_number) values(6,'c2_d2', 5, 3);
insert into test.table_d(id, name, parent_c_id, d_number) values(7, 'c2_d3', 5, 7);


CREATE OR REPLACE VIEW json_objects AS
--Root object
SELECT ta.id, json_build_object(
    'id', ta.id,
    'name', ta.name,
    'd_numbers_sum', (SELECT sum(d_number) FROM test.table_d),
    'real_c_number_sum', null,
    'children_b', (

        -- table_b
        SELECT json_agg(json_build_object(
            'id', tb.id,
            'name', tb.name,
            'd_numbers_sum', null,
            'real_c_number_sum', null,
            'children_c', (

                -- table_c
                SELECT json_agg(json_build_object(
                   'id', tc.id,
                   'name', tc.name,
                   'd_numbers_sum', null,
                   'real_c_number_sum', null,
                   'children_d', (

                        -- table_d
                        SELECT json_agg(json_build_object(
                           'id', td.id,
                           'name', td.name,
                           'd_numbers_sum', null,
                           'real_c_number_sum', null
                        ))
                        FROM test.table_d td
                        WHERE td.parent_c_id = tc.id
                    )
                ))
                FROM test.table_c tc
                WHERE tc.parent_b_id = tb.id
            )
        ))
        FROM test.table_b tb
        WHERE tb.parent_a_id = ta.id
    )
) AS object
FROM test.table_a ta


-- Return json array of all objects
SELECT json_agg(object) FROM json_objects;

-- Return only json object with given id
SELECT object FROM json_objects WHERE id = 1

Code example 2

Here we map the data from table_c so we can insert it directly into a recursive CTE from the documentation, for readability and educational purposes. Then prepares the data as input to the "merge" function. For simplicity I just aggregated the rows into a big json object. The performance should be ok. We can choose to get the parent object, or only its children as an (json)array in the third function parameter.

Which node to get the children for is specified in the last query in the last lines of the example. This query can be used all places where we need the children for a table_c node. I did test this on a more complex example and it looks like I sorted out most rough edges.

The three parts of the CTE (graph, search_graph and filtered_graph) can be refactored into one for performance, since CTE's are optimization fences for the database planner, but I kept this version for readability and debugging.

This example utilizes jsonb instead of json, see the documentation. The reason for using jsonb here is not having to reparse the json each time we manipulate it in the function. When the function is done, the result is casted back to json so it can be inserted directly into the code in example 1.

--DROP VIEW test.tree_path_list_v  CASCADE;
CREATE OR REPLACE VIEW test.tree_path_list_v AS 
WITH RECURSIVE
    -- Map the source data so we can use it directly in a recursive query from the documentation:
    graph AS
    (
        SELECT id AS id, parent_c_id AS link, name, jsonb_build_object('id', id, 'name', name, 'parent_c_id', parent_c_id, 'parent_a_id', parent_a_id, 'parent_b_id', parent_b_id) AS data
        FROM test.table_c
    ),
    -- Recursive query from documentation.
    -- http://www.postgresql.org/docs/current/static/queries-with.html
    search_graph(id, link, data, depth, path, cycle) AS (
        SELECT g.id, g.link, g.data, 1,
          ARRAY[g.id],
          false
        FROM graph g
      UNION ALL
        SELECT g.id, g.link, g.data, sg.depth + 1,
          path || g.id,
          g.id = ANY(path)
        FROM graph g, search_graph sg
        WHERE g.id = sg.link AND NOT cycle
    ),
    -- Decorate/filter the result so it can be used as input to the "test.create_jsonb_tree" function
    filtered_graph AS (
        SELECT
            sg.path[1] AS id,
            sg.path[2] AS parent_id,
            sg.depth AS level,
            sg.id AS start_id,
            d.name,
            sg.path,
            d.data::jsonb AS json
        FROM search_graph sg
        INNER JOIN graph d ON d.id = sg.path[1]
        ORDER BY level DESC
    )
    -- "Main" query
    SELECT * FROM filtered_graph
;


-- Returns a json object with all children merged into its parents.
-- Parameter 1 "_tree_path_list": A json document with rows from the view "test.tree_path_list_v" aggregates as one big json.
-- Parameter 2 "_children_keyname": Choose the name for the children
CREATE OR REPLACE FUNCTION test.create_jsonb_tree(_tree_path_list jsonb, _children_keyname text DEFAULT 'children', _get_only_children boolean DEFAULT false)
    RETURNS jsonb AS
$$
DECLARE
    node_map jsonb :=  jsonb_build_object();
    node_result jsonb := jsonb_build_array();
    parent_children jsonb := jsonb_build_array();
    node jsonb;
    relation jsonb;
BEGIN
    FOR node IN SELECT * FROM jsonb_array_elements(_tree_path_list)
    LOOP
        RAISE NOTICE 'Input (per row): %', node;
        node_map := jsonb_set(node_map, ARRAY[node->>'id'], node->'json');
    END LOOP;

    FOR relation IN SELECT * FROM jsonb_array_elements(_tree_path_list)
    LOOP
        IF ( (relation->>'level')::int > 1 ) THEN
            parent_children := COALESCE(node_map->(relation->>'parent_id')->_children_keyname, jsonb_build_array()) || jsonb_build_array(node_map->(relation->>'id'));
            node_map := jsonb_set(node_map, ARRAY[relation->>'parent_id', _children_keyname], parent_children);
            node_map := node_map - (relation->>'id');
        ELSE
            IF _get_only_children THEN
                node_result := node_map->(relation->>'id')->_children_keyname;
            ELSE
                node_result := node_map->(relation->>'id');
            END IF;
        END IF;
    END LOOP;
    RETURN node_result;
END;
$$ LANGUAGE plpgsql
;


-- Aggregate the rows from the view into a big json object. The function
SELECT test.create_jsonb_tree(
    (   SELECT jsonb_agg( (SELECT x FROM (SELECT id, parent_id, level, name, json) x) )
        FROM test.tree_path_list_v
        WHERE start_id = 1  --Which node to get children for
    ),
    'children'::text,
    true
)::json
;

Output for example 2

[
 {
    "id": 2,
    "name": "c1.1",
    "children": [
      {
        "id": 3,
        "name": "c1.1.1",
        "parent_a_id": 1,
        "parent_b_id": 1,
        "parent_c_id": 2
      }
    ],
    "parent_a_id": 1,
    "parent_b_id": 1,
    "parent_c_id": 1
  },
  {
    "id": 4,
    "name": "c1.2",
    "parent_a_id": 1,
    "parent_b_id": 1,
    "parent_c_id": 1
  }
]