spaine spaine - 4 months ago 18
JSON Question

Flatten double nested JSON

I am trying to flatten a JSON file that looks like this:

{
"teams": [
{
"teamname": "1",
"members": [
{
"firstname": "John",
"lastname": "Doe",
"orgname": "Anon",
"phone": "916-555-1234",
"mobile": "",
"email": "john.doe@wildlife.net"
},
{
"firstname": "Jane",
"lastname": "Doe",
"orgname": "Anon",
"phone": "916-555-4321",
"mobile": "916-555-7890",
"email": "jane.doe@wildlife.net"
}
]
},
{
"teamname": "2",
"members": [
{
"firstname": "Mickey",
"lastname": "Moose",
"orgname": "Moosers",
"phone": "916-555-0000",
"mobile": "916-555-1111",
"email": "mickey.moose@wildlife.net"
},
{
"firstname": "Minny",
"lastname": "Moose",
"orgname": "Moosers",
"phone": "916-555-2222",
"mobile": "",
"email": "minny.moose@wildlife.net"
}
]
}
]


}

I wish to export this to an excel table.
My current code is this:

from pandas.io.json import json_normalize
import json
import pandas as pd

inputFile = 'E:\\teams.json'
outputFile = 'E:\\teams.xlsx'

f = open(inputFile)
data = json.load(f)
f.close()

df = pd.DataFrame(data)

result1 = json_normalize(data, 'teams' )
print result1


results in this output:

members teamname
0 [{u'firstname': u'John', u'phone': u'916-555-... 1
1 [{u'firstname': u'Mickey', u'phone': u'916-555-... 2


There are 2 members's data nested within each row. I would like to have an output table that displays all 4 members' data plus their associated teamname.

Answer

This is one way to do it. Should give you some ideas.

df = pd.concat(
    [
        pd.concat([pd.Series(m) for m in t['members']], axis=1) for t in data['teams']
    ], keys=[t['teamname'] for t in data['teams']]
)

                                     0                         1
1 email          john.doe@wildlife.net     jane.doe@wildlife.net
  firstname                       John                      Jane
  lastname                         Doe                       Doe
  mobile                                            916-555-7890
  orgname                         Anon                      Anon
  phone                   916-555-1234              916-555-4321
2 email      mickey.moose@wildlife.net  minny.moose@wildlife.net
  firstname                     Mickey                     Minny
  lastname                       Moose                     Moose
  mobile                  916-555-1111                          
  orgname                      Moosers                   Moosers
  phone                   916-555-0000              916-555-2222

To get a nice table with team name and members as rows, all attributes in columns:

df.index.levels[0].name = 'teamname'
df.columns.name = 'member'

df.T.stack(0).swaplevel(0, 1).sort_index()

enter image description here

To get team name and member as actual columns, just reset the index.

df.index.levels[0].name = 'teamname'
df.columns.name = 'member'

df.T.stack(0).swaplevel(0, 1).sort_index().reset_index()

enter image description here

The whole thing

import json
import pandas as pd

json_text = """{
"teams": [
  {
    "teamname": "1",
    "members": [
      {
        "firstname": "John", 
        "lastname": "Doe",
        "orgname": "Anon",
        "phone": "916-555-1234",
        "mobile": "",
        "email": "john.doe@wildlife.net"
      },
      {
        "firstname": "Jane",
        "lastname": "Doe",
        "orgname": "Anon",
        "phone": "916-555-4321",
        "mobile": "916-555-7890",
        "email": "jane.doe@wildlife.net"
      }
    ]
  },
  {
    "teamname": "2",
    "members": [
      {
        "firstname": "Mickey",
        "lastname": "Moose",
        "orgname": "Moosers",
        "phone": "916-555-0000",
        "mobile": "916-555-1111",
        "email": "mickey.moose@wildlife.net"
      },
      {
        "firstname": "Minny",
        "lastname": "Moose",
        "orgname": "Moosers",
        "phone": "916-555-2222",
        "mobile": "",
        "email": "minny.moose@wildlife.net"
      }
    ]
  }       
]
}"""


data = json.loads(json_text)

df = pd.concat(
    [
        pd.concat([pd.Series(m) for m in t['members']], axis=1) for t in data['teams']
    ], keys=[t['teamname'] for t in data['teams']]
)

df.index.levels[0].name = 'teamname'
df.columns.name = 'member'

df.T.stack(0).swaplevel(0, 1).sort_index().reset_index()
Comments