DataJanitor DataJanitor - 29 days ago 11
JSON Question

Converting Json to SQL table

I'm pretty new to python/json/& coding.. trying to learn how to get the following format of json to sql table. I used python pandas and it is converting the json nodes to dictionary. can someone please help me understand how to do this please.

Same json:

{
"Volumes": [
{
"AvailabilityZone": "us-east-1a",
"Attachments": [
{
"AttachTime": "2013-12-18T22:35:00.000Z",
"InstanceId": "i-1234567890abcdef0",
"VolumeId": "vol-049df61146c4d7901",
"State": "attached",
"DeleteOnTermination": true,
"Device": "/dev/sda1"
}
],
"Tags": [
{
"Value": "DBJanitor-Private",
"Key": "Name"
},
{
"Value": "DBJanitor",
"Key": "Owner"
},
{
"Value": "Database",
"Key": "Product"
},
{
"Value": "DB Janitor",
"Key": "Portfolio"
},
{
"Value": "DB Service",
"Key": "Service"
}
],
"VolumeType": "standard",
"VolumeId": "vol-049df61146c4d7901",
"State": "in-use",
"SnapshotId": "snap-1234567890abcdef0",
"CreateTime": "2013-12-18T22:35:00.084Z",
"Size": 8
},
{
"AvailabilityZone": "us-east-1a",
"Attachments": [],
"VolumeType": "io1",
"VolumeId": "vol-1234567890abcdef0",
"State": "available",
"Iops": 1000,
"SnapshotId": null,
"CreateTime": "2014-02-27T00:02:41.791Z",
"Size": 100
}
]
}


until now.. this what I was trying... in python:

asg_list_json_Tags=asg_list_json["AutoScalingGroups"]
Tags=pandas.DataFrame(asg_list_json_Tags)
n = []
for i in Tags.columns:
n.append(i)
print n

engine = create_engine("mysql+mysqldb://user:"+'pwd'+"@mysqlserver/dbname")
Tags.to_sql(name='TableName', con=engine, if_exists='append', index=True)


Any help is deeply appreacated.. Thanks!

Answer

I would do it this way:

fn = r'D:\temp\.data\40450591.json'

with open(fn) as f:
    data = json.load(f)

# some of your records seem NOT to have `Tags` key, hence `KeyError: 'Tags'`
# let's fix it
for r in data['Volumes']:
    if 'Tags' not in r:
        r['Tags'] = []

v = pd.DataFrame(data['Volumes']).drop(['Attachments', 'Tags'],1)
a = pd.io.json.json_normalize(data['Volumes'], 'Attachments', ['VolumeId'], meta_prefix='parent_')
t = pd.io.json.json_normalize(data['Volumes'], 'Tags', ['VolumeId'], meta_prefix='parent_')

v.to_sql('volume', engine)
a.to_sql('attachment', engine)
t.to_sql('tag', engine)

Output:

In [179]: v
Out[179]:
                      AvailabilityZone                CreateTime    Iops  Size              SnapshotId      State VolumeType
VolumeId
vol-049df61146c4d7901       us-east-1a  2013-12-18T22:35:00.084Z     NaN     8  snap-1234567890abcdef0     in-use   standard
vol-1234567890abcdef0       us-east-1a  2014-02-27T00:02:41.791Z  1000.0   100                    None  available        io1

In [180]: a
Out[180]:
                 AttachTime DeleteOnTermination     Device           InstanceId     State               VolumeId        parent_VolumeId
0  2013-12-18T22:35:00.000Z                True  /dev/sda1  i-1234567890abcdef0  attached  vol-049df61146c4d7901  vol-049df61146c4d7901
1  2013-12-18T22:35:11.000Z                True  /dev/sda1  i-1234567890abcdef1  attached  vol-049df61146c4d7111  vol-049df61146c4d7901

In [217]: t
Out[217]:
         Key              Value        parent_VolumeId
0       Name  DBJanitor-Private  vol-049df61146c4d7901
1      Owner          DBJanitor  vol-049df61146c4d7901
2    Product           Database  vol-049df61146c4d7901
3  Portfolio         DB Janitor  vol-049df61146c4d7901
4    Service         DB Service  vol-049df61146c4d7901

Test JSON file:

{
    "Volumes": [
        {
            "AvailabilityZone": "us-east-1a",
            "Attachments": [
                {
                    "AttachTime": "2013-12-18T22:35:00.000Z",
                    "InstanceId": "i-1234567890abcdef0",
                    "VolumeId": "vol-049df61146c4d7901",
                    "State": "attached",
                    "DeleteOnTermination": true,
                    "Device": "/dev/sda1"
                },
                {
                    "AttachTime": "2013-12-18T22:35:11.000Z",
                    "InstanceId": "i-1234567890abcdef1",
                    "VolumeId": "vol-049df61146c4d7111",
                    "State": "attached",
                    "DeleteOnTermination": true,
                    "Device": "/dev/sda1"
                }
            ],
            "Tags": [
                {
                    "Value": "DBJanitor-Private",
                    "Key": "Name"
                },
                {
                    "Value": "DBJanitor",
                    "Key": "Owner"
                },
                {
                    "Value": "Database",
                    "Key": "Product"
                },
                {
                    "Value": "DB Janitor",
                    "Key": "Portfolio"
                },
                {
                    "Value": "DB Service",
                    "Key": "Service"
                }
            ],
            "VolumeType": "standard",
            "VolumeId": "vol-049df61146c4d7901",
            "State": "in-use",
            "SnapshotId": "snap-1234567890abcdef0",
            "CreateTime": "2013-12-18T22:35:00.084Z",
            "Size": 8
        },
        {
            "AvailabilityZone": "us-east-1a",
            "Attachments": [],
            "VolumeType": "io1",
            "VolumeId": "vol-1234567890abcdef0",
            "State": "available",
            "Iops": 1000,
            "SnapshotId": null,
            "CreateTime": "2014-02-27T00:02:41.791Z",
            "Size": 100
        }
    ]
}
Comments