Michael T Johnson Michael T Johnson - 6 months ago 59
JSON Question

Scraping a different style of JSON

I am familiar with scraping data in this format.

{"data":[{"assists":0,"assistsPerGame":0.0000,"evAssists":0,"evPoints":0,"gamesPlayed":1,"goals":0,"penaltyMinutes":0,"playerBirthCity":"Windsor","playerBirthCountry":"CAN","playerBirthDate":"1996-02-07",

import csv
import requests

# Open the output CSV in append mode and write the column header row.
outfile = open("NHL_Recent.csv","a",newline='')
writer = csv.writer(outfile)
writer.writerow(["Player","Pos","GP","G","A","P","+/-","PIM","PPG","PPP","SHG","SHP","GWG","OTG","S","S%","TOI","Shifts/PG","FOW%"])

# Fetch the skater summary report; the player records sit under the
# top-level "data" key of the response.
req = requests.get('http://www.nhl.com/stats/rest/skaters?isAggregate=true&reportType=basic&isGame=true&reportName=skatersummary&sort=[{%22property%22:%22shots%22,%22direction%22:%22DESC%22}]&cayenneExp=gameDate%3E=%222017-11-4%22%20and%20gameDate%3C=%222017-11-10%22%20and%20gameTypeId=2')
data = req.json()['data']
# NOTE(review): the loop body below lost its indentation in this paste, and
# the excerpt stops before any row is written or the file is closed.
for item in data:
Player = item['playerName']
Pos = item['playerPositionCode']
GP = item['gamesPlayed']


But not in this manner.

"totalItems" : 600,
"totalEvents" : 0,
"totalGames" : 600,
"totalMatches" : 0,
"wait" : 10,
"dates" : [ {
"date" : "2017-10-04",
"totalItems" : 4,
"totalEvents" : 0,
"totalGames" : 4,
"totalMatches" : 0,
"games" : [ {
"gamePk" : 2017020001,
"link" : "/api/v1/game/2017020001/feed/live",
"gameType" : "R",
"season" : "20172018",
"gameDate" : "2017-10-04T23:00:00Z",
"status" : {
"abstractGameState" : "Final",
"codedGameState" : "7",
"detailedState" : "Final",
"statusCode" : "7",
"startTimeTBD" : false
},
"teams" : {
"away" : {
"leagueRecord" : {
"wins" : 1,
"losses" : 0,
"ot" : 0,
"type" : "league"
},
"score" : 7,
"team" : {
"id" : 10,
"name" : "Toronto Maple Leafs",
"link" : "/api/v1/teams/10",
"venue" : {
"name" : "Air Canada Centre",
"link" : "/api/v1/venues/null",
"city" : "Toronto",
"timeZone" : {
"id" : "America/Toronto",
"offset" : -5,
"tz" : "EST"
}
},
"abbreviation" : "TOR",
"teamName" : "Maple Leafs",
"locationName" : "Toronto",
"firstYearOfPlay" : "1926",
"division" : {
"id" : 17,
"name" : "Atlantic",
"link" : "/api/v1/divisions/17"
},
"conference" : {
"id" : 6,
"name" : "Eastern",
"link" : "/api/v1/conferences/6"
},
"franchise" : {
"franchiseId" : 5,
"teamName" : "Maple Leafs",
"link" : "/api/v1/franchises/5


This is what I have so far with no success.

import csv
import requests
import os

# Open the output CSV in append mode and write the column header row.
outfile = open("NHL DIF JSON.csv","a",newline='')
writer = csv.writer(outfile)
writer.writerow(["Date","Game","gamep"])

req = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2017-10-04&endDate=2018-04-30&expand=schedule.teams,schedule.linescore,schedule.broadcasts.all,schedule.ticket,schedule.game.content.media.epg,schedule.radioBroadcasts,schedule.metadata,schedule.game.seriesSummary,seriesSummary.series&leaderCategories=&leaderGameTypes=R&site=en_nhl&teamId=&gameType=&timecode=')

# "dates" is a list with one entry per day; judging by the sample response,
# each entry carries its own "games" list.
data = req.json()['dates']


# NOTE(review): the loop body below lost its indentation in this paste.
for item in data:
Date = item['date']
##for item in games:
# NOTE(review): judging by the sample response, a date entry is a dict with
# keys such as "date" and "games"; it has no '0' key, so this lookup fails.
Game = item['0']
# NOTE(review): in the sample response "gamePk" appears on each entry of
# item['games'], not on the date entry itself.
gamep = item['gamePk']



print(Date,Game)

writer.writerow([Date,Game,gamep])
outfile.close()

# Runs the Windows "taskkill" command against pythonw.exe
# (/f = force, /im = match by image name).
os.system("taskkill /f /im pythonw.exe")


I would like to pull the "gamePk" and "gameDate" from totalGames, along with the team names within "teams" and other categories. Eventually I would like to put that into a CSV with the gamePk, gameDate, teams, score, etc. I'm just not sure how to get through the individual categories; any help would be greatly appreciated! Thanks!

Answer Source

It's normal JSON data, just a bit complicated. You can get the date from data['dates'][i]['date']. For the teams, score, etc., you have to iterate over data['dates'][i]['games'].

# Fetch the 2017-18 schedule. The response groups games by day under "dates".
req = requests.get('https://statsapi.web.nhl.com/api/v1/schedule?startDate=2017-10-04&endDate=2018-04-30&expand=schedule.teams,schedule.linescore,schedule.broadcasts.all,schedule.ticket,schedule.game.content.media.epg,schedule.radioBroadcasts,schedule.metadata,schedule.game.seriesSummary,seriesSummary.series&leaderCategories=&leaderGameTypes=R&site=en_nhl&teamId=&gameType=&timecode=')
data = req.json()

# Build one CSV row per game: walk every date, then every game on that date.
my_data = []
for item in data['dates']:
    date = item['date']
    games = item['games']
    for game in games:
        gamePk = game['gamePk']
        gameDate = game['gameDate']
        # Each side of the match-up holds its own score and a nested team record.
        team_away, team_home  = game['teams']['away'], game['teams']['home']
        team_away_score = team_away['score']
        team_home_score = team_home['score']
        team_away_name = team_away['team']['name']
        team_home_name = team_home['team']['name']
        my_data.append([date, gamePk, gameDate, team_away_name, team_home_name, team_away_score, team_home_score])

# One header per value appended above, in the same order. An earlier version
# listed an extra "Game" header with no matching value, which shifted every
# label after "Date" one column to the left of its data.
headers = ["Date","gamep","gameDate","team_away_name","team_home_name","team_away_score","team_home_score"]

# NOTE: the file is opened in append mode, so the header row is written again
# on every run; switch the mode to "w" if a single header is wanted.
with open("my_file.csv", "a", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(my_data)

As for your last question, you can get the 'pk' from data['gameData']['game']['pk']. The player, event, triCode and coordinates values are a little harder to get because some items don't have 'players' and 'team' keys, or the 'coordinates' dict is empty.

In this case the dict.get method can be helpful because it will return None (or you can set a default value) if you try to access a non-existent key.
Still, you have to design your code according to the structure of the JSON data. For example:

# Download the live feed for a single game and flatten its plays into rows.
req = requests.get('https://statsapi.web.nhl.com/api/v1/game/2017020001/feed/live?site=en_nhl')
data = req.json()

# The game id is the same for every play, so it is read once up front.
pk = data['gameData']['game']['pk']

my_data = []
for play in data['liveData']['plays']['allPlays']:
    # 'players' can be missing; only the first two names are kept, and the
    # list is padded with None so two values can always be unpacked.
    involved = play.get('players') or []
    names = [entry['player']['fullName'] for entry in involved[:2]]
    player_a, player_b = (names + [None, None])[:2]

    event = play['result']['event']

    # 'team' can be missing as well; fall back to an empty dict so the
    # chained lookup yields None instead of raising.
    team = play.get('team', {})
    triCode = team.get('triCode')

    # The 'coordinates' dict may be empty, hence .get() for both axes.
    coords = play['coordinates']
    coordinates_x = coords.get('x')
    coordinates_y = coords.get('y')

    my_data.append([pk, player_a, player_b, event, triCode, coordinates_x, coordinates_y])

for row in my_data:
    print(row)
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download