# -*- coding: utf-8 -*-
# File: download_data.py
# Description: Download data from ESPN's NBA database
# Author:
# Email:

import re
import time
from urllib.request import urlopen

import pandas as pd
from bs4 import BeautifulSoup

###############################################################################
# UTILITY FUNCTIONS
###############################################################################


def BS(html):
    """Parse an HTML document string with the lxml parser."""
    return BeautifulSoup(html, "lxml")


def soup_link(link):
    """Download the page at *link* and return it as a BeautifulSoup tree."""
    content = urlopen(link).read()
    return BS(content)


# Matches shot distances such as "24-foot jumper"; compiled once, reused per row.
_DISTANCE_PATTERN = re.compile(r"([0-9]+)-foot")


def parse_distance(string):
    """Return the shot distance in feet (as a string of digits), or None.

    BUG FIX: the original pattern was the literal text "r'[0-9]*-foot jumper'"
    (quotes and r-prefix included), which could never match, and the sentinel
    check `result != " "` was always true, so a line without any digits
    crashed with AttributeError on `.group()`.
    """
    match = _DISTANCE_PATTERN.search(string)
    return match.group(1) if match else None


def parse_points_scored(string):
    """Return the point value of the described shot: 3, 1 (free throw), else 2."""
    if "three point" in string:
        return 3
    if "free throw" in string:
        return 1
    return 2


def parse_shot_success(string):
    """Classify a play-by-play line as "scores", "misses", or "not a shot".

    BUG FIX: the original tested `("blocks" or "misses") in string`, which
    short-circuits to `"blocks" in string` only, so ordinary missed shots
    were classified as "not a shot".
    """
    if "makes" in string:
        return "scores"
    if "blocks" in string or "misses" in string:
        return "misses"
    return "not a shot"


def get_player_in_action(string, list_players):
    """Return the roster player who attempted the shot described in *string*.

    For blocked shots ("X blocks Y's ... shot") only the text after the word
    "blocks" is searched, so the shooter — not the defender — is returned.
    Returns "Not in team" when no roster player appears in the text.

    BUG FIX: the original called `.search()` on plain `str` objects
    (AttributeError), sliced a string by another string's `find` result, and
    its "blocks" branch was unreachable dead code.
    """
    if "blocks" in string:
        # Drop the defender's name before "blocks"; the shooter follows it.
        string = string.split("blocks", 1)[1]
    for player in list_players:
        if player in string:
            return player
    return "Not in team"


###############################################################################
# SCRAPING FUNCTIONS
###############################################################################


def get_players(team_id, year):
    """Scrape the team stats page and return the list of player-name strings.

    Inputs:
        team_id: string, ESPN team abbreviation (e.g. "por")
        year: int, season year

    BUG FIX: the original called `player_names.append()` with no argument
    (TypeError), collected bs4 Tag objects instead of their text, and
    compared a Tag against the string "Totals...".
    NOTE(review): the "colhead" row selector is kept from the original —
    confirm these rows actually carry player names on the live page.
    """
    link = "http://espn.go.com/nba/team/stats/_/name/{}/year/{}/".format(team_id, year)
    soup = soup_link(link)
    stat_rows = soup.findAll("tr", {"class": "colhead"})
    player_names = []
    for row in stat_rows:
        text = row.get_text()
        if "Totals" in text:
            continue  # summary row, not a player
        player_names.append(text)
    return player_names


def extract_gamelog_row(row, list_players, home_or_away):
    """Parse one play-by-play table row into a dict of shot information.

    Inputs:
        row: bs4 Tag, one <tr> of the play-by-play table
        list_players: list of player-name strings for the tracked team
        home_or_away: string, "home" or "away" — selects the event column
    Returns:
        dict with keys text/player/shot_success/points_scored/distance,
        or an empty dict for rows without an event column.

    BUG FIX: points were gated on `parse_points_scored(text) == "scores"`,
    an int-vs-str comparison that is always False, so points_scored was
    always 0. The gate now uses the parsed shot_success value.
    """
    pieces = list(row.children)
    info_dict = {}
    if len(pieces) < 3:
        return info_dict
    # Away events sit in the second cell, home events in the fourth.
    game_event = pieces[1] if home_or_away == "away" else pieces[3]
    text = game_event.get_text()
    info_dict["text"] = text
    info_dict["player"] = get_player_in_action(text, list_players)
    info_dict["shot_success"] = parse_shot_success(text)
    if info_dict["shot_success"] == "scores":
        info_dict["points_scored"] = parse_points_scored(text)
    else:
        info_dict["points_scored"] = 0
    info_dict["distance"] = parse_distance(text)
    # Certain shot types are assigned a fixed distance when none is stated.
    if not info_dict["distance"]:
        if "dunk" in text:
            info_dict["distance"] = 0
        elif "layup" in text or "tip shot" in text:
            info_dict["distance"] = 1
        elif "three point" in text:
            info_dict["distance"] = 23
    return info_dict


def structure_gamelog_info(game_id, list_players, home_or_away):
    """Parse information from one game's play-by-play log.

    Inputs:
        game_id: the id of the game
        list_players: a list of strings, the players of the team
        home_or_away: string, "home" or "away"
    Returns:
        Pandas data frame with one row per parsed play-by-play event.

    BUG FIX: the original created an empty dict, then skipped the row when
    `len(info_dict) == 0` — always true — so `extract_gamelog_row` was never
    called and the frame was always empty. The "fill in here" markers are
    now completed: each row is parsed and appended.
    """
    game_link = "http://espn.go.com/nba/playbyplay?gameId={}&period=0".format(game_id)
    gamelog = soup_link(game_link)
    report_lines = gamelog.findAll("tr", {"class": "even"})
    list_info_dicts = []
    for line in report_lines:
        info_dict = extract_gamelog_row(line, list_players, home_or_away)
        if not info_dict:
            continue  # row had no event column — nothing to record
        info_dict["game_id"] = game_id
        list_info_dicts.append(info_dict)
    return pd.DataFrame(list_info_dicts)


def parse_all_games(df_game_ids, list_players):
    """Scrape every game listed in *df_game_ids* and concatenate the results.

    df_game_ids must carry "game_id" and "home_or_away" columns.
    """
    list_dfs = []
    for _, row in df_game_ids.iterrows():
        game_id = row["game_id"]
        home_or_away = row["home_or_away"]
        print(game_id)
        df = structure_gamelog_info(game_id, list_players, home_or_away)
        list_dfs.append(df)
        time.sleep(1)  # be polite to the server: one request per second
    return pd.concat(list_dfs)


###############################################################################
# RUN THE SCRAPING
###############################################################################


def main():
    """Scrape the configured team/season and write the result to CSV."""
    # ---- define parameters
    team_id = "por"
    year = 2015
    data_folder = "../data"

    # ---- get the players
    list_players = get_players(team_id, year)

    # ---- scrape one game for testing purposes
    game_id = 400579510
    home_or_away = "away"
    df_game = structure_gamelog_info(game_id, list_players, home_or_away)
    # sanity check: summed points should match the team's real game total
    print(df_game["points_scored"].sum())

    # ---- scrape all games
    # -------- read the game ids
    df_game_ids = pd.read_csv(data_folder + "/gameids_{}_{}.csv".format(team_id, year))
    # -------- run the parser
    df_all = parse_all_games(df_game_ids, list_players)
    # -------- output the result
    df_all.to_csv(data_folder + "/all_games_{}_{}.csv".format(team_id, year),
                  index=False)


# Guard so importing this module does not immediately start scraping.
if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question