### Spotify regional chart -> label / copyright lookup ###
# Reads a Spotify regional daily chart CSV, asks the Spotify Web API for the
# label, first artist, track name and first copyright line of each charted
# track, strips years/symbols from the copyright text, and builds
# `all_regional_df` plus a flat word list (`copyright_indiv1_clean`) intended
# for a word cloud.
#
# Usage: pass the Spotify username as the first command-line argument.

### Load Libraries ###
import os
import re
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spotipy
import spotipy.util as util
from funcy import flatten, isa

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# script, so issue the magic through the IPython API when it is available.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython/Jupyter

### Spotify Settings ###
# "xxx" values are placeholders; `setdefault` keeps any real credentials that
# are already exported in the environment instead of overwriting them.
os.environ.setdefault("SPOTIPY_CLIENT_ID", "xxx")
os.environ.setdefault("SPOTIPY_CLIENT_SECRET", "xxx")
os.environ.setdefault("SPOTIPY_REDIRECT_URI", "http://localhost:8888/callback")

# NOTE(review): `user-read-birthdate` may no longer be accepted by Spotify --
# confirm against the current list of authorization scopes.
scopes = ' '.join([
    'playlist-read-private',
    'playlist-read-collaborative',
    'playlist-modify-public',
    'playlist-modify-private',
    'user-follow-modify',
    'user-follow-read',
    'user-library-read',
    'user-library-modify',
    'user-read-private',
    'user-read-birthdate',
    'user-read-email',
    'user-top-read',
])

if len(sys.argv) < 2:
    sys.exit('usage: {0} SPOTIFY_USERNAME'.format(sys.argv[0]))
spotify_username = sys.argv[1]

token = util.prompt_for_user_token(spotify_username, scopes)
if not token:
    sys.exit("Can't get Spotify token for {0}".format(spotify_username))
sp = spotipy.Spotify(auth=token)

#### START DATA RETRIEVAL -- Find Labels Associated with Tracks ####

### Read in CSV file & Replace Spotify URL with Spotify Track ID ###
se_regional = pd.read_csv('regional-se-daily-2017-09-27.csv')
# regex=False: the prefix is a literal string, its dots are not wildcards.
se_regional['URL'] = se_regional['URL'].str.replace(
    'https://open.spotify.com/track/', '', regex=False)

### Pull Label, Artist, Track Name & Copyright from Spotify based on Track ID & Create Lists ###
all_regional = {
    'label': [],
    'artist': [],
    'name': [],
    'copyright': [],
}

# Only the first TRACK_LIMIT chart rows are looked up, for speed.
# Set to None to process every row of the CSV.
TRACK_LIMIT = 10

# CSV row label of every track that was fetched successfully, so the chart
# columns can later be joined back onto the right track.
fetched_rows = []

for row, sp_track in se_regional['URL'][:TRACK_LIMIT].items():
    try:
        # One track request and one album request per chart entry.
        track = sp.track(sp_track)
        album = sp.album(track['album']['id'])
        record = {
            'label': album['label'],
            'artist': track['artists'][0]['name'],
            'name': track['name'],
            'copyright': album['copyrights'][0]['text'],
        }
    except Exception as err:
        # Best effort: report and skip this track. Nothing has been appended
        # yet, so the four lists always stay the same length.
        sys.stderr.write('Skipping track {0!r}: {1}\n'.format(sp_track, err))
        continue
    for field, value in record.items():
        all_regional[field].append(value)
    fetched_rows.append(row)

### Clean Copyright Data (Remove Year, Symbols & Trim) ###
regex = '20[0-9][0-9]'


def _clean_copyright(text):
    """Return *text* without year lists, 20xx years, (P)/(C)/© marks and outer whitespace."""
    # Known multi-year prefixes go first so their commas and pre-2000 years
    # disappear too; the generic pattern below only matches 20xx.
    for year_list in ('1981, 1992, 2005, 2006 ', '2013, 2014 ', '2017, '):
        text = text.replace(year_list, '')
    text = re.sub(regex, '', text)
    # Work on text rather than UTF-8 bytes: replacing a str inside bytes
    # raises TypeError on Python 3.
    for symbol in ('(P)', '(C)', u'\u00a9'):
        text = text.replace(symbol, '')
    return text.strip()


regional_copyright_clean = [_clean_copyright(c) for c in all_regional['copyright']]

### Pull Data from Original CSV into new list ###
# Index by the originating CSV row so the Series assignments below align each
# track with its own Streams/URL even when some rows were skipped.
all_regional_df = pd.DataFrame(all_regional, index=fetched_rows)
all_regional_df['copyright'] = regional_copyright_clean
all_regional_df['streams'] = se_regional["Streams"]
all_regional_df['url'] = se_regional["URL"]

#### START EXTRA -- WORD CLOUD FOR COPYRIGHT DATA ####
# Split on "/" as well as whitespace, lowercase, then keep only letters/digits.
copyright_indiv = [c.replace('/', ' ') for c in regional_copyright_clean]
copyright_indiv1 = [c.lower().split() for c in copyright_indiv]
copyright_indiv1_clean = [
    re.sub('[^A-Za-z0-9]+', '', word) for word in flatten(copyright_indiv1)
]

### Save CSVs ###
# all_regional_df.to_csv('regional_se.csv', index=True, header=True, encoding='utf-8')
# pd.DataFrame(copyright_indiv1_clean).to_csv('copyright_words_clean.csv', index=True, header=False, encoding='utf-8')
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question