# Tutorial 4 -- scrape the SOCR MLB height/weight table and summarise it
# with NumPy, then plot the results.
print("Tutorial 4")

# Insert information to define the following objects
Last = ""
First = ""
Month = 1
Day = 1
Year = 1
CourseName = ""
CourseNumber = 1

print("")
print("Name: " + Last + ", " + First)
print("Course: " + CourseName + "-" + str(CourseNumber))
print("Date: " + str(Month) + "/" + str(Day) + "/" + str(Year))
print("")
print("")

# THIS PROGRAM SHOULD RUN WITHOUT ERROR--Change the values of variables LAST, FIRST,
# COURSE, and DATE. Next make THREE changes to the code:
# (1) round "Correlation of Height and Weight" to 2 decimal places
# (2) round "Median height of relief pitchers" to 2 decimal places
# (3) round "Mean height of relief pitchers" to 2 decimal places
# Do not worry about rounding the correlation values in the correlation matrix

# Webscrape the data from U of Michigan's wiki page using the following libraries
from bs4 import BeautifulSoup
import numpy as np
from urllib.request import urlopen

SOURCE_URL = 'http://wiki.socr.umich.edu/index.php/SOCR_Data_MLB_HeightsWeights'

# Get the data (soup) from the HTML table (bowl) and store it in the output array,
# but first we have to define a row of empty values to stack the baseball rows onto,
# which gets deleted once the rows of baseball data have been scraped.
baseball = np.array(["", "", "", "", "", ""])

# Use the response as a context manager so the connection is closed as soon
# as the page has been read.
with urlopen(SOURCE_URL) as response:
    html = BeautifulSoup(response.read(), "html.parser")
bowls = html.find("table", {"class": "wikitable"})

# At this point `baseball` still holds only the placeholder row of empty strings.
print(baseball)

# NOTE(review): everything from here to the end of the script is wrapped in a
# bare triple-quoted string, so it is parsed but never executed. Delete the two
# delimiter lines to re-enable the analysis (requires statsmodels and matplotlib).
'''
for row in bowls.findAll('tr')[1:]:
    soup = row.findAll('td')
    rowK = np.array([soup[0].text, soup[1].text, soup[2].text,
                     soup[3].text, soup[4].text, soup[5].text])
    baseball = np.vstack((baseball, rowK))
baseball = np.delete(baseball, (0), axis=0)  # This deletes that row of missing observations

# Convert the last three columns of numbers from strings to floats (age) or
# integers (height and weight)
name = baseball[:, 0]
team = baseball[:, 1]
position = baseball[:, 2]
height = baseball[:, 3].astype(int)
weight = baseball[:, 4].astype(int)
age = baseball[:, 5].astype(float)

# Convert height to metric meters
height_m = height*0.0254
print("The metric heights of the players are")
print(height_m)
print("")

# Create a Numpy array from weight, and then convert it to metric KG
weight_m = weight*0.453592
print("The metric weights of the players are")
print(weight_m)
print("")

# Calculate the BMI
bmi = np.round(weight_m/height_m**2, 1)
print("The BMI for each players is")
print(weight)
print(height)
print(bmi)
print("")

# Print the name of the 314th player, his team, his position, and his BMI
i = 313
print("The " + str(i+1) + "th player in the list is " + name[i]
      + ". He plays for " + team[i] + ". He is a "
      + position[i] + " and his BMI is " + str(bmi[i]))
print("")

# Create the really light BMI array
# NOTE(review): the cut-off used here is 20.5 while the message printed below
# says "less than 21" -- confirm which threshold is intended.
light = bmi < 20.5
print("List the values of LIGHT BMI:")
print(light)
print("")

# Print out BMIs of all baseball players selected by the LIGHT mask
print("List the BMIs of the players who have a BMI less than 21:")
print(bmi[light])
print("")

# Select the entire weight column (index 4) of baseball
print("The weights of the players on the list are given below. Why is the first a set of integers, and the")
print("second a set of strings?")
print(weight)
print(baseball[:, 4])
print("")

# Print out the mean of height
Havg = np.mean(height)
print("The average height of the players: " + str(round(Havg, 1)))
print("")

# Print out the median of height
Hmed = np.median(height)
print("The median height of the players: " + str(Hmed))
print("")

# Print out the standard deviation on height. Replace 'None'
Hstd = np.std(height)
print("Standard Deviation of player height is " + str(Hstd))
print("")

# Print out correlation between player height and weight
HWcorr = np.corrcoef(height, weight)
print("Correlation of Height and Weight is")
print(HWcorr)
print("")
print("Correlation of Height and Weight is " + str(HWcorr[0, 1]))
print("")

# Heights of relief pitchers
heights_rp = np.array(height[position == 'Relief_Pitcher'])
print("Median height of relief pitchers: " + str(np.median(heights_rp)))
print("Mean height of relief pitchers: " + str(np.mean(heights_rp)))
print("")

# Heights of the other players
heights_op = np.array(height[position != 'Relief_Pitcher'])
print("Median height of other players: " + str(np.median(heights_op)))
print("Mean height of other players: " + str(np.mean(heights_op)))
print("")

# Heights of starting pitchers
heights_sp = np.array(height[position == 'Starting_Pitcher'])
print("Median height of starting pitchers: " + str(np.median(heights_sp)))
print("Mean height of starting pitchers: " + str(np.mean(heights_sp)))
print("")

# Heights of the other players (neither relief nor starting pitchers)
heights_op = np.array(height[position != 'Relief_Pitcher'])
position_op = np.array(position[position != 'Relief_Pitcher'])
heights_op = np.array(heights_op[position_op != 'Starting_Pitcher'])
print("Median height of other players: " + str(np.median(heights_op)))
print("Mean height of other players: " + str(np.mean(heights_op)))

# Map each position to its plot colour. A separate array is built so that the
# `position` labels (a view into `baseball`) are not overwritten in place.
# Positions missing from the table are passed through unchanged.
position_colors = {
    'Relief_Pitcher': 'red',
    'Starting_Pitcher': 'red',
    'Designated_Hitter': 'blue',
    'First_Baseman': 'green',
    'Outfielder': 'green',
    'Catcher': 'green',
    'Second_Baseman': 'green',
    'Third_Baseman': 'green',
    'Shortstop': 'green',
}
color = np.array([position_colors.get(p, p) for p in position])

import statsmodels.api as sm
import matplotlib.pyplot as plt

# Marker size grows with age
m = age**5/250000

title = "Fig 1--MLB Players (age=size) (F=green,P=red,DH=blue)"
xlab = 'height'
ylab = 'weight'
F1 = sm.OLS(weight, sm.add_constant(height)).fit()  # estimate a line of best fit
X_plot = np.linspace(np.min(height), np.max(height))  # create the line for the graph
plt.plot(X_plot, X_plot*F1.params[1] + F1.params[0], c='black')  # plots the line in the graph
plt.scatter(height, weight, s=m, c=color, alpha=.4)
plt.title(title)
plt.xlabel(xlab)
plt.ylabel(ylab)
plt.show()

title = "Fig 2--MLB Players (F=green,P=red,DH=blue)"
xlab = 'age'
ylab = 'BMI'
F2 = sm.OLS(bmi, sm.add_constant(age)).fit()  # estimate a line of best fit
X_plot = np.linspace(np.min(age), np.max(age))  # create the line for the graph
plt.plot(X_plot, X_plot*F2.params[1] + F2.params[0], c='black')  # plots the line in the graph
plt.scatter(age, bmi, c=color, alpha=.4)
plt.title(title)
plt.xlabel(xlab)
plt.ylabel(ylab)
plt.show()
'''
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question