### Data Preprocessing Template ###
#
# Loads Data.csv, fills missing numeric values with column means, one-hot
# encodes the categorical Country column, label-encodes the target, and
# splits the result into training and test sets.
#
# Column layout assumed by the indexing below (taken from the comments in
# this template): [0] Country, [1] Age, [2] Salary, last column Purchased.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values  # every column except the last (features)
y = dataset.iloc[:, -1].values   # last column (target), consistent with X

### MISSING DATA ###
# Replace NaN with the column means of Age and Salary.
# SimpleImputer replaces the removed sklearn.preprocessing.Imputer; it always
# works column-wise, which is what the old axis=0 argument requested.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])  # all rows, columns 1 and 2

### CATEGORICAL DATA / DUMMY ENCODING ###
# Dummy encoding turns the Country column [0] into one 0/1 column per
# category, because Country shouldn't be attributed an order such as 0, 1, 2.
# OneHotEncoder accepts string categories directly, so no separate
# LabelEncoder pass over X[:, 0] is needed.
# The encoded columns come first and the remaining columns are passed
# through after them; sparse_threshold=0 forces a dense array result.
column_transformer = ColumnTransformer(
    transformers=[('country', OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
# X was an object array (mixed strings/numbers); after encoding every column
# is numeric, so convert to a float array.
X = np.asarray(column_transformer.fit_transform(X), dtype=float)

# Encoding the dependent variable y (Purchased). Change from Yes/No to 0/1
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

### Splitting the dataset into the Training set and Test set ###
# 80% training / 20% test; random_state fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

### Feature Scaling ###
# Disabled by default; uncomment to standardize the features. The scaler is
# fitted on the training set only and then applied to the test set.
# NOTE(review): the y-scaling lines are kept from the original template;
# y_train is 1-D here, so it would presumably need reshaping to 2-D before
# StandardScaler accepts it - confirm before enabling.
#
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X_train)
# X_test = sc_X.transform(X_test)
# sc_y = StandardScaler()
# y_train = sc_y.fit_transform(y_train)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question