""" Complete the code in ClassifyNB.py with the sklearn Naive Bayes classifier to classify the terrain data. The objective of this exercise is to recreate the decision boundary found in the lesson video, and make a plot that visually shows the decision boundary """
from prep_terrain_data import makeTerrainData from class_vis import prettyPicture, output_image from ClassifyNB import classify
### the training data (features_train, labels_train) have both "fast" and "slow" points mixed ### in together--separate them so we can give them different colors in the scatterplot, ### and visually identify them grade_fast = [features_train[ii][0] for ii inrange(0, len(features_train)) if labels_train[ii]==0] bumpy_fast = [features_train[ii][1] for ii inrange(0, len(features_train)) if labels_train[ii]==0] grade_slow = [features_train[ii][0] for ii inrange(0, len(features_train)) if labels_train[ii]==1] bumpy_slow = [features_train[ii][1] for ii inrange(0, len(features_train)) if labels_train[ii]==1]
# You will need to complete this function imported from the ClassifyNB script. # Be sure to change to that code tab to complete this quiz. clf = classify(features_train, labels_train)
### draw the decision boundary with the text points overlaid prettyPicture(clf, features_test, labels_test) output_image("test.png", "png", open("test.png", "rb").read())
# Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, m_max]x[y_min, y_max]. h = .01# step size in the mesh xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot Z = Z.reshape(xx.shape) plt.xlim(xx.min(), xx.max()) plt.ylim(yy.min(), yy.max())
plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)
# Plot also the test points grade_sig = [X_test[ii][0] for ii inrange(0, len(X_test)) if y_test[ii]==0] bumpy_sig = [X_test[ii][1] for ii inrange(0, len(X_test)) if y_test[ii]==0] grade_bkg = [X_test[ii][0] for ii inrange(0, len(X_test)) if y_test[ii]==1] bumpy_bkg = [X_test[ii][1] for ii inrange(0, len(X_test)) if y_test[ii]==1]
plt.scatter(grade_sig, bumpy_sig, color = "b", label="fast") plt.scatter(grade_bkg, bumpy_bkg, color = "r", label="slow") plt.legend() plt.xlabel("bumpiness") plt.ylabel("grade")
# -*- coding: UTF-8 -*- #!/usr/bin/python import random
def makeTerrainData(n_points=1000): ############################################################################### ### make the toy dataset random.seed(42) grade = [random.random() for ii in range(0,n_points)] bumpy = [random.random() for ii in range(0,n_points)] error = [random.random() for ii in range(0,n_points)] y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)] for ii in range(0, len(y)): if grade[ii]>0.8 or bumpy[ii]>0.8: y[ii] = 1.0
### split into train/test sets X = [[gg, ss] for gg, ss in zip(grade, bumpy)] split = int(0.75*n_points) X_train = X[0:split] X_test = X[split:] y_train = y[0:split] y_test = y[split:]
grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0] bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0] grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1] bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]
grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0] bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0] grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1] bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]
# -*- coding: UTF-8 -*- def classify(features_train, labels_train): ### import the sklearn module for GaussianNB ### create classifier ### fit the classifier on the training features and labels ### return the fit classifier ### your code goes here! def classify(traindata,trainlabel): from sklearn.naive_bayes import GaussianNB classifier=GaussianNB() classifier.fit(traindata, trainlabel) return classifier