# jupyter nbconvert HW8.ipynb --TagRemovePreprocessor.remove_cell_tags='{"remove-cell"}' --to pdf 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from qpsolvers import solve_qp
from scipy.optimize import minimize

%matplotlib inline
%config InlineBackend.figure_format = 'png'

from pylab import rcParams
rcParams.update({"axes.grid" : True})
rcParams['figure.figsize'] = (6,4)
rcParams['lines.linewidth'] = 1
rcParams['image.cmap'] = 'Greys'
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
rcParams['font.weight'] = 400
rcParams['font.size'] = 9
rcParams['xtick.color'] = '#111111'
rcParams['ytick.color'] = '#111111'
rcParams['grid.color'] = '#dddddd'
rcParams['grid.linestyle'] = '-'
rcParams['grid.linewidth'] = 0.5
rcParams['axes.titlesize'] = 12
rcParams['axes.titleweight'] = 500
rcParams['axes.labelsize'] = 10
rcParams['axes.labelweight'] = 400
rcParams['axes.linewidth'] = 0.5
rcParams['axes.edgecolor'] = [.25,.25,.25]


train = np.array(pd.read_csv('./data/SVM-Q1.csv'))
x = train[:,0:2]
y = train[:,2]
n = np.shape(x)[0]
p = np.shape(x)[0]
plt.scatter(x[y==1,0], x[y==1,1])
plt.scatter(x[y==-1,0], x[y==-1,1])
plt.show()


P = np.diag((1e-10,1,1)).astype('double')
q = np.array([0,0,0]).astype('double')
h = -1 * np.ones(n).astype('double')
G = np.vstack((-1*y,-1*y*x[:,0],-1*y*x[:,1])).T.astype('double')

beta_and_b0 = solve_qp(P=P, q=q, h=h, G=G)
b0_primal = beta_and_b0[0]
beta_primal = beta_and_b0[1:]


print(f'Beta 0: {b0_primal}')
print(f'Beta 1 and 2: {beta_primal}')

a = -beta_primal[0] / beta_primal[1]
xx = np.linspace(-5, 5)
yy = a * xx - (b0_primal) / beta_primal[1]

plt.scatter(x[y==1,0], x[y==1,1], label='y=1 examples')
plt.scatter(x[y==-1,0], x[y==-1,1], label='y=-1 examples')
plt.plot(xx, yy, 'k-', label='decision boundary')

sv_pos = x[y==1,:][np.argmin(x[y==1,:]@beta_primal)]
sv_neg = x[y==-1,:][np.argmax(x[y==-1,:]@beta_primal)]
svs = np.array(sv_pos.tolist()+sv_neg.tolist())

b_pos = sv_pos[1] - a * sv_pos[0]
b_neg = sv_neg[1] - a * sv_neg[0]

yy_pos = a * xx + b_pos
yy_neg = a * xx + b_neg
plt.plot(xx, yy_pos, '--', label='positive margin line')
plt.plot(xx, yy_neg, '--', label='negative margin line')
plt.scatter(svs[0],svs[1],s=150, label='support vector', marker='o',facecolors='none', edgecolors='b')
plt.scatter(svs[0],svs[1],s=150, label='support vector',facecolors='none', edgecolors='b')
plt.legend(loc='upper right')
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()

Beta 0: 3.419238439968422
Beta 1 and 2: [-1.0457306  -0.99907938]


#Initializing values and computing H. Note the 1. to force to float type
H = np.dot(y.reshape((-1,1))*x, (y.reshape((-1,1))*x).T)
q = -1*np.ones(n)
G = -1*np.diag(np.ones(n))
h = np.zeros(n)
A = y.reshape(1,-1)
b = np.zeros(1)

for i in range(n):
    H[i,i] = H[i,i]+1e-10
#Run solver
alphas = solve_qp(H, q, G, h, A, b)
beta_dual = ((y * alphas).T @ x)
b0_dual = -(np.max(x[y==-1,:]@beta_dual) + np.min(x[y==1,:]@beta_dual))/2


svs = x[alphas>1e-4]

a = -beta_dual[0] / beta_dual[1]
xx = np.linspace(-5, 5)
yy = a * xx - (b0_primal) / beta_dual[1]

plt.scatter(x[y==1,0], x[y==1,1], label='y=1 examples')
plt.scatter(x[y==-1,0], x[y==-1,1], label='y=-1 examples')
plt.plot(xx, yy, 'k-', label='decision boundary')

sv_pos = x[y==1,:][np.argmin(x[y==1,:]@beta_dual)]
sv_neg = x[y==-1,:][np.argmax(x[y==-1,:]@beta_dual)]

b_pos = sv_pos[1] - a * sv_pos[0]
b_neg = sv_neg[1] - a * sv_neg[0]

yy_pos = a * xx + b_pos
yy_neg = a * xx + b_neg
plt.plot(xx, yy_pos, '--', label='positive margin line')
plt.plot(xx, yy_neg, '--', label='negative margin line')
# plt.scatter(sv_pos[0],sv_pos[1],s=150, label='positive (y=1) support vector', marker='o',facecolors='none', edgecolors='b')
# plt.scatter(sv_neg[0],sv_neg[1],s=150, label='negative (y=-1) support vector',facecolors='none', edgecolors='orange')

plt.title('title')
plt.xlabel('x1')
plt.ylabel('x2')
plt.scatter(svs[:,0], svs[:,1], s=150, label='support vector', marker='o',facecolors='none', edgecolors='b')
#plt.scatter(svs_neg[:,0], svs_neg[:,1], s=150, label='negative (y=-1) support vector', marker='o',facecolors='none', edgecolors='orange')
plt.legend(loc='upper right')
plt.show()

all_primal_betas = [b0_primal]+beta_primal.tolist()
all_dual_betas = [b0_dual]+beta_dual.tolist()

all_primal_betas_formatted = [format(beta, ".3f") for beta in all_primal_betas]
all_dual_betas_formatted = [format(beta, ".3f") for beta in all_dual_betas]
dict = {
        'Betas primal' : all_primal_betas_formatted,
        'Betas dual' : all_dual_betas_formatted,
       }
df = pd.DataFrame(dict)
display(df.T)
l1_norm = np.abs(np.array(all_primal_betas)-np.array(all_dual_betas)).sum()
print(f'L1 norm between primal and dual betas: {l1_norm}')

L1 norm between primal and dual betas: 0.0005407776270179854


train = np.array(pd.read_csv('./data/SVM-Q2.csv'))
x = train[:,0:2]
y = train[:,2]
n = np.shape(x)[0]
p = np.shape(x)[0]
plt.scatter(x[y==1,0], x[y==1,1])
plt.scatter(x[y==-1,0], x[y==-1,1])
plt.show()


#Initializing values and computing H. Note the 1. to force to float type
H = np.dot(y.reshape((-1,1))*x, (y.reshape((-1,1))*x).T)*1.
q = -1*np.ones(n)*1.
lb = np.zeros(n)*1.
ub = np.ones(n)*1.
A = y.reshape(1,-1)*1.
b = np.zeros(1)*1.

for i in range(n):
    H[i,i] = H[i,i]+1e-10
#Run solver
non_sep_dual_alphas = solve_qp(P=H, q=q, A=A, b=b, lb=lb, ub=ub)
non_sep_dual_alphas = non_sep_dual_alphas.clip(0.0,1.0)


def missclassification(x, betas, y_true):
    x_with_ones = np.hstack( (np.ones(x.shape[0]).reshape(-1,1), x) )
    pred = x_with_ones@betas
    pred = np.where(pred >0, 1, -1)
    return np.mean(pred != y_true)


w = ((y * non_sep_dual_alphas).T @ x).reshape(-1,1)
S = ((non_sep_dual_alphas > 1e-4) & (non_sep_dual_alphas < 1.0))
b = y[S] - np.dot(x[S], w)


b=-1*((np.max(x[S,:][y[S]==-1]@w) + np.min(x[S,:][y[S]==1]@w))/2)

print('beta 1 and 2 = ', w.flatten())
print('b0 = ', b)
print(f'Misclassification rate: {missclassification(x, np.array([b,w.flatten()[0], w.flatten()[1]]), y)}')

a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (b) / w[1]

# plt.scatter(x[y==1,0], x[y==1,1], label='y=1 examples')
# plt.scatter(x[y==-1,0], x[y==-1,1], label='y=-1 examples')
plt.plot(xx, yy, 'k-', label='decision boundary')
plt.scatter(x[y==1,0], x[y==1,1], label='y=1 examples')
plt.scatter(x[y==-1,0], x[y==-1,1], label='y=1 examples')

sv_x = x[S]
sv_y = y[S]
plt.scatter(sv_x[:,0], sv_x[:,1], s=150, label='support vector', marker='o',facecolors='none', edgecolors='g')


intercept = -(b-1)/w[1]
slope = -w[0]/w[1]
yy1 = slope * xx + intercept

intercept = -(b+1)/w[1]
yy2 = slope * xx + intercept
plt.plot(xx,yy1, '--', label='margin line')
plt.plot(xx,yy2,'--', label='margin line')

plt.legend()
plt.show()

beta 1 and 2 =  [1.15205219 1.11165171]
b0 =  -1.3845806593328565
Misclassification rate: 0.1525


def logistic(x):
    return (np.log(1 + np.exp(-x)))


def SVMfn(b, x, y, lam):
    res = 0
    
    for i in range(x.shape[0]):
        res = res + logistic(y[i]*x[i,:]@b)
    
    res = res + lam*np.sum(b*b)
    return res


def SVMgn(b, x, y, lam):
    res = np.array([0.,0.,0.])
    for i in range(x.shape[0]):
        denom = 1 + np.exp(y[i]*x[i,:]@b)
        num1 = -y[i]
        res[0] = res[0] + num1/denom
        num2 = -y[i]*x[i,1:3]
        res[1:3] = res[1:3] + num2/denom
    
    res[1:3] = res[1:3] + 2*lam*b[1:3]
    return res

def SVMfnAndgn(b,x,y,lam):
    return (SVMfn(b,x,y,lam), SVMgn(b,x,y,lam))


x_with_ones = np.hstack( (np.ones(x.shape[0]).reshape(-1,1), x) )
minimization_results = minimize(SVMfnAndgn, np.array([0.,0.,0.]), args=(x_with_ones, y, 1), method='L-BFGS-B', jac=True)
a = -minimization_results.x[1] / minimization_results.x[2]
xx = np.linspace(-5, 5)
yy = a * xx - (minimization_results.x[0]) / minimization_results.x[2]

print(f'Beta 0: {minimization_results.x[0]}')
print(f'Beta 1 and 2: {minimization_results.x[1:]}')
print(f'Loss: {minimization_results.fun}')
print(f'Misclassification rate: {missclassification(x, np.array([minimization_results.x[0],minimization_results.x[1], minimization_results.x[2]]), y)}')

plt.scatter(x[y==1,0], x[y==1,1], label='y=1 examples')
plt.scatter(x[y==-1,0], x[y==-1,1], label='y=-1 examples')
plt.plot(xx, yy, 'k-', label='decision boundary')
plt.legend()
plt.show()

Beta 0: -2.262448679021386
Beta 1 and 2: [1.57150924 1.5041386 ]
Loss: 134.9817055914514
Misclassification rate: 0.1475

About HW8¶

Question 1 [50 Points] Sovling SVM using Quadratic Programming¶

a) [25 points] The Primal Form¶

b) [25 points] The Dual Form¶

Question 2 [20 Points] Linearly nonseparable SVM¶

Question 3 [30 Points] Penalized Loss Linear SVM¶

	0	1	2
Betas primal	3.419	-1.046	-0.999
Betas dual	3.420	-1.046	-0.999