Logistic Regression: a Python Implementation
1. The algorithm in Python
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np

class Logistic(object):
    def __init__(self):
        self._history_w = []
        self._likelihood = []

    def load_input_data(self, data_file):
        with open(data_file) as f:
            input_x = []
            input_y = []
            for line in f:
                [x1, x2, y] = line.split()
                input_x.append([1.0, float(x1), float(x2)])
                input_y.append(int(y))
        # extended precision reduces overflow in np.exp; np.float128 is not available on every platform
        self._input_x = np.array(input_x, dtype=np.float128)
        self._input_y = np.array(input_y, dtype=np.float128)

    def sigmoid(self, x, w):  # sigmoid function
        return 1.0 / (1 + np.exp(-np.inner(w, x)))

    def likelihood_function(self, w):  # negative log-likelihood, the objective being minimized
        temp = np.inner(self._input_x, w)
        a = np.inner(temp.T, self._input_y)
        b = np.sum(np.log(1 + np.exp(temp)))
        return b - a

    def batch_gradient_descent(self, iter_num, iter_rate):  # batch gradient descent
        (data_num, features) = np.shape(self._input_x)
        w = np.ones(features)  # initialize w to a vector of ones
        for i in range(iter_num):
            theta = self.sigmoid(self._input_x, w)
            delta = theta - self._input_y
            w = w - iter_rate * np.inner(self._input_x.T, delta)  # gradient step on w
            self._history_w.append(w)
            self._likelihood.append(self.likelihood_function(w))
        self._final_w = w
        return w

    def stochastic_gradient_descent(self, iter_num, iter_rate):  # stochastic gradient descent
        (data_num, features) = np.shape(self._input_x)
        w = np.ones(features)  # initialize w to a vector of ones
        for i in range(iter_num):
            for j in range(data_num):
                # the step size decays as the iterations progress, overriding the iter_rate argument
                iter_rate = 4 / (1.0 + j + i) + 0.01
                theta = self.sigmoid(self._input_x[j], w)
                delta = theta - self._input_y[j]
                w = w - iter_rate * delta * self._input_x[j]  # gradient step on w
                self._history_w.append(w)
                self._likelihood.append(self.likelihood_function(w))
        self._final_w = w
        return w
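The weight update in both methods uses the gradient of the negative log-likelihood, which for logistic regression is X.T · (sigmoid(Xw) − y); that is exactly what the delta terms above compute. The class itself only plots results, so for a quick numerical sanity check of the learned weights the two methods below could be added to the class. They are not part of the original post, just a minimal sketch that thresholds the sigmoid output at 0.5 and reports training accuracy.

def predict(self, x):  # hypothetical helper, not in the original post
    # classify one (already bias-augmented) sample by thresholding the sigmoid at 0.5
    return 1 if self.sigmoid(x, self._final_w) >= 0.5 else 0

def training_accuracy(self):  # hypothetical helper, not in the original post
    # fraction of training samples whose predicted label matches the true label
    correct = sum(self.predict(x) == y
                  for x, y in zip(self._input_x, self._input_y))
    return correct / float(np.shape(self._input_y)[0])

After training, log.training_accuracy() returns the fraction of training samples classified correctly.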
2. Visualizing the data with Python
Add the following functions to the class:
def draw_result(self, title):
    total_data = np.shape(self._input_y)[0]
    self._negative_x = []
    self._positive_x = []
    for i in range(total_data):
        if self._input_y[i] > 0:
            self._positive_x.append(self._input_x[i])
        else:
            self._negative_x.append(self._input_x[i])
    plt.figure(1)
    x1 = [x[1] for x in self._positive_x]
    x2 = [x[2] for x in self._positive_x]
    plt.scatter(x1, x2, label='positive', color='g', s=20, marker="o")  # samples with label 1
    x1 = [x[1] for x in self._negative_x]
    x2 = [x[2] for x in self._negative_x]
    plt.scatter(x1, x2, label='negative', color='r', s=20, marker="x")  # samples with label 0
    plt.xlabel('x1')
    plt.ylabel('x2')

    def f(x):
        return -(self._final_w[0] + self._final_w[1] * x) / self._final_w[2]

    x = np.linspace(-4, 4, 10, endpoint=True)  # draw the learned decision boundary
    plt.plot(x, f(x), 'b-', lw=1)
    plt.title(title)
    plt.legend()
    plt.show()
def draw_w_history(self, title):
    f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
    x = np.arange(len(self._history_w))
    w0 = [w[0] for w in self._history_w]
    w1 = [w[1] for w in self._history_w]
    w2 = [w[2] for w in self._history_w]
    ax1.set_title(title + ' w trend')
    ax1.set_ylabel('w[0]')
    ax1.scatter(x, w0, label='w[0]', color='b', s=10, marker=".")
    ax2.set_ylabel('w[1]')
    ax2.scatter(x, w1, label='w[1]', color='g', s=10, marker=".")
    ax3.set_ylabel('w[2]')
    ax3.scatter(x, w2, label='w[2]', color='r', s=10, marker=".")
    plt.show()
def draw_likelihood_function(self, title):
    plt.figure(1)
    x = np.arange(len(self._likelihood))
    plt.scatter(x, self._likelihood, label='Likelihood', color='g', s=10, marker=".")
    plt.xlabel('x')
    plt.ylabel('Likelihood function')
    plt.title(title + ' Likelihood trend')
    plt.legend()
    plt.show()
3. Testing on a dataset
The dataset comes from Machine Learning in Action (《机器学习实战》):
https://github.com/apachecn/MachineLearning/blob/python-2.7/input/5.Logistic/TestSet.txt
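load_input_data only assumes a plain text file with one whitespace-separated "x1 x2 y" triple per line, so if TestSet.txt is not at hand, a small file in the same format can be generated for experimentation. The snippet below is a sketch; the file name synthetic.txt and the two Gaussian class centers are arbitrary choices, not part of the original post.

import numpy as np

np.random.seed(0)
with open("synthetic.txt", "w") as f:
    # two Gaussian blobs, one per class, with arbitrarily chosen centers
    for label, center in ((0, (-1.0, -1.0)), (1, (1.5, 1.5))):
        for _ in range(50):  # 50 samples per class
            x1 = np.random.normal(center[0], 1.0)
            x2 = np.random.normal(center[1], 1.0)
            f.write("%f\t%f\t%d\n" % (x1, x2, label))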
3.1 Batch gradient descent
log = Logistic()
log.load_input_data("test.txt")
log.batch_gradient_descent(iter_num=300, iter_rate=0.001)
title = "Batch Gradient Descent"
log.draw_result(title)
log.draw_w_history(title)
log.draw_likelihood_function(title)
The total computation is on the order of 300 (iterations) × 100 (samples) × 3 (features) operations.
3.2 Stochastic gradient descent
log = Logistic()
log.load_input_data("test.txt")
log.stochastic_gradient_descent(iter_num=100, iter_rate=0.001)
title = "Stochastic Gradient Descent"
log.draw_result(title)
log.draw_w_history(title)
log.draw_likelihood_function(title)
The total computation is on the order of 100 (outer loop) × 100 (inner loop) × 3 (features) operations.
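To compare the two optimizers directly, both can be run on the same file and their recorded likelihood values drawn on one figure. This is a sketch built on the class above, reusing its internal _likelihood list; one value is recorded per weight update, so the two curves have different lengths (300 for batch, iter_num × data_num for stochastic).

batch = Logistic()
batch.load_input_data("test.txt")
batch.batch_gradient_descent(iter_num=300, iter_rate=0.001)

sgd = Logistic()
sgd.load_input_data("test.txt")
sgd.stochastic_gradient_descent(iter_num=100, iter_rate=0.001)

plt.figure()
# cast to float64 for plotting; the class stores np.float128 values
plt.plot(np.asarray(batch._likelihood, dtype=float), label='batch')
plt.plot(np.asarray(sgd._likelihood, dtype=float), label='stochastic')
plt.xlabel('update step')
plt.ylabel('negative log-likelihood')
plt.legend()
plt.show()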
References:
Machine Learning in Action (《机器学习实战》), Chapter 5