线性回归模型 || Into the World

一元线性回归

使用到的python库：

import sys
import numpy as np
import matplotlib.pyplot as plt
from lab_utils_uni import plt_gradients
import copy
import math
from lab_utils_multi import  run_gradient_descent

问题陈述

一个1000平方英尺的房子以300,000美元售出，一个2000平方英尺的房子以500,000美元售出。请给出一个线性回归模型，呈现房子面积与售价的关系。

面积/1000平方英尺	价格/1000$
1	300
2	500

记录数据

x_train = np.array([1.0,2.0])
y_train = np.array([300,500])

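理论依据：一元线性回归的预测函数为：$f_{w,b}(x) = wx + b$

误差函数：$J(w,b) = \frac{1}{2m}\sum_{i=1}^{m}\left(f_{w,b}(x^{(i)}) - y^{(i)}\right)^2$

梯度下降算法：重复执行 $w := w - \alpha\frac{\partial J(w,b)}{\partial w}$，$b := b - \alpha\frac{\partial J(w,b)}{\partial b}$（w 和 b 同步更新）。其中，α为学习率，决定单次下降的幅度。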

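梯度为：$\frac{\partial J(w,b)}{\partial w} = \frac{1}{m}\sum_{i=1}^{m}\left(wx^{(i)} + b - y^{(i)}\right)x^{(i)}$，$\frac{\partial J(w,b)}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(wx^{(i)} + b - y^{(i)}\right)$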

用代码实现：

# Squared-error cost for single-feature linear regression.
def compute_cost(x,y,w,b):
    """Return the squared-error cost of the line ``w*x + b`` on the data.

    Args:
        x: array whose first dimension (``x.shape[0]``) is the sample count.
        y: targets, indexed in step with ``x``.
        w: slope of the model.
        b: intercept of the model.

    Returns:
        The sum of squared residuals divided by ``2 * m``, where ``m`` is
        the number of samples.
    """
    n_samples = x.shape[0]
    squared_error_sum = 0
    for idx in range(n_samples):
        # Residual of the prediction for sample idx.
        residual = (w * x[idx] + b) - y[idx]
        squared_error_sum += residual ** 2
    return squared_error_sum / (2 * n_samples)

# Partial derivatives of the squared-error cost for single-feature regression.
def compute_gradient(x,y,w,b):
    """Return the gradient of the cost with respect to ``w`` and ``b``.

    Args:
        x: array whose first dimension (``x.shape[0]``) is the sample count.
        y: targets, indexed in step with ``x``.
        w: slope of the model.
        b: intercept of the model.

    Returns:
        ``(dj_dw, dj_db)``: the per-sample contributions averaged over all
        samples.
    """
    n_samples = x.shape[0]
    grad_w = 0
    grad_b = 0
    for idx in range(n_samples):
        # The prediction error is the shared factor of both partial derivatives.
        residual = w * x[idx] + b - y[idx]
        grad_w += residual * x[idx]
        grad_b += residual
    # Average the accumulated sums over the sample count.
    return grad_w / n_samples, grad_b / n_samples

# Batch gradient descent for single-feature linear regression.
def gradient_descent(x,y,w_in,b_in,alpha,num_iters,cost_function,gradient_function):
    """Fit ``w`` and ``b`` by repeatedly stepping against the gradient.

    Args:
        x: training inputs; passed unchanged to the two callbacks.
        y: training targets; passed unchanged to the two callbacks.
        w_in, b_in: initial values of the parameters.
        alpha: learning rate (step size multiplier).
        num_iters: number of update steps to run.
        cost_function: callable ``(x, y, w, b) -> cost``.
        gradient_function: callable ``(x, y, w, b) -> (dj_dw, dj_db)``.

    Returns:
        ``(w, b, J_history, p_history)``: the final parameters, the cost
        after each of the first 100000 steps, and the ``[w, b]`` pair after
        each of those steps.
    """
    J_history = []
    p_history = []
    b = b_in
    w = w_in

    # Progress is printed about ten times per run; the interval does not
    # change inside the loop, so compute it once.
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Both partials were evaluated at the old (w, b), so the two
        # assignments below form a simultaneous update.
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        # History is capped at 100000 entries to bound memory use.
        record = i < 100000
        report = i % report_every == 0

        # Evaluate the cost for the current parameters whenever it is needed.
        # Reading J_history[-1] for the report would show a stale value once
        # recording has stopped.
        if record or report:
            cost = cost_function(x, y, w, b)

        if record:
            J_history.append(cost)
            p_history.append([w, b])

        # Optional progress output.
        if report:
            print(f"Iteration {i:4} Cost: {cost:0.2e}",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history

开始运行，示例：

# Example run: start from an arbitrary (w, b) and take 10000 steps.
w_init = 1
b_init = 2
iterations = 10000
tmp_alpha = 1.0e-2
w_final, b_final, J_hist, p_hist = gradient_descent(
    x=x_train,
    y=y_train,
    w_in=w_init,
    b_in=b_init,
    alpha=tmp_alpha,
    num_iters=iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

输出结果： Iteration 0 Cost: 7.79e+04 dj_dw: -6.445e+02, dj_db: -3.965e+02 w: 7.445e+00, b: 5.96500e+00

Iteration 1000 Cost: 3.82e+00 dj_dw: -3.930e-01, dj_db:  6.358e-01 w:  1.946e+02, b: 1.08710e+02

Iteration 2000 Cost: 8.88e-01 dj_dw: -1.894e-01, dj_db:  3.065e-01 w:  1.974e+02, b: 1.04198e+02

Iteration 3000 Cost: 2.06e-01 dj_dw: -9.130e-02, dj_db:  1.477e-01 w:  1.987e+02, b: 1.02024e+02

Iteration 4000 Cost: 4.80e-02 dj_dw: -4.401e-02, dj_db:  7.121e-02 w:  1.994e+02, b: 1.00975e+02

Iteration 5000 Cost: 1.11e-02 dj_dw: -2.121e-02, dj_db:  3.433e-02 w:  1.997e+02, b: 1.00470e+02

Iteration 6000 Cost: 2.59e-03 dj_dw: -1.023e-02, dj_db:  1.655e-02 w:  1.999e+02, b: 1.00227e+02

Iteration 7000 Cost: 6.02e-04 dj_dw: -4.929e-03, dj_db:  7.976e-03 w:  1.999e+02, b: 1.00109e+02

Iteration 8000 Cost: 1.40e-04 dj_dw: -2.376e-03, dj_db:  3.844e-03 w:  2.000e+02, b: 1.00053e+02

Iteration 9000 Cost: 3.25e-05 dj_dw: -1.145e-03, dj_db:  1.853e-03 w:  2.000e+02, b: 1.00025e+02

(w,b) found by gradient descent: (199.9924,100.0122)

注意一下上面打印的梯度下降过程的一些特征。

成本开始很大，并迅速下降，如讲座中的幻灯片所述。
偏导数dj_dw和dj_db也会变小，一开始很快，然后越来越慢。如讲座中的图表所示，当过程接近 “碗底”时，由于该点上的导数值较小，所以进展较慢。

多元线性回归

使用到的python库：

1
2
3

import copy,math
import numpy as np
import matplotlib.pyplot as plt

问题陈述

你将使用住房价格预测的例子。训练数据集包含三个例子，有四个特征（尺寸、卧室数量、楼层数量和房龄），如下表所示。请注意，与先前的实验不同，尺寸的单位是平方英尺而不是1000平方英尺。这导致了一个问题，你将在下一个实验中解决这个问题。

尺寸（平方英尺）	卧室数量	楼层数量	房龄	价格（1000刀）
2104	5	1	45	460
1416	3	2	40	232
852	2	1	35	178

记录数据

x_train = np.array([[2104,5,1,45],[1416,3,2,40],[852,2,1,35]])
y_train = np.array([460,232,178])

参数向量w，参数b

b_init = 0
w_init = np.zeros(4,)

理论依据:

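预测函数：$f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w}\cdot\mathbf{x} + b$

成本函数：$J(\mathbf{w},b) = \frac{1}{2m}\sum_{i=1}^{m}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}\right)^2$

多变量梯度下降：重复执行 $w_j := w_j - \alpha\frac{\partial J(\mathbf{w},b)}{\partial w_j}$（对每个 $j$）和 $b := b - \alpha\frac{\partial J(\mathbf{w},b)}{\partial b}$（同步更新），其中 $\frac{\partial J(\mathbf{w},b)}{\partial w_j} = \frac{1}{m}\sum_{i=1}^{m}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}\right)x_j^{(i)}$，$\frac{\partial J(\mathbf{w},b)}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}\right)$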

代码实现:

import copy,math
import numpy as np
import matplotlib.pyplot as plt

# Print NumPy arrays with two digits of precision.
np.set_printoptions(precision=2)

# Training set: one row per house (size in sqft, bedrooms, floors, age in
# years) and the matching price in thousands of dollars.
x_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
y_train = np.array([460, 232, 178])

# Cost: count the samples, accumulate the squared residuals, divide by 2m.
def compute_cost(x,y,w,b):
    """Return the squared-error cost of the linear model on the data.

    Args:
        x: array whose first dimension is the sample count; ``x[i]`` is the
            feature vector of sample ``i``.
        y: targets, indexed in step with the rows of ``x``.
        w: weight vector, combined with each row via ``np.dot``.
        b: bias term.

    Returns:
        The sum of squared residuals divided by ``2 * m``.
    """
    n_samples = x.shape[0]
    squared_error_sum = 0
    for row in range(n_samples):
        residual = (np.dot(x[row], w) + b) - y[row]
        squared_error_sum += residual ** 2
    return squared_error_sum / (2 * n_samples)

# Gradient: accumulate each sample's error into the bias and weight partials,
# then average over the m samples.
def compute_gradient(x,y,w,b):
    """Return the gradient of the squared-error cost for the linear model.

    Args:
        x: 2-D array of shape ``(m, n)``: ``m`` samples with ``n`` features.
        y: targets, indexed in step with the rows of ``x``.
        w: weight vector, combined with each row via ``np.dot``.
        b: bias term.

    Returns:
        ``(dj_db, dj_dw)``. Note the order: the bias partial comes first,
        followed by the length-``n`` array of weight partials.
    """
    n_samples, n_features = x.shape
    grad_w = np.zeros(n_features,)
    grad_b = 0.
    for row in range(n_samples):
        residual = np.dot(x[row], w) + b - y[row]
        # Each feature's partial receives residual * feature value; this is
        # done for the whole row at once.
        grad_w += residual * x[row]
        grad_b += residual

    return grad_b / n_samples, grad_w / n_samples

# Multi-feature gradient descent: repeatedly evaluate the gradient, step the
# parameters against it, and keep a history of the cost.
def gradient_descent(x, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Args:
        x: training inputs; passed unchanged to the two callbacks.
        y: training targets; passed unchanged to the two callbacks.
        w_in: initial weights. A deep copy is taken before the loop starts.
        b_in: initial bias.
        cost_function: callable ``(x, y, w, b) -> cost``.
        gradient_function: callable ``(x, y, w, b) -> (dj_db, dj_dw)``
            (bias partial first).
        alpha: learning rate (step size multiplier).
        num_iters: number of update steps to run.

    Returns:
        ``(w, b, J_history)``: the final parameters and the cost after each
        of the first 100000 steps, e.g. for plotting.
    """
    J_history = []
    w = copy.deepcopy(w_in)  # work on a copy of the caller's initial weights
    b = b_in

    # Progress is printed about ten times per run (every step when
    # num_iters < 10); the interval does not change inside the loop.
    report_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(x, y, w, b)

        # Both partials were evaluated at the old (w, b): simultaneous update.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # History is capped at 100000 entries to bound memory use.
        record = i < 100000
        report = i % report_every == 0

        # Evaluate the cost for the current parameters whenever it is needed.
        # Reading J_history[-1] for the report would show a stale value once
        # recording has stopped.
        if record or report:
            cost = cost_function(x, y, w, b)

        if record:
            J_history.append(cost)

        if report:
            print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history

# Run gradient descent on the raw (unscaled) features.
b_init = 0
w_init = np.zeros(4,)
num_iters = 1000
alpha = 5.0e-7
w_final, b_final, J_hist = gradient_descent(
    x_train, y_train, w_init, b_init,
    compute_cost, compute_gradient,
    alpha, num_iters,
)
print(f"最终找到的w为{w_final}，b为{b_final:0.2f}")

# Compare the fitted model's prediction with the target for every sample.
for features, target in zip(x_train, y_train):
    print(f"预测值{np.dot(features,w_final)+b_final:0.2f}，实际值:{target}\n")


# Plot cost against iteration: the full run on the left, and the tail
# (from step 100 onward) on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
ax1.plot(J_hist)
ax2.plot(100 + np.arange(len(J_hist[100:])), J_hist[100:])
for axis, title in ((ax1, "Cost vs. iteration"), (ax2, "Cost vs. iteration (tail)")):
    axis.set_title(title)
    axis.set_ylabel('Cost')
    axis.set_xlabel('iteration step')
plt.show()

输出结果:

Iteration    0: Cost  2529.46   
Iteration  100: Cost   695.99   
Iteration  200: Cost   694.92   
Iteration  300: Cost   693.86   
Iteration  400: Cost   692.81   
Iteration  500: Cost   691.77   
Iteration  600: Cost   690.73   
Iteration  700: Cost   689.71   
Iteration  800: Cost   688.70   
Iteration  900: Cost   687.69   
最终找到的w为[ 0.2   0.   -0.01 -0.07]，b为-0.00
预测值426.19，实际值:460

预测值286.17，实际值:232

预测值171.47，实际值:178

预测值和实际值的差别还是挺大的

解决办法：特征缩放

代码实现:

def zscore_normalize_features(x):
    """Standardise each column of ``x`` to zero mean and unit variance.

    Args:
        x: array of samples; statistics are taken along axis 0, i.e. per
            column for a 2-D input.

    Returns:
        ``(x_norm, mu, sigma)``: the normalised data, the per-column mean,
        and the per-column scale that was divided by. The scale is the
        standard deviation, except that a column with exactly zero standard
        deviation (a constant feature) gets a scale of 1. Such a column
        normalises to all zeros instead of NaN, and new data can safely be
        transformed with ``(x_new - mu) / sigma``.
    """
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    # Adding the boolean mask turns an exact 0 into 1 and leaves every other
    # value untouched, so the division below can never be 0/0.
    sigma = sigma + (sigma == 0)
    x_norm = (x - mu) / sigma
    return x_norm, mu, sigma

现对x_train进行特征缩放，再进行梯度下降得到w，b

1
2
3

# Standardise the training features, keeping the per-column mean and scale so
# that new inputs can be transformed the same way later.
x_norm, x_mu, x_sigma = zscore_normalize_features(x_train)
print(x_norm)
# NOTE(review): 1000 and 1.0e-1 are passed positionally; presumably the
# iteration count and learning rate. Confirm against lab_utils_multi.
w_norm, b_norm, hist = run_gradient_descent(x_norm, y_train, 1000, 1.0e-1)

输出结果：

Iteration Cost          w0       w1       w2       w3       b       djdw0    djdw1    djdw2    djdw3    djdb

---------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------|
        0 3.78405e+04  1.2e+01  1.2e+01 -4.1e+00  1.2e+01  2.9e+01 -1.2e+02 -1.2e+02  4.1e+01 -1.2e+02 -2.9e+02
      100 2.41945e-05  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  4.2e-05 -4.2e-05  6.3e-04  7.9e-05 -7.7e-03
      200 1.70488e-14  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  1.0e-09 -1.0e-09  1.5e-08  1.9e-09 -2.0e-07
      300 1.17254e-23  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  1.4e-14 -3.5e-14  3.7e-13  3.5e-14 -5.4e-12
      400 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
      500 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
      600 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
      700 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
      800 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
      900 3.06962e-26  3.8e+01  4.2e+01 -3.1e+01  3.6e+01  2.9e+02  2.2e-14  2.0e-14  1.3e-14  2.3e-14 -2.5e-13
w,b found by gradient descent: w: [ 38.05  41.54 -30.99  36.34], b: 290.00

现在来预测一个有1200平方英尺、3间卧室、1层楼、40年房龄的房子的价格。必须用训练数据归一化时得出的平均值和标准差对数据进行归一化。

# Query house: 1200 sqft, 3 bedrooms, 1 floor, 40 years old.
x_house = np.array([1200, 3, 1, 40])
# Scale the query with the statistics computed on the training set, since the
# model was fitted on features normalised that way.
x_house_norm = (x_house - x_mu) / x_sigma
x_house_predict = x_house_norm.dot(w_norm) + b_norm
# Multiply by 1000 to print the price in dollars (targets are in $1000s).
print(f" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.0f}")

输出结果:

 predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = $281683