Preface
Here's something that might surprise you: neural networks aren't that complicated! The term "neural network" gets used as a buzzword a lot, but in reality they're often much simpler than people imagine.
This post is intended for complete beginners and assumes ZERO prior knowledge of machine learning. We'll understand how neural networks work while implementing one from scratch in Python.
Let's get started!
Deep Learning is a branch of Machine Learning that lets machines learn good features automatically, removing the need for manual feature selection.
The basic idea of Deep Learning comes from neurobiology, which suggests that the brain's nervous system processes information hierarchically: higher-level features are combinations of lower-level features, and as we move from lower layers to higher layers the feature representations become increasingly abstract and better at capturing semantics or intent. Deep learning borrows this hierarchical scheme: a network contains multiple layers, each layer's output serves as the next layer's input, and in this way the input is represented and its features extracted level by level.
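To make the layer-by-layer idea concrete, here is a minimal sketch (not from the original notes; all names and sizes are made up for illustration) of how each layer's output becomes the next layer's input:
import numpy as np

def layer(a_prev, W, b):
    # One layer: an affine transform of the previous layer's output, then a nonlinearity.
    return np.maximum(0, W @ a_prev + b)  # ReLU activation

x = np.random.rand(8, 1)                             # raw input (lowest-level features)
W1, b1 = np.random.randn(16, 8), np.zeros((16, 1))
W2, b2 = np.random.randn(4, 16), np.zeros((4, 1))

h = layer(x, W1, b1)      # low-level features computed from the raw input
z = layer(h, W2, b2)      # higher-level features built from the layer below
print(h.shape, z.shape)   # (16, 1) (4, 1)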
The first file is a brief description of each part of a neural network; the second file contains the theory and code of the vectorized backpropagation algorithm.
Study notes (LaTeX version)
An Introduction to Neural Network study notes
Study notes on the vectorized backpropagation algorithm
Code
Appendix code for the first file:
import numpy as np

def sigmoid(x):
  # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
  return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
  # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
  fx = sigmoid(x)
  return fx * (1 - fx)

def mse_loss(y_true, y_pred):
  # y_true and y_pred are numpy arrays of the same length.
  return ((y_true - y_pred) ** 2).mean()

class OurNeuralNetwork:
  '''
  A neural network with:
    - 2 inputs
    - a hidden layer with 2 neurons (h1, h2)
    - an output layer with 1 neuron (o1)

  *** DISCLAIMER ***:
  The code below is intended to be simple and educational, NOT optimal.
  Real neural net code looks nothing like this. DO NOT use this code.
  Instead, read/run it to understand how this specific network works.
  '''
  def __init__(self):
    # Weights
    self.w1 = np.random.normal()
    self.w2 = np.random.normal()
    self.w3 = np.random.normal()
    self.w4 = np.random.normal()
    self.w5 = np.random.normal()
    self.w6 = np.random.normal()

    # Biases
    self.b1 = np.random.normal()
    self.b2 = np.random.normal()
    self.b3 = np.random.normal()

  def feedforward(self, x):
    # x is a numpy array with 2 elements.
    h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
    h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
    o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
    return o1

  def train(self, data, all_y_trues):
    '''
    - data is a (n x 2) numpy array, n = # of samples in the dataset.
    - all_y_trues is a numpy array with n elements.
      Elements in all_y_trues correspond to those in data.
    '''
    learn_rate = 0.1
    epochs = 1000  # number of times to loop through the entire dataset

    for epoch in range(epochs):
      for x, y_true in zip(data, all_y_trues):
        # --- Do a feedforward (we'll need these values later)
        sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
        h1 = sigmoid(sum_h1)

        sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
        h2 = sigmoid(sum_h2)

        sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
        o1 = sigmoid(sum_o1)
        y_pred = o1

        # --- Calculate partial derivatives.
        # --- Naming: d_L_d_w1 represents "partial L / partial w1"
        d_L_d_ypred = -2 * (y_true - y_pred)

        # Neuron o1
        d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
        d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
        d_ypred_d_b3 = deriv_sigmoid(sum_o1)

        d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
        d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)

        # Neuron h1
        d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
        d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
        d_h1_d_b1 = deriv_sigmoid(sum_h1)

        # Neuron h2
        d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
        d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
        d_h2_d_b2 = deriv_sigmoid(sum_h2)

        # --- Update weights and biases
        # Neuron h1
        self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
        self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
        self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

        # Neuron h2
        self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
        self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
        self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

        # Neuron o1
        self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
        self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
        self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3

      # --- Calculate total loss at the end of each epoch
      if epoch % 10 == 0:
        y_preds = np.apply_along_axis(self.feedforward, 1, data)
        loss = mse_loss(all_y_trues, y_preds)
        print("Epoch %d loss: %.3f" % (epoch, loss))

# Define dataset
data = np.array([
  [-2, -1],   # Alice
  [25, 6],    # Bob
  [17, 4],    # Charlie
  [-15, -6],  # Diana
])
all_y_trues = np.array([
  1,  # Alice
  0,  # Bob
  0,  # Charlie
  1,  # Diana
])

# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)
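Not part of the original appendix, but as a quick usage sketch once the code above has run, the trained network can be queried directly through feedforward. The two input samples below are made up for illustration:
# Hypothetical new samples (assumes the training code above has already run).
sample_a = np.array([-7, -3])
sample_b = np.array([20, 2])
print("sample_a: %.3f" % network.feedforward(sample_a))  # expected to be close to 1 (class 1)
print("sample_b: %.3f" % network.feedforward(sample_b))  # expected to be close to 0 (class 0)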
Appendix code for the second file:
import numpy as np

# generate synthetic data
np.random.seed(1)
X = np.random.rand(12288, 200)                     # X is [12288 x 200]
Y = (np.random.rand(1, 200) > 0.5).astype(float)   # binary labels, [1 x 200]

# set up the configuration of the network
n0, m = X.shape
n1 = 20
n2 = 7
n3 = 5
n4 = 1
layers_dims = [n0, n1, n2, n3, n4]  # [12288, 20, 7, 5, 1]
L = len(layers_dims) - 1  # the number of layers, excluding the input layer

# activation functions
def sigmoid(z):
    '''
    z is the pre-activation of a layer, with size n[l] x m
    '''
    return 1 / (1 + np.exp(-z))

def relu(z):
    '''
    z is the pre-activation of a layer, with size n[l] x m
    '''
    return np.maximum(0, z)

# parameter containers (index 0 is unused so that index l matches layer l)
param_w = [None] * (L + 1)
param_b = [None] * (L + 1)

# neural network model
def neural_network(X, Y, learning_rate=0.01, num_iterations=2000, lambd=0):
    m = X.shape[1]

    # initialize parameters
    np.random.seed(10)
    for l in range(1, L + 1):
        if l < L:
            # use He initialization for the ReLU layers
            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
        if l == L:
            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
        param_b[l] = np.zeros((layers_dims[l], 1))

    activations = [X] + [None] * L
    prev_activations = [None] * (L + 1)
    dA = [None] * (L + 1)
    dz = [None] * (L + 1)
    dw = [None] * (L + 1)
    db = [None] * (L + 1)

    for i in range(num_iterations):
        ### forward propagation
        for l in range(1, L + 1):
            prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
            if l < L:
                activations[l] = relu(prev_activations[l])
            else:
                activations[l] = sigmoid(prev_activations[l])

        cross_entropy_cost = -1 / m * (np.dot(np.log(activations[L]), Y.T)
                                       + np.dot(np.log(1 - activations[L]), 1 - Y.T))
        regularization_cost = 0
        for l in range(1, L + 1):
            regularization_cost += np.sum(np.square(param_w[l])) * lambd / (2 * m)
        cost = cross_entropy_cost + regularization_cost

        ### initialize backward propagation
        dA[L] = np.divide(1 - Y, 1 - activations[L]) - np.divide(Y, activations[L])
        assert dA[L].shape == (1, m)

        ### backward propagation
        for l in reversed(range(1, L + 1)):
            if l == L:
                dz[l] = dA[l] * activations[l] * (1 - activations[l])
            else:
                dz[l] = dA[l].copy()
                dz[l][prev_activations[l] <= 0] = 0
            dw[l] = 1 / m * np.dot(dz[l], activations[l - 1].T) + param_w[l] * lambd / m
            db[l] = 1 / m * np.sum(dz[l], axis=1, keepdims=True)
            dA[l - 1] = np.dot(param_w[l].T, dz[l])

            assert dz[l].shape == prev_activations[l].shape
            assert dw[l].shape == param_w[l].shape
            assert db[l].shape == param_b[l].shape
            assert dA[l - 1].shape == activations[l - 1].shape

            param_w[l] = param_w[l] - learning_rate * dw[l]
            param_b[l] = param_b[l] - learning_rate * db[l]

        if i % 100 == 0:
            print("cost after iteration {}: {}".format(i, np.squeeze(cost)))
            print(param_w)
            print(param_b)

neural_network(X, Y, learning_rate=0.01, num_iterations=2000, lambd=0)
# predict
def predict(X_new, param_w, param_b, threshold=0.5):
    activations = [X_new] + [None] * L
    prev_activations = [None] * (L + 1)
    for l in range(1, L + 1):
        prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
        if l < L:
            activations[l] = relu(prev_activations[l])
        else:
            activations[l] = sigmoid(prev_activations[l])
    prediction = (activations[L] > threshold).astype("int")
    return prediction

X_new = np.random.rand(12288, 200)
print(predict(X_new, param_w, param_b, threshold=0.5))
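A side note, not in the original notes: the backprop initialization above (dA[L], followed by dz[L] = dA[L] * a * (1 - a)) collapses algebraically to the familiar dz[L] = a - Y for a sigmoid output with cross-entropy loss. A quick numerical check, using made-up values:
import numpy as np

A = np.clip(np.random.rand(1, 5), 1e-3, 1 - 1e-3)  # stand-in output activations
Y = (np.random.rand(1, 5) > 0.5).astype(float)      # stand-in binary labels
dA = (1 - Y) / (1 - A) - Y / A                       # same formula as dA[L] above
dz = dA * A * (1 - A)                                # apply the sigmoid derivative
assert np.allclose(dz, A - Y)                        # the two expressions agree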
This article was first published on ZhaoYLng Blog by @Laqudee. Please keep a link to the original when reposting.