import numpy as np
import sympy as sy


# 시그모이드 함수
def sigmoid(activation):
    """Return the logistic sigmoid ``1 / (1 + e**-activation)``.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    denominator = 1 + np.exp(-activation)
    return 1 / denominator

# 시그모이드 함수 미분
def sigmoid_derivative(output):
    """Return the sigmoid's slope expressed through its own output.

    ``output`` is a value already produced by ``sigmoid`` (not the raw
    pre-activation); the slope is then ``output * (1 - output)``.
    """
    complement = 1 - output
    return output * complement

# Hidden-layer pre-activation for the sample input: the input row vector times
# the transposed first-layer weights. The result is not stored anywhere.
np.dot(np.array([[0.1, 0.2]]), np.transpose([[0.3, 0.25], [0.4, 0.35]]))

# 해당 구현에서는 b(bias)편향은 고려하지 않았습니다.
class NeuralNetwork:
    """Tiny 2-2-2 sigmoid network trained with plain gradient descent.

    Bias terms are intentionally left out. Each weight matrix is stored as
    ``(units_out, units_in)``, which is why ``forward`` multiplies by the
    transpose.
    """

    def __init__(self):
        """Initialise both weight matrices with the fixed example values."""
        self.w1 = np.array([
            [0.3, 0.25],  # w1, w2
            [0.4, 0.35],  # w3, w4
        ])

        self.w2 = np.array([
            [0.45, 0.4],  # w5, w6
            [0.7, 0.6],   # w7, w8
        ])

    def forward(self, X):
        """Run a forward pass and cache every intermediate activation.

        Args:
            X: Input sample(s); anything ``np.dot`` accepts whose last axis
                has length 2.

        Returns:
            The output-layer activations, also stored in ``self.output``.
        """
        # Hidden layer: pre-activation, then sigmoid.
        self.layer1 = np.dot(X, self.w1.T)
        self.layer1_output = sigmoid(self.layer1)

        # Output layer, fed by the hidden-layer outputs.
        self.layer2 = np.dot(self.layer1_output, self.w2.T)
        self.output = sigmoid(self.layer2)

        return self.output

    def backPropagation(self, X, y, y_hat, learning_rate):
        """Apply one gradient-descent step for E = 0.5 * sum((y - output) ** 2).

        The gradient is built from the activations cached by the most recent
        ``forward`` call, so ``forward(X)`` must have run first. When ``X``
        holds several rows, the per-row gradients are summed.

        Args:
            X: The input that was passed to ``forward``.
            y: Target values, broadcastable against the network output.
            y_hat: Prediction returned by ``forward``. Kept for interface
                compatibility; the gradient is taken from ``self.output``.
            learning_rate: Step size of the weight update.

        Side effects:
            Stores the gradients in ``self.dW1`` / ``self.dW2`` and updates
            ``self.w1`` / ``self.w2`` in place.
        """
        inputs = np.atleast_2d(np.asarray(X, dtype=float))
        hidden = np.atleast_2d(self.layer1_output)
        outputs = np.atleast_2d(self.output)

        # dE/d(output) is simply (output - target); use the caller's targets
        # instead of constants so the network can learn any y.
        output_error = outputs - np.asarray(y, dtype=float)

        # Error signal at the output pre-activations, then dE/dw2[i][j].
        output_delta = output_error * sigmoid_derivative(outputs)
        self.dW2 = np.dot(output_delta.T, hidden)

        # Propagate through the *current* w2 before it is updated below:
        # dE/dh[j] = sum_i output_delta[i] * w2[i][j].
        hidden_error = np.dot(output_delta, self.w2)
        hidden_delta = hidden_error * sigmoid_derivative(hidden)
        self.dW1 = np.dot(hidden_delta.T, inputs)

        self.w2 -= (self.dW2 * learning_rate)
        self.w1 -= (self.dW1 * learning_rate)

    def train(self, X, y, learning_rate=0.5, epochs=1):
        """Train for ``epochs`` iterations, printing the loss of each one.

        Args:
            X: Input sample(s) forwarded to ``forward``.
            y: Target values.
            learning_rate: Step size handed to ``backPropagation``.
            epochs: Number of forward/backward iterations.

        Returns:
            ``self``, so the call can be chained.
        """
        for i in range(epochs):
            print("stat ", i)
            # Forward pass.
            y_hat = self.forward(X)

            # Backward pass (updates the weights).
            self.backPropagation(X, y, y_hat, learning_rate)

            # Loss of the prediction made before this epoch's weight update.
            loss = np.mean((y_hat - y) ** 2)

            print(f"Epoch {i}, Loss: {loss}")

        return self


# One training sample from the worked example: two inputs and two targets.
X = [[0.1, 0.2]]
y = np.array([0.4, 0.6])

# Train for the default single epoch; train() returns the network itself.
nn = NeuralNetwork().train(X, y)

# Prediction cached by the forward pass that ran before the weight update.
nn.output

stat  0
Epoch 0, Loss: 0.023971900751730026

array([[0.609446  , 0.66384491]])


import numpy as np


# 시그모이드 함수
def sigmoid(x):
    """Return the logistic sigmoid of ``x`` (element-wise for arrays)."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

# 시그모이드 함수 미분
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``: the sigmoid slope when ``x`` is a sigmoid output."""
    return (1 - x) * x

# 신경망 클래스
class NeuralNetwork2:
    """2-2-2 sigmoid network (no biases) with chain-rule backpropagation.

    Weight matrices are stored as ``(units_out, units_in)``; ``forward``
    therefore multiplies by their transpose.
    """

    def __init__(self):
        """Initialise both weight matrices with the fixed example values."""
        self.w1 = np.array([[0.3, 0.25], [0.4, 0.35]])
        self.w2 = np.array([[0.45, 0.4], [0.7, 0.6]])

    def forward(self, X):
        """Run a forward pass and cache every intermediate activation.

        Args:
            X: Input sample(s); anything ``np.dot`` accepts whose last axis
                has length 2.

        Returns:
            The output-layer activations, also stored in ``self.output``.
        """
        self.layer1 = np.dot(X, self.w1.T)
        self.layer1_output = sigmoid(self.layer1)

        self.layer2 = np.dot(self.layer1_output, self.w2.T)
        self.output = sigmoid(self.layer2)

        return self.output

    def backPropagation(self, X, y, y_hat, learning_rate):
        """Apply one gradient-descent step for E = 0.5 * sum((y - y_hat) ** 2).

        Relies on the activations cached by the most recent ``forward`` call.
        When ``X`` holds several rows, the per-row gradients are summed.

        Args:
            X: The input that was passed to ``forward``.
            y: Target values, broadcastable against ``y_hat``.
            y_hat: Prediction returned by ``forward``.
            learning_rate: Step size of the weight update.

        Side effects:
            Updates ``self.w1`` and ``self.w2`` in place.
        """
        inputs = np.atleast_2d(np.asarray(X, dtype=float))
        hidden = np.atleast_2d(self.layer1_output)

        # Output error and the error signal at the output pre-activations.
        dE_dO = np.atleast_2d(y_hat) - np.asarray(y, dtype=float)
        dO_dZ2 = sigmoid_derivative(np.atleast_2d(self.output))
        delta2 = dE_dO * dO_dZ2

        # dE/dw2[i][j] = delta2[i] * hidden[j]
        dE_dW2 = np.dot(delta2.T, hidden)

        # Each hidden unit j collects the error of *every* output unit i
        # through w2[i][j]; only then is it scaled by that unit's own sigmoid
        # slope. Rows of dE_dW1 are therefore indexed by hidden unit.
        dE_dH = np.dot(delta2, self.w2)
        delta1 = dE_dH * sigmoid_derivative(hidden)

        # dE/dw1[j][k] = delta1[j] * inputs[k]
        dE_dW1 = np.dot(delta1.T, inputs)

        # Weight update (w2 was read above, so updating it now is safe).
        self.w2 -= (learning_rate * dE_dW2)
        self.w1 -= (learning_rate * dE_dW1)

    def train(self, X, y, learning_rate=0.5, epochs=1):
        """Train for ``epochs`` iterations, printing the loss of each one.

        Args:
            X: Input sample(s) forwarded to ``forward``.
            y: Target values.
            learning_rate: Step size handed to ``backPropagation``.
            epochs: Number of forward/backward iterations.

        Returns:
            ``self``, so the call can be chained.
        """
        for i in range(epochs):
            # Forward pass.
            y_hat = self.forward(X)

            # Backward pass (updates the weights).
            self.backPropagation(X, y, y_hat, learning_rate)

            # Loss of the prediction made before this epoch's weight update.
            loss = np.mean((y_hat - y) ** 2)

            print(f"Epoch {i}, Loss: {loss}")

        return self


# Train the second implementation on the same sample (X, y defined earlier);
# train() returns the network itself.
nn2 = NeuralNetwork2().train(X, y)

# Prediction cached by the forward pass that ran before the weight update.
nn2.output

Epoch 0, Loss: 0.023971900751730026

array([[0.609446  , 0.66384491]])

일	월	화	수	목	금	토
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31

QLoRA-Efficient Finetuning of Quantized LLMs (1)	2024.01.24
Pytorch - Error 정리 페이지 (0)	2024.01.18
기울기(Gradient) - 소실과 폭주(Vanishing & Exploding) (0)	2023.12.14
Tensorflow GPU사용하기 No colab (0)	2023.09.13
Optimizer - OGD, SGD, RLS (0)	2023.03.26

분석하고싶은코코

분석하고싶은코코

딥러닝에 대한 이해와 순전파, 역전파 직접 구현 본문

딥러닝에 대한 이해와 순전파, 역전파 직접 구현

순전파-역전파 직접 구현 해보자!¶

퍼셉트론(Perceptron)¶

단층 퍼셉트론(Single-Layer Perceptron)¶

다층 퍼셉트론(MultiLayer Perceptron, MLP)¶

순전파? 역전파?¶

순전파¶

역전파¶

직접 구현 - (1)¶

직접 구현 - (2)¶

'머신러닝&딥러닝' 카테고리의 다른 글

티스토리툴바