헬창 개발자

파이썬으로 구현하는 머신러닝 본문

공부방

파이썬으로 구현하는 머신러닝

찬배 2021. 12. 24. 16:31

행동분류 데이터 형식

 

필요한 모듈
import numpy as np
import pandas as pd

 

데이터 전처리
# Load the recorded sensor data. The columns read below are 'activity'
# (the label) and six accelerometer / gyroscope readings.
csv = pd.read_csv('행동분류 데이터.csv')

# Target labels, one per row of the CSV.
labels = csv['activity'].tolist()

# Sensor columns used as model inputs, in the order they appear in a sample.
feature_columns = [
    'acceleration_x',
    'acceleration_y',
    'acceleration_z',
    'gyro_x',
    'gyro_y',  # the original read this column through the undefined name `sv`
    'gyro_z',
]

# Each sample is a constant 0.1 followed by the six sensor readings, giving
# seven inputs per row. The constant column presumably plays the role of the
# bias input for the weight vector -- confirm against the training code.
# Selecting all columns at once avoids one pandas lookup per cell.
datas = [[0.1, *row] for row in csv[feature_columns].to_numpy().tolist()]

 

시그모이드(활성화 함수), 교차 엔트로피(손실 함수)
def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^-x) element-wise.

    Args:
        x: A scalar or NumPy array; it must support unary negation.

    Returns:
        A value (or array of the same shape as ``x``) between 0 and 1.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def CEE(y, t):
    """Return the binary cross-entropy between predictions and targets.

    The result is the *sum* over all elements; callers divide by the sample
    count themselves when they want a mean.

    Args:
        y: Predicted probabilities.
        t: Target values, broadcastable against ``y``.

    Returns:
        The summed cross-entropy as a scalar.
    """
    eps = 1e-7  # small offset so log() never receives exactly 0
    hit_term = t * np.log(y + eps)
    miss_term = (1 - t) * np.log(1 - y + eps)
    return -np.sum(hit_term + miss_term)
훈련, 시험 데이터 셋 나누기
# Fix the seed so the same rows land in the training set on every run.
np.random.seed(100)

# Draw 70% of the row indices, without replacement, for the training set.
train_index = np.random.choice(len(datas), int(len(datas)*0.7), replace=False) # split data

# Mark the chosen rows in a boolean mask. Testing `i in train_index` for every
# row scans the whole index array each time (quadratic overall); the mask
# selects the same rows in a single pass and keeps the original row order.
is_train = np.zeros(len(datas), dtype=bool)
is_train[train_index] = True

all_x = np.array(datas)
all_t = np.array(labels)

# Rows flagged in the mask form the training set; the rest form the test set.
train_x = all_x[is_train]
train_t = all_t[is_train]
test_x = all_x[~is_train]
test_t = all_t[~is_train]

# Training log printed by the "모델 학습" loop below:
#0 :	 loss:  0.7747657608989847
#1000 :	 loss:  0.37083220464769867
#2000 :	 loss:  0.3477996479918518
#3000 :	 loss:  0.33783168956841686
#4000 :	 loss:  0.3325747582595837
#5000 :	 loss:  0.3295149900202311
#6000 :	 loss:  0.3276248725821451
#7000 :	 loss:  0.3264097852612979
#8000 :	 loss:  0.32560611196844996
#9000 :	 loss:  0.3250631595181379
모델 학습
# Re-seed so the initial weights are reproducible.
np.random.seed(100)

# A single row of seven weights, drawn from a standard normal distribution.
# Presumably one weight per column of train_x -- confirm against the data.
w = np.random.randn(1, 7)

lr = 2.5
max_epoch = 10000
log_every = max_epoch // 10  # report the loss ten times over the whole run
n_train = train_t.size

for epoch in range(max_epoch):
    # Forward pass over the whole training set at once.
    y = sigmoid(np.dot(w, train_x.T))

    # Gradient step: average of (prediction - target) * input over all samples.
    error = train_t - y
    dw = -np.dot(error, train_x) / n_train
    w -= lr * dw

    if epoch % log_every != 0:
        continue

    # The loss is computed from the predictions made before this update.
    loss = CEE(y, train_t) / n_train
    print(epoch, ':\t', 'loss: ', loss)
성능평가
def test():
    """Print the percentage of test samples the trained weights classify correctly.

    Reads the module-level ``w``, ``test_x`` and ``test_t``. A sample counts
    as correct when the sigmoid output lies within 0.5 of its target value.
    """
    total = test_t.size
    correct = 0
    for row, target in zip(test_x, test_t):
        prob = sigmoid(np.dot(w, row))
        if abs(target - prob) < 0.5:
            correct += 1
    print("정인식률: " , 100*correct / total)


test()
# 정인식률:  85.23911652932988

 

Comments