반응형
1. 데이터 전처리
import tensorflow as tf
from tensorflow.keras import layers, utils
from tensorflow.keras.datasets import mnist
# Training hyperparameters.
num_classes = 10
epochs = 10
batch_size = 100
learning_rate = 0.1
dropout_rate = 0.5

# Input image dimensions (rows, columns).
img_rows, img_cols = 28, 28
# Per-sample shape given to the first layer: the image plus a channel axis of size 1.
input_shape = (img_rows, img_cols, 1)


def _prepare_images(images):
    """Return *images* as float32 with a trailing channel axis, divided by 255."""
    # 3-D -> 4-D: every pixel value becomes a one-element list,
    # e.g. [1, 0, 1, 1] -> [[1], [0], [1], [1]].
    with_channel = images.reshape(images.shape[0], img_rows, img_cols, 1)
    # Cast to float first so the division normalizes the values into 0..1.
    return with_channel.astype('float32') / 255


# Load the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)

# MNIST ships 60000 training samples and 10000 test samples.
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert the integer class vectors to one-hot (binary class) matrices.
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)
2. model 디자인 (feature extraction)
# Feature extractor: two convolution + max-pooling stages in a Sequential model.
model = tf.keras.Sequential()

feature_layers = (
    # First convolution: 16 kernels of size 5x5, stride 1 in both directions;
    # 'same' padding keeps the output the same size as the input.
    # Parameter count: 416 = 5*5*1*16 + 16 (one bias per kernel).
    layers.Conv2D(16, (5, 5), strides=(1, 1), padding='same',
                  activation='relu', input_shape=input_shape,
                  name='conv1'),
    # First pooling: 2x2 max pooling with stride 2.
    layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                        name='pool1'),
    # Second convolution: 32 kernels of size 5x5; no padding argument is
    # passed, so the layer's default padding applies.
    # Parameter count: 12832 = 5*5*16*32 + 32 (one bias per kernel).
    layers.Conv2D(32, (5, 5), activation='relu',
                  name='conv2'),
    # Second pooling: 2x2 max pooling with stride 2.
    layers.MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                        name='pool2'),
)
for feature_layer in feature_layers:
    model.add(feature_layer)
3. model 디자인 (classification, MLP(FFN) )
# ---------------------------------[input layer]---------------------------------
# Flatten turns each sample's 3-D feature map into a 1-D vector
# (e.g. [[[1],[2]],[[1],[3]],[[1],[2]]] -> [1,2,1,3,1,2]).
# Flattened length here: 800 = 5*5*32.
model.add(layers.Flatten())
# ---------------------------------[hidden layer]---------------------------------
# Parameter count: 102528 = (800 + 1) * 128
model.add(layers.Dense(128, activation='relu'))
# -------------------------------- [output layer]---------------------------------
# Parameter count: 1290 = (128 + 1) * 10
model.add(layers.Dense(num_classes, activation='softmax'))
# Compile the model.
# The optimizer takes `learning_rate=`; the old `lr=` alias is deprecated and
# newer Keras releases warn about it or reject it, so the configured rate
# would not reliably be applied.
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(learning_rate=learning_rate),
              metrics=['accuracy'])
4. 학습
# Stop early when the validation loss has not improved for 3 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Fit settings: 20% of the training data is held out for validation.
fit_options = {
    'batch_size': batch_size,
    'epochs': epochs,
    'validation_split': 0.2,
    'callbacks': [early_stop],
    'shuffle': True,
    'verbose': 1,
}
hist = model.fit(x_train, y_train, **fit_options)

# Evaluate on the test set; index 0 is the loss and index 1 the accuracy.
score = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_acc = score[0], score[1]
print('test_loss:', test_loss, ' , test_acc:', test_acc)

# Save the trained model; the path is relative to the working directory.
model.save("cnn_mnist.h5")
- epoch : 최대 10회 학습 (val_loss가 3 epoch 연속으로 개선되지 않으면 EarlyStopping에 의해 조기 종료)
- training set : validation 비율 0.2이므로 60000*0.8=48000개
5. 학습된 parameter 개수확인
# Print the model summary to check the parameter count of each layer.
model.summary()
(1) feature extraction
* kernel의 각 요소(가중치)와 커널당 1개의 bias가 학습되는 parameter가 된다.
* kernel의 채널 수(두께)는 입력의 채널 수와 같고, 커널 하나가 feature map 하나를 만들기 때문에 convolution을 거친 후 feature map의 개수는 커널의 개수와 같다.
- convolution 1 (parameter) :
416 = 5*5*1*16+16
- convolution 2 (parameter) :
12832 = 5*5*16*32+32
(2) classification
- flatten 이후 input 값 :
800=5*5*32
- hidden layer (Dense 128) parameter :
102528=(800+1)*128
- output layer (Dense 10) parameter :
1290=(128+1)*10
6. 학습 과정 시각화
import matplotlib.pyplot as plt
# Plot the training curves: loss on the left y-axis, accuracy on the right.
fig, loss_ax = plt.subplots(figsize=(10, 5))
# A twin axis shares the x-axis, so loss and accuracy each get their own y scale.
acc_ax = loss_ax.twinx()

# Loss curves (yellow: training, red: validation).
loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')

# Accuracy curves (blue: training, green: validation).
acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
# Axis label spelling fixed (was 'accuray').
acc_ax.set_ylabel('accuracy')

# Each axis draws its own legend; they sit in different corners to avoid overlap.
loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()
반응형