CNNを用いたpython画像処理入門① - 数理コンサルタントの備忘録

今回は、画像認識に有効なCNNを用いた簡単な画像認識タスクをPythonで実施する。まずはTensorFlowで用意されているデータセットのFashion MNISTを読み込む。

import tensorflow as tf
# Load the Fashion MNIST dataset exposed through tf.keras.datasets.
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
# Array layout: (number of images, height in pixels, width in pixels)
print(x_train.shape)

実際に画像を出力してみる

%matplotlib inline
import matplotlib.pyplot as plt

# Label names of Fashion MNIST, looked up by the integer class id
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Create the drawing area as a grid of (rows, columns)
fig, ax = plt.subplots(2, 5, figsize=(10, 4))

# Show the first ten training images, one per grid cell
for panel, image, label in zip(ax.flat, x_train[:10], y_train[:10]):
    # Draw the tick marks in white and suppress every tick label
    panel.tick_params(color='white', labelbottom=False, labelleft=False,
                      labelright=False, labeltop=False)
    panel.imshow(image, cmap='gray')
    panel.set_title(class_names[label])

# Keep the panels from overlapping each other
plt.tight_layout()

# Display the figure
plt.show()

f:id:guitartakahiro:20210227211600p:plain

画像の次元を(画像数,縦の画素数,横の画素数)->(画像数,縦の画素数,横の画素数,チャンネル数)に変換する

# Reshape (N, H, W) -> (N, H, W, 1) by appending a single channel axis
num_train_pic, height, width = x_train.shape[:3]
num_test_pic = x_test.shape[0]
x_train = x_train.reshape(num_train_pic, height, width, 1)
x_test = x_test.reshape(num_test_pic, height, width, 1)

画素値を0-1に正規化する

# Normalize the pixel values by dividing both splits by 255
x_train, x_test = x_train / 255., x_test / 255.

モデルを定義・コンパイルし、サマリを表示。 Conv2Dで2次元畳み込み層を設定し、MaxPool2Dでプーリング処理を行う。 Fashion MNISTのラベルは10クラス存在するため、最終的な全結合層の出力次元数を「10」にする。 Softmax関数で総和が1となるように、各出力の予測確率を計算。 (例. 出力クラス=[0.1,0,0,0,0,0,0.1,0,0,0.8]⇔クラス0の確率10%, クラス6の確率10%, クラス9の確率80%)

# `layers` is not imported by the earlier snippets, so bring it into scope
# here; without this the model definition fails with a NameError.
from tensorflow.keras import layers

# CNN: two (convolution -> max pooling) stages followed by a softmax classifier.
model = tf.keras.Sequential([
    # Input image: (height in pixels, width in pixels, channels)
    # 28x28x1 -> 24x24x16
    layers.Conv2D(16, kernel_size=(5, 5), activation='relu',
                  kernel_initializer='he_normal',
                  input_shape=(height, width, 1)),
    # 24x24x16 -> 12x12x16
    layers.MaxPool2D(pool_size=(2, 2)),
    # 12x12x16 -> 8x8x64
    # (input_shape is only meaningful on the first layer, so it is not
    #  repeated here)
    layers.Conv2D(64, kernel_size=(5, 5), activation='relu',
                  kernel_initializer='he_normal'),
    # 8x8x64 -> 4x4x64
    layers.MaxPool2D(pool_size=(2, 2)),
    # 4x4x64 -> 1024
    layers.Flatten(),
    # 1024 -> 10 (one output per class, turned into probabilities by softmax)
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 24, 24, 16)        416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 8, 8, 64)          25664     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                10250     
=================================================================
Total params: 36,330
Trainable params: 36,330
Non-trainable params: 0
_________________________________________________________________

モデルの学習

# patience=1: stop after one epoch without improvement in the monitored
# metric (`monitor` is not set, so the Keras default applies).
early_stopping = tf.keras.callbacks.EarlyStopping(patience=1, verbose=1)

# The test split doubles as the validation data during training.
validation_pair = (x_test, y_test)
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    verbose=1,
    validation_data=validation_pair,
    callbacks=[early_stopping],
)

損失（交差エントロピー誤差）の推移表示

plt.figure(figsize=(16, 8))
key = 'loss'
epochs = history.epoch

# Training curve; keep its line handle so the validation curve can reuse
# the same colour.
train_line, = plt.plot(epochs, history.history[key], label=f'Train {key}')
# Validation curve: dashed, same colour as the training curve.
plt.plot(epochs, history.history[f'val_{key}'], '--',
         color=train_line.get_color(), label=f'Val {key}')

plt.xlabel('Epochs')
plt.ylabel(key.title())
plt.legend()
plt.xlim([0, max(epochs)])

f:id:guitartakahiro:20210227213511p:plain

最後にソースコード全体を示す

%matplotlib inline
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import layers

# Load the Fashion MNIST dataset exposed through tf.keras.datasets
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split


# Label names of Fashion MNIST, looked up by the integer class id
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Create the drawing area as a grid of (rows, columns)
fig, ax = plt.subplots(2, 5, figsize=(10, 4))

# Show the first ten training images, one per grid cell, drawing directly on
# the axes returned by plt.subplots instead of re-selecting them by index.
for panel, image, label in zip(ax.flat, x_train[:10], y_train[:10]):
    # Draw the tick marks in white and suppress every tick label
    panel.tick_params(color='white', labelbottom=False, labelleft=False,
                      labelright=False, labeltop=False)
    panel.imshow(image, cmap='gray')
    panel.set_title(class_names[label])

# Keep the panels from overlapping each other
plt.tight_layout()

# Save the figure. savefig raises FileNotFoundError when the target
# directory is missing, so create it first.
from pathlib import Path

output_path = '../output/fashion_mnist_pic1.png'
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path)

# Display the figure
plt.show()

# Convert (images, height, width) -> (images, height, width, channels)
num_train_pic, height, width = x_train.shape[:3]
num_test_pic = x_test.shape[0]
x_train = x_train.reshape(num_train_pic, height, width, 1)
x_test = x_test.reshape(num_test_pic, height, width, 1)

# Normalize the pixel values by dividing both splits by 255
x_train, x_test = x_train / 255., x_test / 255.

# Model definition: two (convolution -> max pooling) stages followed by a
# softmax classifier.
model = tf.keras.Sequential([
    # Input image: (height in pixels, width in pixels, channels)
    # 28x28x1 -> 24x24x16
    layers.Conv2D(16, kernel_size=(5, 5), activation='relu',
                  kernel_initializer='he_normal',
                  input_shape=(height, width, 1)),
    # 24x24x16 -> 12x12x16
    layers.MaxPool2D(pool_size=(2, 2)),
    # 12x12x16 -> 8x8x64
    # (input_shape is only meaningful on the first layer, so it is not
    #  repeated here)
    layers.Conv2D(64, kernel_size=(5, 5), activation='relu',
                  kernel_initializer='he_normal'),
    # 8x8x64 -> 4x4x64
    layers.MaxPool2D(pool_size=(2, 2)),
    # 4x4x64 -> 1024
    layers.Flatten(),
    # 1024 -> 10 (one output per class, turned into probabilities by softmax)
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None" line).
model.summary()


# Train the model.
# patience=1: stop after one epoch without improvement in the monitored
# metric (`monitor` is not set, so the Keras default applies).
early_stopping = tf.keras.callbacks.EarlyStopping(patience=1, verbose=1)

# The test split doubles as the validation data during training.
validation_pair = (x_test, y_test)
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    verbose=1,
    validation_data=validation_pair,
    callbacks=[early_stopping],
)

# Plot how the training and validation loss evolve over the epochs
plt.figure(figsize=(16, 8))
key = 'loss'
epochs = history.epoch

# Training curve; keep its line handle so the validation curve can reuse
# the same colour.
train_line, = plt.plot(epochs, history.history[key], label=f'Train {key}')
# Validation curve: dashed, same colour as the training curve.
plt.plot(epochs, history.history[f'val_{key}'], '--',
         color=train_line.get_color(), label=f'Val {key}')

plt.xlabel('Epochs')
plt.ylabel(key.title())
plt.legend()
plt.xlim([0, max(epochs)])