Commit 2373f6fd authored by Matthew Armstrong (mma118)
Upload New File

parent 76dd57e3
%% Cell type:code id: tags:
``` python
# Libraries

import os
import glob
import shutil
import json
import keras
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization, DepthwiseConv2D, Conv2D, Input
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


# Working directories

work_dir = '/kaggle/input/cassava-leaf-disease-classification/'
train_path = '/kaggle/input/cassava-leaf-disease-classification/train_images'
```
%% Cell type:code id: tags:
``` python
# Visualizing class distributions

df = pd.read_csv(work_dir + 'train.csv')

df.head()

print(df.shape)

# Class names listed to match value_counts(), which sorts counts in descending order
x = ['CMD', 'Healthy', 'CGM', 'CBSD', 'CBB']

y = df['label'].value_counts()

print(y)

plt.bar(x, y)

plt.ylabel('Frequency')
plt.xlabel('Labels')

plt.title('Distribution of Image Classes: TRAIN')
```
%% Cell type:code id: tags:
``` python
# Reading the labels from the json file

with open(work_dir + 'label_num_to_disease_map.json') as f:
    labels = json.load(f)  # Loaded as a dict of {label: disease name}

labels = {int(k): v for k, v in labels.items()}  # Convert keys from strings to ints


# Defining the working dataset

df['class_name'] = df.label.map(labels)
```
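%% Cell type:markdown id: tags:
A quick, optional sanity check (not in the original notebook): print the decoded label map and confirm that every row in `df` received a class name from it.
%% Cell type:code id: tags:
``` python
# Sketch: print the label -> disease-name map and confirm the mapping covered every row.
print(labels)
print(df['class_name'].isna().sum())  # 0 means every integer label had an entry in the map
```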
%% Cell type:code id: tags:
``` python
# Attaching labels to the class sets and defining them as variables

mask = df['label'] == 0
CBB = df[mask]

mask = df['label'] == 1
CBSD = df[mask]

mask = df['label'] == 2
CGM = df[mask]

mask = df['label'] == 3
CMD = df[mask]

mask = df['label'] == 4
Healthy = df[mask]
```
%% Cell type:code id: tags:
``` python
# Sampling images from each class
# The goal of these sample fractions is to reduce, but not eliminate, the class imbalance

# Since this is a real dataset, we can assume the class imbalance reflects the real-world
# frequency of each disease, so I am choosing to keep a (more muted) imbalance in the final
# dataset. sample(frac=1) simply shuffles a class; frac=0.9 drops 10% of CMD, the dominant class.

CBB = CBB.sample(frac=1)
CBSD = CBSD.sample(frac=1)
CGM = CGM.sample(frac=1)
CMD = CMD.sample(frac=0.9)
Healthy = Healthy.sample(frac=1)
```
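%% Cell type:markdown id: tags:
To verify that the sampling above leaves only a muted imbalance, the per-class counts can be printed. This is a minimal sketch using the DataFrames defined above; it is not part of the original notebook.
%% Cell type:code id: tags:
``` python
# Sketch: per-class image counts after sampling, to verify the intended (muted) imbalance.
for name, subset in [('CBB', CBB), ('CBSD', CBSD), ('CGM', CGM), ('CMD', CMD), ('Healthy', Healthy)]:
    print(name, len(subset))
```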
%% Cell type:code id: tags:
``` python
##### PREPROCESSING #####


# Combining the sets of images from each class

class_sets = [CBB, CBSD, CGM, CMD, Healthy]
f_df = pd.concat(class_sets)

print(len(f_df))

# Splits -> train: 0.8, validation: 0.2

from sklearn.model_selection import train_test_split

train, validation = train_test_split(f_df, test_size = 0.2, random_state = 42, shuffle = True, stratify = f_df['class_name'])

# Convert the images into image data using keras.preprocessing.image.ImageDataGenerator

from keras.preprocessing.image import ImageDataGenerator

# Variables for image size, number of classes and batch size

IMG_SIZE = 300
n = 5
BATCH_SIZE = 32

# Preprocessing for the train and validation datasets -> note that ImageDataGenerator performs input normalization via the rescale arg

datagen = ImageDataGenerator(
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest',
    rescale = 1.0/255.0)


train_set = datagen.flow_from_dataframe(train,
                                        directory = train_path,
                                        seed = 42,
                                        x_col = 'image_id',
                                        y_col = 'class_name',
                                        target_size = (IMG_SIZE, IMG_SIZE),
                                        class_mode = 'categorical',
                                        interpolation = 'nearest',
                                        shuffle = True,
                                        batch_size = BATCH_SIZE)

validation_set = datagen.flow_from_dataframe(validation,
                                             directory = train_path,
                                             seed = 42,
                                             x_col = 'image_id',
                                             y_col = 'class_name',
                                             target_size = (IMG_SIZE, IMG_SIZE),
                                             class_mode = 'categorical',
                                             interpolation = 'nearest',
                                             shuffle = True,
                                             batch_size = BATCH_SIZE)
```
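%% Cell type:markdown id: tags:
An optional check (not in the original notebook): pull one batch from the training generator to confirm the image shape and the [0, 1] value range produced by `rescale=1/255`. Note that because the same augmenting `datagen` is reused for both splits, validation images also receive the augmentations.
%% Cell type:code id: tags:
``` python
# Sketch: inspect a single batch from the training generator.
images, labels_onehot = next(train_set)
print(images.shape)                # expected (32, 300, 300, 3)
print(labels_onehot.shape)         # expected (32, 5) one-hot class vectors
print(images.min(), images.max())  # values should lie in [0, 1] because of rescale=1/255
```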
%% Cell type:code id: tags:
``` python
##### THE MODEL #####


# create_model2: a small depthwise-convolution CNN built from scratch.
# Note: it expects 256x256x3 inputs, which does not match the 300x300 images produced by
# the generators above; it is defined here but not used by model_fit() below.

def create_model2():

    model = Sequential()
    model.add(Conv2D(3, 3, kernel_initializer="glorot_uniform", padding="same", input_shape = (256,256,3)))
    model.add(BatchNormalization())
    model.add(keras.layers.Activation(keras.activations.relu))
    model.add(Dropout(0.2))

    model.add(DepthwiseConv2D(3, kernel_initializer="glorot_uniform", padding="valid"))
    model.add(BatchNormalization())
    model.add(keras.layers.Activation(keras.activations.relu))
    model.add(Dropout(0.2))

    model.add(DepthwiseConv2D(3, kernel_initializer="glorot_uniform", padding="same"))
    model.add(BatchNormalization())
    model.add(keras.layers.Activation(keras.activations.relu))
    model.add(Dropout(0.2))

    model.add(DepthwiseConv2D(3, kernel_initializer="glorot_uniform", padding="valid"))
    model.add(BatchNormalization())
    model.add(keras.layers.Activation(keras.activations.relu))
    model.add(Dropout(0.2))

    model.add(Conv2D(3, 3, kernel_initializer="glorot_uniform", padding="valid"))
    model.add(BatchNormalization())
    model.add(keras.layers.Activation(keras.activations.relu))
    model.add(Dropout(0.2))

    model.add(GlobalAveragePooling2D())
    model.add(Flatten())
    model.add(Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
    model.add(Dropout(0.2))
    model.add(Dense(n, activation = 'softmax'))

    return model


# create_model: an EfficientNetB3 backbone (randomly initialized, weights=None) with a small
# dense classification head; this is the model trained below.

def create_model():

    model = Sequential()
    model.add(EfficientNetB3(input_shape = (300, 300, 3), include_top = False, weights = None))
    model.add(GlobalAveragePooling2D())
    model.add(Flatten())
    model.add(Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
    model.add(Dropout(0.2))
    model.add(Dense(n, activation = 'softmax'))

    return model
```
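%% Cell type:markdown id: tags:
`create_model2` is never instantiated later in the notebook. A small optional sketch (not in the original) that builds both definitions and compares their output shapes and parameter counts:
%% Cell type:code id: tags:
``` python
# Sketch: build both model definitions and compare their sizes and output shapes.
m1 = create_model()   # EfficientNetB3 backbone, expects 300x300x3 inputs
m2 = create_model2()  # small depthwise CNN, expects 256x256x3 inputs
print(m1.output_shape, m1.count_params())
print(m2.output_shape, m2.count_params())
```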
%% Cell type:code id: tags:
``` python
test = create_model()
test.summary()


EPOCHS = 25

STEP_SIZE_TRAIN = train_set.n // train_set.batch_size
STEP_SIZE_VALID = validation_set.n // validation_set.batch_size

print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)
```
%% Cell type:code id: tags:
``` python
def model_fit():

    model = create_model()

    loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False,
                                                   label_smoothing = 0.001,
                                                   name = 'categorical_crossentropy')

    model.compile(optimizer = Adam(learning_rate = 1e-4),
                  loss = loss,
                  metrics = ['categorical_accuracy'])

    es = EarlyStopping(monitor='val_loss', mode='min', patience=5,
                       restore_best_weights=True, verbose=1)

    checkpoint_cb = ModelCheckpoint("test_model_best.h5",
                                    save_best_only=True,
                                    monitor='val_loss',
                                    mode='min')

    # Note: when fitting from a generator, the batch size actually used is the one the
    # generator itself was built with (BATCH_SIZE = 32 above).
    training = model.fit(train_set,
                         validation_data = validation_set,
                         epochs = EPOCHS,
                         batch_size = BATCH_SIZE,
                         steps_per_epoch = STEP_SIZE_TRAIN,
                         validation_steps = STEP_SIZE_VALID,
                         callbacks = [es, checkpoint_cb])

    model.save('test_model' + '.h5')

    return training
```
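%% Cell type:markdown id: tags:
`ReduceLROnPlateau` is imported at the top but never used. If a learning-rate schedule is wanted, it could be appended to the callback list inside `model_fit()`. A minimal sketch; the factor/patience values below are illustrative, not taken from the original notebook.
%% Cell type:code id: tags:
``` python
# Sketch: an optional LR schedule that could be added to the callbacks passed to model.fit()
# in model_fit() above, e.g. callbacks=[es, checkpoint_cb, reduce_lr].
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min',
                              factor=0.5, patience=2, min_lr=1e-6, verbose=1)
```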
%% Cell type:code id: tags:
``` python
# Train the model (renamed from `train` to `history` to avoid shadowing the train DataFrame)
history = model_fit()


acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs actually run (early stopping may end training before EPOCHS)
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
```
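%% Cell type:markdown id: tags:
After training, the best checkpoint written by `ModelCheckpoint` (`test_model_best.h5`) can be reloaded and scored on the validation generator. A minimal sketch, not part of the original notebook; the filename and step count come from the cells above.
%% Cell type:code id: tags:
``` python
# Sketch: reload the best checkpoint saved during training and evaluate it on the validation set.
best_model = tf.keras.models.load_model('test_model_best.h5')
val_loss_best, val_acc_best = best_model.evaluate(validation_set, steps=STEP_SIZE_VALID)
print(val_loss_best, val_acc_best)
```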