Keras very low accuracy, don’t know what to do

  Kiến thức lập trình

I am working on a university project. My professor wanted me to work on this “[]” project.
He wants me to add ensemble models to this project. I am currently using XGBoost, GBM and VGG16.

XGBoost and GBM work fine, but when it comes to VGG16, CNN or PCENAS I always get very low accuracy, as shown below:

Epoch 1/20 167/167 41s 238ms/step - accuracy: 0.0000e+00 - loss: 11.0473 - val_accuracy: 0.0000e+00 - val_loss: 8.8739 
Epoch 2/20 167/167 40s 238ms/step - accuracy: 0.0000e+00 - loss: 8.7834 - val_accuracy: 0.0000e+00 - val_loss: 8.9999 

I am currently using the code below:

import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import cv2
import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import xgboost as xgb
import lightgbm as lgb
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import VGG16

# Define constants
# NOTE(review): both Windows paths below have no directory separators after the
# drive letter (e.g. 'G:svkpy...'); the backslashes were presumably lost when the
# code was pasted — confirm the real locations before running.
OVERVIEW_PATH = r'G:svkpyoverview-of-recordings.csv'  # CSV loaded into `overview` below
BASE_PATH = r'G:svkpyrecordings'  # root folder; joined with each SUBDIRS entry to locate audio files
SPECTROGRAMS_DIR = 'spectrograms'  # relative folder for spectrogram images, created below
SUBDIRS = ['test', 'train', 'validate']  # sub-folders of BASE_PATH searched for recordings

# Load CSV file
# The 'phrase', 'prompt' and 'file_name' columns of this table are read later in the script.
overview = pd.read_csv(OVERVIEW_PATH)

# Create spectrograms directory
# exist_ok=True makes re-running the script safe when the folder already exists.
os.makedirs(SPECTROGRAMS_DIR, exist_ok=True)

# Text Data Processing
# The transcribed phrase is the model input; the prompt is the class to predict.
texts = overview['phrase']
labels = overview['prompt']

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)

# Split data into training and testing sets
# The raw phrases are split *before* any transformer is fitted, so the TF-IDF
# vocabulary/IDF weights and the scaler statistics are learned from the training
# rows only. Fitting them on the full data set leaks test-set information into
# the features and inflates the reported accuracy.
texts_train, texts_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=42
)

# Vectorize: fit on the training phrases, then apply the same mapping to the test phrases
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(texts_train).toarray()
X_test = vectorizer.transform(texts_test).toarray()

# Normalize the data (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train XGBoost model
# Create the classifier with the library defaults, then fit it on the training split.
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)

# Evaluate XGBoost model
# Accuracy and confusion matrix are computed on the held-out test split.
y_pred_xgb = xgb_model.predict(X_test)
print('XGBoost Accuracy:', accuracy_score(y_test, y_pred_xgb))
print('XGBoost Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_xgb))

# Train GBM model
# Create the LightGBM classifier with the library defaults, then fit it on the training split.
gbm_model = lgb.LGBMClassifier()
gbm_model.fit(X_train, y_train)

# Evaluate GBM model
# Accuracy and confusion matrix are computed on the held-out test split.
y_pred_gbm = gbm_model.predict(X_test)
print('GBM Accuracy:', accuracy_score(y_test, y_pred_gbm))
print('GBM Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_gbm))

# Audio Data Processing
def find_audio_file(filename):
    """Return the path of `filename` inside the first SUBDIRS folder that holds it.

    Each entry of SUBDIRS is tried in order under BASE_PATH. The first
    candidate path that exists on disk is returned; None is returned when
    no candidate exists.
    """
    candidates = (os.path.join(BASE_PATH, folder, filename) for folder in SUBDIRS)
    return next((path for path in candidates if os.path.exists(path)), None)

# Convert audio files to spectrograms and save
# Every recording listed in the CSV is rendered as a PCEN-scaled mel spectrogram
# and written to SPECTROGRAMS_DIR as '<recording name without extension>.png'.
audio_files = overview['file_name']
for file in tqdm(audio_files):
    audio_path = find_audio_file(file)
    if not audio_path:
        print(f"File not found: {file}")
        continue
    try:
        # The waveform is bound to `signal` rather than `y` so the encoded text
        # labels stored in `y` earlier in the script are not overwritten.
        signal, sr = librosa.load(audio_path, sr=None)
        S = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128)
        pcen = librosa.pcen(S, sr=sr)
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(pcen, sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title('PCEN spectrogram')
        stem = os.path.splitext(os.path.basename(file))[0]
        plt.savefig(os.path.join(SPECTROGRAMS_DIR, f'{stem}.png'))
    except Exception as e:
        # Best effort: report the failing recording and carry on with the rest.
        print(f"Error processing {audio_path}: {e}")
    finally:
        # Close the current figure (if any) so thousands of open figures do not
        # accumulate in memory over the loop.
        plt.close()

# Load spectrogram files
spectrogram_files = glob.glob(f'{SPECTROGRAMS_DIR}/*.png')

# Look up each image's class in the CSV instead of parsing it out of the file
# name. The previous code used the third '_'-separated field of the file name
# as the label; nothing in this script shows that field encodes the prompt, and
# a validation loss of about 8.9 with 0.0 accuracy is what one would expect if
# almost every image ended up in a class of its own.
# Both the full recording name and the name without its extension are used as
# keys, so images saved as '<name>.png' or '<name>.<ext>.png' both resolve.
# NOTE(review): assumes spectrogram file names are derived from the CSV
# 'file_name' column — confirm against how the images were written.
prompt_by_name = {}
for name, prompt in zip(overview['file_name'], overview['prompt']):
    prompt_by_name[name] = prompt
    prompt_by_name[os.path.splitext(name)[0]] = prompt

images = []
labels = []

for file in tqdm(spectrogram_files):
    key = os.path.splitext(os.path.basename(file))[0]
    label = prompt_by_name.get(key)
    if label is None:
        print(f"No prompt found for spectrogram: {file}")
        continue
    image = cv2.imread(file)
    if image is None:
        # cv2.imread signals an unreadable or corrupt file by returning None.
        print(f"Could not read image: {file}")
        continue
    image = cv2.resize(image, (224, 224))
    images.append(image)
    labels.append(label)

if not images:
    raise RuntimeError(
        f"No labelled spectrograms could be loaded from '{SPECTROGRAMS_DIR}'"
    )

images = np.array(images)
# Integer-encode the prompts, then one-hot encode them for categorical_crossentropy.
labels = label_encoder.fit_transform(labels)
num_classes_audio = len(np.unique(labels))
labels = to_categorical(labels, num_classes=num_classes_audio)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Create VGG16 model with transfer learning
# Convolutional base loaded with ImageNet weights and without VGG16's own
# classifier head (include_top=False); it takes 224x224 3-channel inputs.
# NOTE(review): the images are fed as raw cv2 pixel arrays; the ImageNet weights
# presumably expect VGG16's `preprocess_input` to be applied first — confirm.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# New classification head stacked on top of the base's output.
x = base_model.output
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
# One softmax unit per audio class.
output = Dense(num_classes_audio, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

# Freeze the layers in the base model
# so that only the newly added head layers are updated during training.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
# categorical_crossentropy matches the one-hot labels produced by to_categorical.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
# Stop once val_loss has not improved for 5 epochs and restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement,
# never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
# The callback construction and the fit call are two separate statements; the
# test split doubles as the validation set monitored by both callbacks.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    callbacks=[early_stopping, reduce_lr],
)

# Evaluate the VGG16 model
# evaluate() returns [loss, accuracy] because 'accuracy' is the only compiled metric.
_, accuracy = model.evaluate(X_test, y_test)
print('VGG16 Transfer Learning Model Accuracy:', accuracy)

# Save the model
# Must run before load_model below, which reads this same file.
model.save('audio_model_vgg16.keras')

# Load saved model
model = load_model('audio_model_vgg16.keras')

# Evaluate saved model
# Sanity check that the reloaded model reproduces the accuracy measured above.
_, accuracy = model.evaluate(X_test, y_test)
print('Saved VGG16 Model Accuracy:', accuracy)

Can someone please help me with this problem? I don't know much about coding. Thanks.

I tried other models as well: VGG16, CNN, RNN, PCENAS and VGG16-LSTM. The results were the same.

New contributor

Giray is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.