import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from PIL import Image
from tqdm import tqdm

# Root folder of the training split; its sub-folders are scanned below
directory = 'C:/Users/kirk/Desktop/artbench-10-imagefolder-split/train/'

# Keep the full path of every entry under the root that is itself a directory
folder_paths = list(filter(
    os.path.isdir,
    (os.path.join(directory, entry) for entry in os.listdir(directory)),
))

# Accumulators for the loaded data: one image array and one label per file
images, labels = [], []


# Map each folder name to its integer label; the label is the name's
# position in the tuple below
folder_labels = {
    folder_name: index
    for index, folder_name in enumerate((
        'art_nouveau', 'baroque', 'expressionism', 'impressionism',
        'post_impressionism', 'realism', 'renaissance', 'romanticism',
        'surrealism', 'ukiyo_e',
    ))
}

# Load every file of every class folder into `images`, recording the folder's
# integer label in `labels` at the same index.
# Raises KeyError if a folder name has no entry in `folder_labels`.
for folder_path in folder_paths:
    # The folder's base name is the key into the label map
    folder_name = os.path.basename(folder_path)
    label = folder_labels[folder_name]
    for filename in tqdm(os.listdir(folder_path), desc=f'Loading images from {folder_name}', unit='image'):
        # Open inside a context manager so the underlying file handle is
        # released as soon as the pixels have been copied out.
        with Image.open(os.path.join(folder_path, filename)) as pil_img:
            # Force a 3-channel RGB layout: a grayscale, palette or RGBA file
            # would otherwise yield an array with a different number of
            # channels, and the list could no longer be stacked into a single
            # numeric array. Files that are already RGB are unchanged.
            # NOTE(review): images of differing width/height are still not
            # handled here; the later stacking step assumes one common size.
            img = np.array(pil_img.convert('RGB'))
        # Keep the image and its label at matching positions
        images.append(img)
        labels.append(label)




# Turn the accumulated Python lists into numpy arrays
images, labels = np.array(images), np.array(labels)

# Flatten each image so that every sample becomes one 1D feature row
images = images.reshape(len(images), -1)

# Build a t-SNE model with its default parameters and embed the flattened images
tsne = TSNE()
embeddings = tsne.fit_transform(images)

# Scatter the first two embedding columns, colouring each point by its label
x_coords = embeddings[:, 0]
y_coords = embeddings[:, 1]
plt.scatter(x_coords, y_coords, c=labels)
plt.show()