import numpy as np
import pandas as pd
import networkx as nx
from collections import defaultdict
__author__ = 'Fatemeh Zare<faz361@mail.usask.ca>'
"""
 Advanced graph-based k-means is an appropriate algorithm for the datasets which have lots of dynamic objects.
 Instead of passing through space, Advanced graph-based k-means can only pass through certain points. Thus, it can be applied to datasets
which can be modeled as graphs.
In Advanced graph-based k-means, initial centroids would be selected based on
Global Unique Interaction instead of random selection. Wisely selection of initial
centroids would decrease the probability of falling into the local optimal clustering.
"""



def agkmeans(G, k):

    index_list1 = []
    for i in G.node:
        index_list1.append(i)
    df_initial = pd.DataFrame(columns=['node', 'degree', 'centroid'], index=index_list1)
    df_initial = df_initial.reset_index(drop=True)
    p = 0
    for i in G.node:
        df_initial["node"][p] = i
        df_initial["degree"][p] = G.degree(i)
        p = p + 1

    df_initial.sort_values("degree", inplace=True, ascending=False)
    df_initial = df_initial.reset_index(drop=True)

    k_list = []
    for d in range(0, k):
        k_list.append(df_initial['node'][d])

    df2 = pd.DataFrame(columns=['node', 'shoretest_path_centroid', 'centroid'])
    df2['node'] = df_initial['node']

    def assignment1(G, df, randomlist):
        df['shoretest_path_centroid'] = float('inf')
        df['centroid'] = None
        for j in randomlist:
            for i in G.node:
                b = df.loc[df['node'] == i, 'shoretest_path_centroid'].iloc[0]

                try:
                    n = nx.shortest_path_length(G, i, j)
                except nx.NetworkXNoPath:
                    n=1000000000000000000
                if n < b:
                    df.loc[df['node'] == i, 'shoretest_path_centroid'] = n
                    df.loc[df['node'] == i, 'centroid'] = j

        randomlist.sort()
        return (randomlist)

    def update(G, df2, k_list):
        df2_sum = df2.groupby(['centroid'])['shoretest_path_centroid'].sum()

        for i in k_list:
            for j in G.neighbors(i):
                p = 0
                sump = 0
                for b in G.node:
                    if (df2.loc[df2['node'] == b, 'centroid'].iloc[0] == i):
                        try:
                            p = nx.shortest_path_length(G, j, b)
                        except nx.NetworkXNoPath:
                            p = float('inf')

                        sump += p
                if sump < df2_sum[i]:
                    df2.loc[df2['centroid'] == i, 'centroid'] = j

        for i in df2['node']:
            b = df2.loc[df2['node'] == i, 'centroid'].iloc[0]
            try:
                v = nx.shortest_path_length(G, i, b)
            except nx.NetworkXNoPath:
                v = float('inf')
            df2.loc[df2['node'] == i, 'shoretest_path_centroid'] = v
#
        lista = df2['centroid'].unique().tolist()
        return (lista)

    a=assignment1(G, df2, k_list)

    b=update(G, df2, k_list)

    while (a.sort() != b.sort()):
        k_list = b
        a=assignment1(G, df2, k_list)
        b=update(G, df2, k_list)

    dict1=df2.set_index('node')['centroid'].to_dict()
    return (dict1)