# File: kmeans1.py
# Desc: make an example of how to cluster objects using K-means algorithm

from math import *
from random import randrange
import string

# get list of objects from user
# and store them in a file
def getObjects(n):
   
   
    # start an empty list of n objects
    objects =[0]*n
    for i in range(n):
        x = input ("Enter an item : ")
        objects.insert(i, x)

    print "The list you enter is: "
    for i in range(n):
        print objects[i],

    print

    # store input in a file
    #outfile = open ("data.txt", "w")
    # create a string
#    for i in range (n):
#        outfile.write ("%d\n" % (objects[i]))
#    outfile.close()

    # return object
    return objects

# function to do k-means clustering alrgorithm
def cluster (data, n):
    # let the user decide the number of objects in a cluster:
    n_item = input ("How many objects are there in a cluster? ")
    for i in range (n):
        print "list item : ", data[i]
    
    # number of clusters, k= number of objects/n
    k = n/n_item;
    print len(data), n
    print "k is = ", k
    # decide the centroids randomly
    centroid = [0]*k
    print "Making random centroids: "
    for i in range (k):
        # make sure this random number is different from other
        test = 0
        first = 0
        print "centroid[",i,"]"
        while (first ==0 or test ==1):
            test = 0
            print "first =", first, " test =", test, 
            ran_num = randrange (0, n)
            print " ran_num =", ran_num
            for j in range (i):
                print "centroid [", j,"]", centroid[j]
                if (ran_num == centroid[j]):
                    test = 1
            first += 1
                    
        centroid[i]= ran_num
        print "Random centroid is : ", data[centroid[i]]
    # Loop:
    # assign points to centroids by:
    # calculating distance from a point to centroids
    # assign a point to the nearest centroid
    data_group = [0]*n
    for i in range (n):
        min_dis = dis (data[i], data[centroid[0]])
        data_group.insert(i,0)
        for j in range (k):
            dist = dis (data[i], data[centroid[j]])
            if (dist <= min_dis):
                min_dis = dist
                data_group.insert (i,j)
        print "data_group[", i, "]: ", data_group[i]
             
    # step2:
    # recalculate the centroids 
    for i in range (k):
        sum = 0
        num_item = 0
        print "Group of centroid[",i,"]: ","centroid= ", data[centroid[i]]
        for j in range (n):
            if (data_group[j]== i):
               sum += data[j]
               num_item +=1
               print "value: ",data[j], " of group: ", data_group[j]
        centroid_val = sum/num_item
        centroid[i] = centroid_val
        # now centroid[i] no longer stores index value of the corresponding
        # ite in data list, but the real value of the centroid
        print "New centroid value : ", centroid[i]

    # step 3: reassign until no change is made
    count = 1
    while (count ==1):
        # making/assigning points to group
        for i in range (n):
            min_dis = dis (data[i], centroid[0])
            data_group.insert(i,0)
            for j in range (k):
                dist = dis (data[i], centroid[j])
                if (dist <= min_dis):
                    min_dis = dist
                    data_group.insert (i,j)
            print "data_group[", i, "]: ", data_group[i]

        # recalculate the centroids: centroid is the center,
        # calculate: each cordinate of it = average of all vertices' cordinates
        for i in range (k):
            sum = 0
            num_item = 0
            print "Group of centroid[",i,"]: ","centroid= ",centroid[i] 
            for j in range (n):
                if (data_group[j]== i):
                    sum += data[j]
                    num_item +=1
                    print "value: ",data[j], " of group: ", data_group[j]
            centroid_val = sum/num_item
            if (centroid_val== centroid[i]):
                count = 0
            else:
                centroid[i] = centroid_val
                print "New centroid value : ", centroid[i]

    # print out the final results
    print "*********Final clustering**********"
    for i in range (k):
         print "Group of centroid[",i,"]: ","centroid= ",centroid[i]
         for j in range (n):
             if (i == data_group[j]):
                 print "Value: ", data[j], "of group: ", data_group[j]
    
# calculate the distance between 2 points    
def dis (a,b):
    dis = abs (a-b)
    return (dis)
    
def main():
    n = input ("Enter the number of objects you want to cluster: ")
    data = getObjects(n)
    data = cluster(data, n)
    
main()
    
        
    
        
