Added Hierarchical Clustering #27

Open · wants to merge 3 commits into base: master
201 changes: 201 additions & 0 deletions Hierarchical_clustering/Mall_Customers.csv
@@ -0,0 +1,201 @@
CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0001,Male,19,15,39
0002,Male,21,15,81
0003,Female,20,16,6
0004,Female,23,16,77
0005,Female,31,17,40
0006,Female,22,17,76
0007,Female,35,18,6
0008,Female,23,18,94
0009,Male,64,19,3
0010,Female,30,19,72
0011,Male,67,19,14
0012,Female,35,19,99
0013,Female,58,20,15
0014,Female,24,20,77
0015,Male,37,20,13
0016,Male,22,20,79
0017,Female,35,21,35
0018,Male,20,21,66
0019,Male,52,23,29
0020,Female,35,23,98
0021,Male,35,24,35
0022,Male,25,24,73
0023,Female,46,25,5
0024,Male,31,25,73
0025,Female,54,28,14
0026,Male,29,28,82
0027,Female,45,28,32
0028,Male,35,28,61
0029,Female,40,29,31
0030,Female,23,29,87
0031,Male,60,30,4
0032,Female,21,30,73
0033,Male,53,33,4
0034,Male,18,33,92
0035,Female,49,33,14
0036,Female,21,33,81
0037,Female,42,34,17
0038,Female,30,34,73
0039,Female,36,37,26
0040,Female,20,37,75
0041,Female,65,38,35
0042,Male,24,38,92
0043,Male,48,39,36
0044,Female,31,39,61
0045,Female,49,39,28
0046,Female,24,39,65
0047,Female,50,40,55
0048,Female,27,40,47
0049,Female,29,40,42
0050,Female,31,40,42
0051,Female,49,42,52
0052,Male,33,42,60
0053,Female,31,43,54
0054,Male,59,43,60
0055,Female,50,43,45
0056,Male,47,43,41
0057,Female,51,44,50
0058,Male,69,44,46
0059,Female,27,46,51
0060,Male,53,46,46
0061,Male,70,46,56
0062,Male,19,46,55
0063,Female,67,47,52
0064,Female,54,47,59
0065,Male,63,48,51
0066,Male,18,48,59
0067,Female,43,48,50
0068,Female,68,48,48
0069,Male,19,48,59
0070,Female,32,48,47
0071,Male,70,49,55
0072,Female,47,49,42
0073,Female,60,50,49
0074,Female,60,50,56
0075,Male,59,54,47
0076,Male,26,54,54
0077,Female,45,54,53
0078,Male,40,54,48
0079,Female,23,54,52
0080,Female,49,54,42
0081,Male,57,54,51
0082,Male,38,54,55
0083,Male,67,54,41
0084,Female,46,54,44
0085,Female,21,54,57
0086,Male,48,54,46
0087,Female,55,57,58
0088,Female,22,57,55
0089,Female,34,58,60
0090,Female,50,58,46
0091,Female,68,59,55
0092,Male,18,59,41
0093,Male,48,60,49
0094,Female,40,60,40
0095,Female,32,60,42
0096,Male,24,60,52
0097,Female,47,60,47
0098,Female,27,60,50
0099,Male,48,61,42
0100,Male,20,61,49
0101,Female,23,62,41
0102,Female,49,62,48
0103,Male,67,62,59
0104,Male,26,62,55
0105,Male,49,62,56
0106,Female,21,62,42
0107,Female,66,63,50
0108,Male,54,63,46
0109,Male,68,63,43
0110,Male,66,63,48
0111,Male,65,63,52
0112,Female,19,63,54
0113,Female,38,64,42
0114,Male,19,64,46
0115,Female,18,65,48
0116,Female,19,65,50
0117,Female,63,65,43
0118,Female,49,65,59
0119,Female,51,67,43
0120,Female,50,67,57
0121,Male,27,67,56
0122,Female,38,67,40
0123,Female,40,69,58
0124,Male,39,69,91
0125,Female,23,70,29
0126,Female,31,70,77
0127,Male,43,71,35
0128,Male,40,71,95
0129,Male,59,71,11
0130,Male,38,71,75
0131,Male,47,71,9
0132,Male,39,71,75
0133,Female,25,72,34
0134,Female,31,72,71
0135,Male,20,73,5
0136,Female,29,73,88
0137,Female,44,73,7
0138,Male,32,73,73
0139,Male,19,74,10
0140,Female,35,74,72
0141,Female,57,75,5
0142,Male,32,75,93
0143,Female,28,76,40
0144,Female,32,76,87
0145,Male,25,77,12
0146,Male,28,77,97
0147,Male,48,77,36
0148,Female,32,77,74
0149,Female,34,78,22
0150,Male,34,78,90
0151,Male,43,78,17
0152,Male,39,78,88
0153,Female,44,78,20
0154,Female,38,78,76
0155,Female,47,78,16
0156,Female,27,78,89
0157,Male,37,78,1
0158,Female,30,78,78
0159,Male,34,78,1
0160,Female,30,78,73
0161,Female,56,79,35
0162,Female,29,79,83
0163,Male,19,81,5
0164,Female,31,81,93
0165,Male,50,85,26
0166,Female,36,85,75
0167,Male,42,86,20
0168,Female,33,86,95
0169,Female,36,87,27
0170,Male,32,87,63
0171,Male,40,87,13
0172,Male,28,87,75
0173,Male,36,87,10
0174,Male,36,87,92
0175,Female,52,88,13
0176,Female,30,88,86
0177,Male,58,88,15
0178,Male,27,88,69
0179,Male,59,93,14
0180,Male,35,93,90
0181,Female,37,97,32
0182,Female,32,97,86
0183,Male,46,98,15
0184,Female,29,98,88
0185,Female,41,99,39
0186,Male,30,99,97
0187,Female,54,101,24
0188,Male,28,101,68
0189,Female,41,103,17
0190,Female,36,103,85
0191,Female,34,103,23
0192,Female,32,103,69
0193,Male,33,113,8
0194,Female,38,113,91
0195,Female,47,120,16
0196,Female,35,120,79
0197,Female,45,126,28
0198,Male,32,126,74
0199,Male,32,137,18
0200,Male,30,137,83
48 changes: 48 additions & 0 deletions Hierarchical_clustering/hc.py
@@ -0,0 +1,48 @@
# Hierarchical Clustering

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values
# y = dataset.iloc[:, 3].values

# Splitting the dataset into the Training set and Test set
"""from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Using the dendrogram to find the optimal number of clusters
import scipy.cluster.hierarchy as sch
dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()

# Fitting Hierarchical Clustering to the dataset
from sklearn.cluster import AgglomerativeClustering
# Note: scikit-learn >= 1.2 renamed 'affinity' to 'metric' (older versions use affinity = 'euclidean'); Ward linkage requires Euclidean distances either way
hc = AgglomerativeClustering(n_clusters = 5, metric = 'euclidean', linkage = 'ward')
y_hc = hc.fit_predict(X)

# Visualising the clusters
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()
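
As a cross-check on the n_clusters = 5 choice, the same Ward linkage used for the dendrogram can also be cut programmatically; below is a minimal sketch of such a check, assuming the same Mall_Customers.csv and feature columns as hc.py:

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch

# Same two features as hc.py: Annual Income and Spending Score
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values

# Build the Ward linkage and inspect the merge distances (third column of Z)
Z = sch.linkage(X, method='ward')
merge_distances = Z[:, 2]

# The largest jump between consecutive merge distances marks a natural cut point,
# i.e. the longest vertical stretch in the dendrogram
gaps = np.diff(merge_distances)
suggested_k = len(merge_distances) - int(np.argmax(gaps))
print('Suggested number of clusters:', suggested_k)

# Cutting the tree into that many clusters gives labels comparable to y_hc
labels = sch.fcluster(Z, t=suggested_k, criterion='maxclust')
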
112 changes: 112 additions & 0 deletions NLP/Live-Twitter-Sentiment-Analysis.py
@@ -0,0 +1,112 @@
### Live Twitter Sentiment Analysis ###

# pip install tweepy
# pip install textblob

# Requires a Twitter developer account and API credentials

## Importing the Libraries ##
from textblob import TextBlob
import sys
import tweepy
import matplotlib.pyplot as plt

## Define a function to calculate the percentage ##
def percentage(part, whole):
    return 100 * float(part) / float(whole)

## Connecting with the Twitter API ##

# Paste your own API credentials here #
consumerKey = "paste your consumer key here"
consumerSecret = "paste your consumer secret here"
accessToken = "paste your access token here"
accessTokenSecret = "paste your access token secret here"

# Establish the connection with API #
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth)

# Search for the Term and define number of tweets #
searchTerm = input("Enter Keyword/Tag to search about: ")
NoOfTerms = int(input("Enter how many tweets to search: "))

# Fetch the requested number of tweets for the search term #
# Note: tweepy >= 4.0 renamed api.search to api.search_tweets
tweets = tweepy.Cursor(api.search, q=searchTerm).items(NoOfTerms)


## Iterate and Analyse the tweets ##

# Quick demonstration of how TextBlob scores polarity #
a = TextBlob("I am a bad Cricket player")
print(a.sentiment.polarity)   # "bad" -> negative score

b = TextBlob("I am a good Cricket player")
print(b.sentiment.polarity)   # "good" -> positive score

c = TextBlob("I am a Cricket player")
print(c.sentiment.polarity)   # no sentiment words -> roughly 0

# Counters for each sentiment class and an accumulator for the summed polarity #

positive = 0
negative = 0
neutral = 0
polarity = 0

for tweet in tweets:
    analysis = TextBlob(tweet.text)
    polarity += analysis.sentiment.polarity

    if analysis.sentiment.polarity == 0:
        neutral += 1
    elif analysis.sentiment.polarity < 0.00:
        negative += 1
    elif analysis.sentiment.polarity > 0.00:
        positive += 1

# Generate the percentages using previously created function percentage #
positive = percentage(positive, NoOfTerms)
negative = percentage(negative, NoOfTerms)
neutral = percentage(neutral, NoOfTerms)
polarity = percentage(polarity, NoOfTerms)

# Limit the decimals to 2 places (keep the values numeric so they can be plotted) #
positive = round(positive, 2)
negative = round(negative, 2)
neutral = round(neutral, 2)


## Print the result ##

print("How people are reacting on " + searchTerm +
" By analyzing" + str(NoOfTerms) + " Tweets ")

if polarity == 0:
    print("Neutral")
elif polarity < 0.00:
    print("Negative")
elif polarity > 0.00:
    print("Positive")

## Print the Pie Chart ##

labels = ['Positive ['+str(positive)+'%]',
          'Neutral ['+str(neutral)+'%]',
          'Negative ['+str(negative)+'%]']

sizes = [positive, neutral, negative]
colors = ['yellowgreen', 'gold', 'red']
patches, texts = plt.pie(sizes, colors = colors, startangle = 90)
plt.legend(patches, labels, loc = "best")
plt.title('How people are reacting on '
          + searchTerm + ' by analyzing '
          + str(NoOfTerms) + ' tweets')
plt.axis('equal')
plt.tight_layout()
plt.show()
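
The counting loop above can be exercised without Twitter credentials; the sketch below uses only TextBlob on a hypothetical sample_texts list standing in for the fetched tweets:

from textblob import TextBlob

# Hypothetical stand-in for the tweets returned by the API
sample_texts = [
    "I love this phone, the camera is great",
    "Worst service I have ever had",
    "The package arrived on Tuesday",
]

positive = negative = neutral = 0
for text in sample_texts:
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        positive += 1
    elif score < 0:
        negative += 1
    else:
        neutral += 1

print("positive:", positive, "negative:", negative, "neutral:", neutral)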