# Monday 9 April 2018

# PCA (principal component analysis) on the wine data set

# Load the wine measurements and build the numeric table used for the PCA.
# The CSV is read without a header row (header = FALSE), so the column names
# are assigned by hand below.
# NOTE(review): this absolute path only exists on the original author's
# machine; point it at your own copy of wine.csv before running.
wine <- read.csv("C:/Users/hp/Desktop/wine.csv", header = FALSE)
View(wine)
library(arules)

# Names of the 13 measurement columns. They are defined once and reused for
# the full header so the two lists cannot drift apart. "Total phenols" used to
# carry a stray leading space, which ended up in the column name.
wineColnames <- c(
  "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
  "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline"
)

# The file has one extra leading column, labelled "No" here.
# NOTE(review): presumably a class/row identifier -- confirm against the file.
colnames(wine) <- c("No", wineColnames)

View(wine)

# Drop the "No" column so only the 13 measurements remain.
wine$No <- NULL

wineColnames

# Working copy used by the rest of the analysis.
wine1 <- wine
View(wine1)
summary(wine1)
# Second-moment summaries of the measurement table. The three objects are
# computed first and then displayed in the same order as before: covariance,
# correlation, covariance in the data viewer, eigen-decomposition.
wine_cov <- cov(wine1)          # covariance matrix of the columns
wine_cor <- cor(wine1)          # correlation matrix of the columns
wine_eigen <- eigen(wine_cov)   # eigenvalues/eigenvectors of the covariance

wine_cov
wine_cor
View(wine_cov)
wine_eigen
# Principal component analysis on the covariance matrix (cor = FALSE), i.e.
# the columns are used in their original units and are not standardised.
wine_pca <- princomp(wine1, cor = FALSE)

# ggfortify supplies the autoplot() method used below. Install it only when
# it is missing instead of re-installing it on every run of the script.
if (!requireNamespace("ggfortify", quietly = TRUE)) {
  install.packages("ggfortify")
}
library(ggfortify)
autoplot(wine_pca)

# Components of the fitted object, the loadings, and the importance table.
names(wine_pca)
loadings(wine_pca)
summary(wine_pca)

# Sum of the component standard deviations.
sum(wine_pca$sdev)
# Total variance: sum of the squared component standard deviations.
sum(wine_pca$sdev^2)

# Eigenvalues of the covariance matrix and their mean, computed once.
# These need not equal wine_pca$sdev^2 exactly: princomp documents a divisor
# of N for its covariance, whereas cov() uses N - 1.
wine_eigenvalues <- eigen(wine_cov)$values
wine_eigenvalues
mean(wine_eigenvalues)

# Scree plot of the component variances.
plot(wine_pca, type = "lines")

# Project the data onto the first principal axis.
# The analysis above is read as showing a single dominant component. That
# component is NOT the Alcohol column: it is the first eigenvector of the
# covariance matrix, i.e. a weighted combination of all measurement columns
# (inspect the weights with loadings(wine_pca)). The name `wine_alcohol` is
# kept only so that existing references to it keep working.
# `$vectors` is spelled out in full; `$vector` only worked through partial
# name matching.
wine_alcohol <- eigen(wine_cov)$vectors[, 1]
wine_matrix <- as.matrix(wine1)

# Scores of every row on the first axis (one-column matrix). The columns are
# not mean-centred before projecting.
wine_final <- wine_matrix %*% wine_alcohol
wine_final
summary(wine_final)

# NOTE(review): the axis label hard-codes 178 rows and the title mentions
# Wine$Alcohol, although the plotted values are first-component scores.
plot(wine_final, xlab = "Index 1 - 178", ylab = "Range")
title("Plot the PCA with Wine$Alcohol")
View(wine_final)

# k-means with 3 clusters, first on the first-component scores and then on
# the raw measurements, with a cluster plot for each.
# kmeans() starts from randomly chosen centres, so the seed is fixed to make
# cluster labels, tables and plots reproducible between runs.
set.seed(2018)

wine_pcakmean <- kmeans(wine_final, centers = 3)
wine_pcakmean
library(fpc)
plotcluster(wine1, wine_pcakmean$cluster)
title("Plot the PCA with Wine After Transformation")

# Cluster sizes. table() needs arguments of equal length, so tabulating the
# whole 13-column data frame against the per-row cluster vector fails; the
# cluster vector is tabulated on its own instead.
# NOTE(review): if a comparison against the true classes was intended, that
# needs a per-row label vector, which is not available at this point.
table(wine_pcakmean$cluster)

# Same clustering on the untransformed measurements.
wine_kmean <- kmeans(wine1, centers = 3)
wine_kmean
plotcluster(wine1, wine_kmean$cluster)
title("Plot the PCA with Wine Before Transformation")

# Agreement between the two clusterings (label numbering is arbitrary).
table(pca_scores = wine_pcakmean$cluster, raw_data = wine_kmean$cluster)

# Cluster sizes; dim() on a kmeans object is always NULL.
wine_kmean$size