# The dataset consists of 100 observations on 14 separate variables and is an
# example of a segmentation study for a business-to-business situation,
# specifically a survey of existing customers of HATCO. Three types of
# information were collected:
#   First is the perception of HATCO on seven attributes identified in past
#   studies as the most influential in the choice of suppliers. The
#   respondents, purchasing managers of firms buying from HATCO, rated HATCO
#   on each attribute.
#   Second are actual purchase outcomes, either the evaluations of each
#   respondent's satisfaction with HATCO or the percentage of his or her
#   product purchases from HATCO.
#   Third are general characteristics of the purchasing companies
#   (e.g., firm size, industry type).
###################################

# Data loading ----

dados <- read.csv2("dados/HATCO.csv")
# Drop the first and the last column of the raw file.
dados <- dados[, -c(1, ncol(dados)), drop = FALSE]
# Logical flag per column: TRUE for numeric variables, FALSE for categorical.
numericas <- vapply(dados, is.numeric, logical(1))
head(dados)

# Dummy coding ----

# Turn every categorical variable into a block of indicator (dummy) columns,
# one column per level. lapply() always returns a list, so the result has the
# same shape no matter how many levels each variable has, and list-style
# subsetting keeps a data frame even when there is a single categorical column.
dadosDummies <- lapply(
  dados[!numericas],
  function(xx) model.matrix(~ xx - 1)
)
# Bind all dummy blocks side by side. Starting from a zero-column matrix keeps
# this valid when there are no categorical variables at all.
matrizDummies <- do.call(
  cbind,
  c(list(matrix(NA, nrow(dados), 0)), unname(dadosDummies))
)
dadosExt <- cbind(dados[, numericas, drop = FALSE], matrizDummies)
# Standardise every column so all variables weigh equally in the distances.
dadosExt <- scale(dadosExt)

# Descriptive plots helper ----

# Draw the descriptive plots of every variable against a cluster assignment.
#
# dados     : data frame with the original (unscaled) variables.
# numericas : logical vector, one entry per column of `dados`, TRUE for the
#             numeric columns.
# grupos    : vector of cluster labels, one per row of `dados`.
# cores     : colours used for the levels of the categorical variables.
#
# Numeric variables get one boxplot per cluster; categorical variables get a
# stacked barplot of level counts per cluster. Called for its plotting side
# effect only; returns NULL invisibly.
.plotClusterProfiles <- function(dados, numericas, grupos, cores) {
  for (jj in which(numericas)) {
    boxplot(
      dados[[jj]] ~ grupos,
      main = colnames(dados)[jj],
      xlab = "Cluster",
      ylab = colnames(dados)[jj]
    )
  }
  for (jj in which(!numericas)) {
    counts <- table(dados[[jj]], grupos)
    nLevels <- nrow(counts)
    # Use the fixed palette when it has enough colours; otherwise generate
    # one so that no level is drawn with a missing (NA) colour.
    paleta <- if (nLevels <= length(cores)) {
      cores[seq_len(nLevels)]
    } else {
      grDevices::rainbow(nLevels)
    }
    barplot(
      counts,
      legend.text = rownames(counts),
      main = colnames(dados)[jj],
      bty = "l",
      col = paleta
    )
  }
  invisible(NULL)
}

# Number of clusters used by both methods below.
K <- 3
cores <- c("darkblue", "darkred", "darkgreen")

# Hierarchical clustering ----

# Compute the distances between observations. Euclidean distance is used
# (the default of dist()).
dados.dist <- dist(dadosExt)

cluster <- hclust(dados.dist)
plot(cluster, xlab = "", sub = "", ylab = "")
# Cut the dendrogram into K groups.
clusters <- cutree(cluster, K)

# Descriptive plots for the hierarchical solution.
.plotClusterProfiles(dados, numericas, clusters, cores)

# K-means ----

# Fixed seed so the random initialisation of kmeans() is reproducible.
set.seed(302)
clustersK <- kmeans(dadosExt, centers = K)
# Cross-tabulate the k-means partition against the hierarchical one.
table(clustersK$cluster, clusters)

# Descriptive plots for the k-means solution.
.plotClusterProfiles(dados, numericas, clustersK$cluster, cores)