load("../chap06/best.use.RData") bestsince1950 = subset(best.use, Yr >= 1950) bestsince1950split = split(bestsince1950, bestsince1950$Sid) #split data set into separate storm tracks each element in the list is a separate track #We are identifying features associated with each track. bestdata = t(sapply(bestsince1950split, function(x){ x1<-unlist(x[1, c("lon", "lat")]) x2<-unlist(x[nrow(x), c("lon", "lat")]) x3<-max(x$WmaxS) x4<-unlist(x[rev(which(x3 == x$WmaxS))[1], c("lon", "lat")]) return(c(x1,x2,x3,x4)) })) colnames(bestdata) = c("FirstLon", "FirstLat", "LastLon", "LastLat", "WmaxS", "MaxLon", "MaxLat") print("We can cluster on all location attributes") #We scale the data so that the cluster method does not weight one feature more than another feature. clusterdata = scale(bestdata) kmeansout<-kmeans(x=clusterdata,3) #We would like to provide the distance as well as the cluster membership. #This function takes the output of kmeans and generates a data frame with three columns #The first column is the track number, the second is the cluster number and the third is the distance. addClusterInfo<-function(cd=clusterdata,kmo=kmeansout,bd=bestdata) { centers=kmo$center[kmo$cluster,] dist=rowMeans((cd-centers)^2) id=1:length(dist) return(data.frame(bd,id=id,dist=dist,cluster=kmo$cluster)) } clusterSelect<-function(x=clusterInfo,n=select) { xsplit<-split(x,x$cluster) #split x by cluster xid<-unlist(lapply(xsplit,function(x) x$id[order(x$dist)[1:n]])) return(subset(x,id%in%xid)) } plotSelected<-function(x=selectedStormInformation,storms=bestsince1950split,colors=c("red","green","blue","black")) { uselat <- range(unlist(lapply(storms[x$id],function(x) x$lat))) uselon <- range(unlist(lapply(storms[x$id],function(x) x$lon))) plot(uselon,uselat,type="n",xlab="longitude",ylab="latitude") x <- x[order(x$cluster,x$dist),] #recalculate cluster and select values nclust <- length(unique(x$clust)) nselect <- nrow(x)/nclust allcolors <- as.vector(sapply(colors[1:nclust],function(x) colorRampPalette(c(x,"grey"),space="Lab")(nselect))) #add color to x x$color <- allcolors #reorder for reweaving x<-x[order(-x$dist,x$clust),] for(i in 1:nrow(x)) { stormid<-x$id[i] #which storm storm<-storms[[stormid]] #Get storm lines(storm$lon,storm$lat,lwd=1,col=x$color[i]) } require("maps") map("world",add=T) map("usa",add=T) } clusterInfo<-addClusterInfo(cd=clusterdata,kmo=kmeansout) select=30 selectedStormInformation<-clusterSelect(x=clusterInfo,n=select) plotSelected()