load("../chap06/best.use.RData") bestsince1950 = subset(best.use, Yr >= 1950) bestsince1950split = split(bestsince1950, bestsince1950$Sid) #split data set into separate storm tracks each element in the list is a separate track #We are identifying features associated with each track. bestdata = t(sapply(bestsince1950split, function(x){ x1<-unlist(x[1, c("lon", "lat")]) x2<-unlist(x[nrow(x), c("lon", "lat")]) x3<-range(x$lat) x4<-range(x$lon) x5<-max(x$WmaxS) x6<-unlist(x[rev(which(x5 == x$WmaxS))[1], c("lon", "lat")]) return(c(x1,x2,x3,x4,x5,x6)) })) colnames(bestdata) = c("FirstLon", "FirstLat", "LastLon", "LastLat", "MinLat", "MaxLat", "MinLon", "MaxLon", "WmaxS", "lon", "lat") print("We can cluster on all location attributes, but let use only the first 8") #We scale the data so that the cluster method does not weight one feature more than another feature. clusterdata = scale(bestdata[, c(1:4, 9:11)]) kmeansout<-lapply(2:10,kmeans,x=clusterdata) select=10 kmeanstrack<-lapply(kmeansout,function(x) { clusters<-split(1:length(x$cluster),x$cluster) mintrackSid<-lapply(clusters,function(i) { Y<-t(clusterdata[i,,drop=F]) #we reduce set but we must return i[ii] centers<-rowMeans(Y) #get means across each row (variable) dist <- colSums((Y-centers)^2) #subtract centers, and get sums distsort<-sort(dist)[select:1] tracklocation<-i[match(distsort,dist)] #return only one track bestsince1950split[tracklocation] }) } ) print("Are the tracks from the right locations?") print("Do you see a series of increasing numbers... all this means is that we have tracks from each cluster") require(maps) using4<-kmeanstrack[[2]] # 2 is the number of clusters minus 1 uselat<-range(unlist(lapply(using4,function(x) lapply(x,function(x) x$lat)))) uselon<-range(unlist(lapply(using4,function(x) lapply(x,function(x) x$lon)))) plot(uselon,uselat,type="n",xlab="longitude",ylab="latitude") colors=c("red","green","blue") allcolors <- as.vector(sapply(colors,function(x) colorRampPalette(c("grey",x),space="Lab")(select))) location <- 0 lapply(using4,function(x) { lapply(x, function(x) { assign("location",pos=1,location+1) lines(x$lon,x$lat,lwd=2,col=allcolors[location]) } ) } ) map("world",add=T) map("usa",add=T)