#These functions are for reading the global hurricane data only. #I have added utilities for reading the current best track data. #We need to stay current on the formats... read.data<-function(stem,yrs=c(95:99,0:5),...) { files<-paste(stem,format0(yrs,isize=2),sep="") remove(".baddata",pos=1) data<-lapply(files,read.single.file,...=...) dataout<-do.call("rbind",data) dataout } read.single.file<-function(x,loc=c(1:4,6:11,13,15,17,18,20,21,23),fieldid=c("Yr","Mo","Da","hr","lat","latd","lon","lond","FullName","Course","Speed","minp","Wmax","Wgust","Type","ObsType","WMO")) { full<-read.table(x,header=F,as.is=T,na.strings="???")[loc] colnames(full)<-fieldid full<-splitWMO(full) full<-latlon.combine(full) full$dateTime<-makeTimeString(y = full$Yr, m = full$Mo, d = full$Da, h = full$hr%/%1, min = round(100*(full$hr%%1))) full<-clean(full) full<-purge(full,Wmax=35,keep=2) #remove all records, keep only one advisory for each time. If multiple advisories keep the second one, if it exists, else keep the third. In all cases keep most complete full } clean<-function(x) { ###This is an initial cleaning of the data ###Remove duplicates ###Keep "ACT" type positions, remove forecasts/ ###We keep only WMO "WT" Wtype forecasts for tropical cyclone advisories. ###and "ACT" data. ###Remove bad dates from data ###Order data by date stormname product #remove #code when working x<-unique(x) x<-x[x$ObsType=="ACT",] x$ObsType<-NULL x<-x[x$Wtype=="WT",] x$Wtype<-NULL x<-x[x$PN%in%2:3,] ###Only Public Warnings and Forecast Advisories used. x<-x[!is.na(x$Wmax),] ###Only keep records with Wmax values, this is not a problem, (Alex-04 only, but have two records for same time one with both Wmax and minp: 2004 8 3 12 34.1 -76.5 ALEX-04 NA NA 974 ) } purge<-function(data,Wmax=30,keep=2) { #x is a cleaned advisory #Wmax is the minimum speed in kt to keep a record data<-data[data$Wmax>=Wmax,] idx<-split(1:nrow(data),paste(data$dateTime,data$FullName,sep="-")) pns<-lapply(idx,function(i,x) x[i], x=(data$PN==keep)) ##get T if PN==keep F if not cna<-lapply(idx,function(i,x) apply(x[i,],1,function(x) sum(!is.na(x))),x=data ) keep<-unlist(lapply(1:length(idx),function(i,ii,pn,cn,k) { out<-pn[[i]] if(sum(out)==0) ###There are none of the preferred advisory { out<-!out ###Take all other advisories } if(sum(out)==1) return(ii[[i]][out]) ###If there is a single or preferred advisory take out, else { ###now we are in selection mode. Either we have two or more preferred or two or more non preferred without any preferrred. value<-out*cn[[i]] out<-value==max(value) if(sum(out) >= 1) rev(ii[[i]][out])[1] ###here we have no contest, we just take the last observation... else stop("Error in max(value)") } } ,ii=idx,pn=pns,cn=cna,k=keep ) ) return(data[keep,]) } makeTimeString<-function(y,m,d,h=0,min=0,sec=0) { dateChar<-paste(y,format00(m,2),format00(d,2),sep="-") timeChar<-paste(format00(h,2),format00(min,2),format00(sec,2),sep=":") dateTimeChar<-paste(dateChar,timeChar,sep=" ") return(dateTimeChar) return() } makeTimeBase<-function(dateTimeChar,FinCenter="GMT",format="%Y-%m-%d %H:%M:%S") { ###generates timeDate object only, assumes FinCenter is Zone dateTimePosix<-strptime(dateTimeChar,format=format) } makeTime<-function(...,FinCenter="GMT") { makeTimeBase(makeTimeString(...),FinCenter=FinCenter,format="%Y-%m-%d %H:%M:%S") } addHour <-function(x = world.tracks, start = 0 ,extended=F) { ###This function is called to add hours and Storm id (FullName) to the data set ##inputs: x, the data frame, with at least the Yr, Sn, Mo, Da, hr fields. Note hr field contains minute as fraction ##outputs: a new data frame with the field days, and Sid added if(is.null(x$Region)) worldtracks<-F else worldtracks<-T if(worldtracks) { if(is.null(x$FullName)) { FullName<-as.character(x$Sid) } else { FullName<-as.character(x$FullName) x$FullName<-NULL } tmp<-unPaste(FullName,"-",extend=F) x$name<-as.character(tmp[[1]]) SYear<-as.numeric(as.character(tmp[[2]])) x$SYear<-SYear+1900+100*(SYear < 90) x$pmin<-x$minp } else { x$dateTime<-makeTimeString(y = x$Yr, m = x$Mo, d = x$Da, h = x$hr%/%1, min = 0) FullName <- paste(x$SYear, format(x$Sn), sep = ":") x$Type<-x$ET } nobs <- nrow(x) obs <- split(1:nobs, FullName) ###current row positions x$Shour <- as.vector(unlist( get.hours(lapply(obs, function(obsi, x) x[obsi,], x = x ),worldtrack=worldtracks))[order(unlist(obs))]) #match row positions unlist(obs)[order(unlist(obs))]-> 1:nobs if(worldtracks) { ###We fixed this problem in the worldtrack set, it affects mainly the best track data set, the year is incorrect } else { badhours <- x$Shour < 0 x$Shour[badhours] <- x$Shour[badhours] + 365*24 + (x$Yr[badhours] %% 4 == 0)*24 x$Yr[badhours]<-x$Yr[badhours]+1 ###test on ALICE 1954 12 30 should go to 1955 1 1 } ids<-names(obs) ids1<-match(ids,unique(FullName)) ##Order by first apperance x$Sid <- rep(ids1, sapply(obs, length))[order(unlist(obs))] + start ##repeat each name by number of observations for each storm, then sort to match input out<-x[order(x$Sid),] if(worldtracks) #These are the fields we want out[c("Sid","SYear","name","Region","Yr", "Mo", "Da", "hr", "Shour", "lat", "lon", "Wmax","Wgust", "pmin", "Course", "Speed", "Type")] else if(extended) out else out[c("Sid","Sn","SYear","name","Yr","Mo","Da","hr","Shour","lat","lon","Wmax","pmin","Type")] } get.hours<- function(y,maxhours=24,worldtrack=T) { ##Simple function to get hours. ##inputs y:, data frame with fields Yr, Mo, Da and hour.minutes (e.g. 6.00 is 6 am, 18.15 is 6:15pm) ##output days:, vector of days, from first data point. Called in many places to add days ##For interpretation, y0 is set to the first 6 hour period (00, 06, 12, 18 UT) Thus y-y0 is always between 0 and 6 hours y <- lapply(y, function(x,cleanup) { y <- makeTimeBase(x$dateTime) y0<- y[1] y0 <- makeTime(y = x$Yr[1], m = x$Mo[1], d = x$Da[1], h = 6*(x$hr[1]%/%6),FinCenter="GMT") hours <- as.numeric(y- y0, units = "hours") if(cleanup) #Clean up tracks, else return hours... Skip for best track (Should not have gaps) { badd<- T ##We will remove all records with missing Wmax in cleaning if(length(hours) > 1) { bigd<-((diff(hours))>maxhours) badd<-(c(bigd,F) | c(F,bigd)) ###Must be missing or bad } if(any(badd)) { if(length(badd)==1) warning(paste(" Single record for: ",x$Sid)) else warning(paste("Missing or Bad data for: ",x$Sid[1])) baddata<-list(x[badd,] ) names(baddata)<-x$Sid[1] if(exists(".baddata")) .baddata<<-c(.baddata,baddata) else .baddata<<-baddata } } return(hours) } ,cleanup=worldtrack) y } latlon.combine<-function(x) { ###x is a data frame with four columns, lat, latdir, lon, londir ###if latdir is "S" lat<--lat, if londir is "W" lon<-londir x$lat<-ifelse(x$latd=="N",x$lat ,-x$lat) x$lon<-ifelse(x$lond=="E",x$lon, -x$lon) x$lond<-NULL x$latd<-NULL return(x) } splitWMO<-function(x) { ###x is a data frame that contains a column "WMO", this function adds four new columns "Wtype", "Region","PN","SN" ###where PN is product Number, SN is storm number 1-5 rotates for each region, and Wtype is the WMO type, x$Wtype<-substring(x$WMO,1,2) x$Region<-substring(x$WMO,3,4) x$PN<-as.integer(substring(x$WMO,5,5)) x$SN<-as.integer(substring(x$WMO,6,6)) x$WMO<-NULL x } format0<-function(x,signs=c("-","",""),digits=0,isize=3) { #x is a vector which is formated to have leading zeros in place of characters #isize is the integer size, so size is at least isize #get sign signx<-signs[sign(x)+2] x<-abs(x) #split for better precision... if(digits>0) { xd<-x%%1 x<-x%/%1 xds<-paste(".",format00(as.integer(round(xd*10^digits)),digits),sep="") } else xds<-"" out<-paste(signx,format00(x,isize),xds,sep="") out } format00<-function(x,n) { xx <- paste(x) nn <- nchar(xx) n <- max(c(n, nn)) w <- rep(paste(rep(0, n), collapse = ""), length(x)) substr(w, n - nn + 1, n) <- xx w } importBest<-function(bestfile="h:/data/tracks2005.csv") { best <- read.csv(file = bestfile, as.is=TRUE) ###emove nas and names best <- best[!is.na(best[, 1]),] best$hr<-best$hr%/%100 #Keep hours as 0,6,12,18 ##Add hours field and fix Year best <- addHour(best) best$lon <- -best$lon ##data in best track is in West Longitude... east<-best$lon < -180 best$lon[east]<-360+best$lon[east] best } #### BesselDensity<-function (k1, k12, k2, m1, m2, g, x, N = 1000) { exp(k1 * cos(x - m1)) * integrate(function(y) exp((k2 + k12 * cos(x - m1)) * cos(y - m2)), g, pi, subdivisions = N)$value/(2 * pi * integrate(function(y) besselI(k1 + k12 * cos(y - m2), 0) * exp(k2 * cos(y - m2)), g, pi, subdivisions = N)$value) }