#These functions are for reading the global hurricane data only.
#I have added utilities for reading the current best track data.
#We need to stay current on the formats...


read.data<-function(stem,yrs=c(95:99,0:5),...)
{
    files<-paste(stem,format0(yrs,isize=2),sep="")
    remove(".baddata",pos=1)
    data<-lapply(files,read.single.file,...=...)
    dataout<-do.call("rbind",data)
    dataout
}

read.single.file<-function(x,loc=c(1:4,6:11,13,15,17,18,20,21,23),fieldid=c("Yr","Mo","Da","hr","lat","latd","lon","lond","FullName","Course","Speed","minp","Wmax","Wgust","Type","ObsType","WMO"))
{
   full<-read.table(x,header=F,as.is=T,na.strings="???")[loc]
   colnames(full)<-fieldid
   full<-splitWMO(full)
   full<-latlon.combine(full)
   full$dateTime<-makeTimeString(y = full$Yr, m = full$Mo, d = full$Da, h = full$hr%/%1, min = round(100*(full$hr%%1)))
   full<-clean(full)
   full<-purge(full,Wmax=35,keep=2)  #remove all records, keep only one advisory for each time. If multiple advisories keep the second one, if it exists, else keep the third. In all cases keep most complete
   full
}


clean<-function(x)
{
###This is an initial cleaning of the data
###Remove duplicates
###Keep "ACT" type positions, remove forecasts/
###We keep only WMO "WT" Wtype forecasts for tropical cyclone advisories.
###and "ACT" data.
###Remove bad dates from data 
###Order data by date stormname product
#remove #code when working
x<-unique(x)
x<-x[x$ObsType=="ACT",]
x$ObsType<-NULL 
x<-x[x$Wtype=="WT",]
x$Wtype<-NULL
x<-x[x$PN%in%2:3,] ###Only Public Warnings and Forecast Advisories used. 
x<-x[!is.na(x$Wmax),] ###Only keep records with Wmax values, this is not a problem, (Alex-04 only, but have two records for same time one with both Wmax and minp: 2004  8  3 12 34.1 -76.5 ALEX-04     NA    NA  974 )
}

purge<-function(data,Wmax=30,keep=2)
{
#x is a cleaned advisory
#Wmax is the minimum speed in kt to keep a record 
data<-data[data$Wmax>=Wmax,]
idx<-split(1:nrow(data),paste(data$dateTime,data$FullName,sep="-")) 
pns<-lapply(idx,function(i,x) x[i], x=(data$PN==keep)) ##get T if PN==keep F if not
cna<-lapply(idx,function(i,x) apply(x[i,],1,function(x) sum(!is.na(x))),x=data )
keep<-unlist(lapply(1:length(idx),function(i,ii,pn,cn,k)
                 { 
                 out<-pn[[i]] 
                  if(sum(out)==0) ###There are none of the preferred advisory
                  {
                   out<-!out      ###Take all other advisories
                  }
                  if(sum(out)==1) return(ii[[i]][out])   ###If there is a single or preferred advisory take out, 
                  else
                    {  ###now we are in selection mode. Either we have two or more preferred or two or more non preferred without any preferrred.
                    value<-out*cn[[i]]
                    out<-value==max(value)
                    if(sum(out) >= 1) rev(ii[[i]][out])[1]  ###here we have no contest, we just take the last observation...
                    else stop("Error in max(value)")
                    }
                  }                  
                  ,ii=idx,pn=pns,cn=cna,k=keep ) )
return(data[keep,])
}


makeTimeString<-function(y,m,d,h=0,min=0,sec=0)
{
       dateChar<-paste(y,format00(m,2),format00(d,2),sep="-")
       timeChar<-paste(format00(h,2),format00(min,2),format00(sec,2),sep=":")
       dateTimeChar<-paste(dateChar,timeChar,sep=" ")
       return(dateTimeChar)
       return()       
}
makeTimeBase<-function(dateTimeChar,FinCenter="GMT",format="%Y-%m-%d %H:%M:%S")
     {
     ###generates timeDate object only, assumes FinCenter is Zone
     dateTimePosix<-strptime(dateTimeChar,format=format)
     }

makeTime<-function(...,FinCenter="GMT")
{
makeTimeBase(makeTimeString(...),FinCenter=FinCenter,format="%Y-%m-%d %H:%M:%S")
}

addHour <-function(x = world.tracks, start = 0 ,extended=F)
{
    ###This function is called to add hours and Storm id (FullName) to the data set
    ##inputs: x, the data frame, with at least the Yr, Sn,  Mo, Da, hr fields. Note hr field contains minute as fraction
    ##outputs: a new data frame with the field days, and Sid added
    if(is.null(x$Region)) worldtracks<-F
    else
    worldtracks<-T
    if(worldtracks)
    {
        if(is.null(x$FullName))
        {
            FullName<-as.character(x$Sid)
        }
        else
        {
            FullName<-as.character(x$FullName)
            x$FullName<-NULL
        }
        tmp<-unPaste(FullName,"-",extend=F)
        x$name<-as.character(tmp[[1]])
        SYear<-as.numeric(as.character(tmp[[2]]))
        x$SYear<-SYear+1900+100*(SYear < 90)        
        x$pmin<-x$minp
    } 
    else
    {
    x$dateTime<-makeTimeString(y = x$Yr, m = x$Mo, d = x$Da, h = x$hr%/%1, min = 0)
    FullName <- paste(x$SYear, format(x$Sn), sep = ":")
    x$Type<-x$ET
    }
    nobs <- nrow(x)
    obs <- split(1:nobs, FullName) ###current row positions
    x$Shour <- as.vector(unlist(
    get.hours(lapply(obs, function(obsi, x) x[obsi,], x = x ),worldtrack=worldtracks))[order(unlist(obs))]) #match row positions unlist(obs)[order(unlist(obs))]-> 1:nobs
    if(worldtracks)
    {
    ###We fixed this problem in the worldtrack set, it affects mainly the best track data set, the year is incorrect
    }
    else
    {
    badhours <- x$Shour < 0
    x$Shour[badhours] <- x$Shour[badhours] + 365*24 + (x$Yr[badhours] %% 4 == 0)*24 
    x$Yr[badhours]<-x$Yr[badhours]+1 ###test on ALICE 1954 12 30 should go to 1955 1 1
    }
    ids<-names(obs) 
    ids1<-match(ids,unique(FullName))  ##Order by first apperance
    x$Sid <- rep(ids1, sapply(obs, length))[order(unlist(obs))] + start  ##repeat each name by number of observations for each storm, then sort to match input
    out<-x[order(x$Sid),]
    if(worldtracks) #These are the fields we want
    out[c("Sid","SYear","name","Region","Yr", "Mo", "Da", "hr", "Shour", "lat", "lon", "Wmax","Wgust", "pmin", "Course", "Speed", "Type")]
    else
    if(extended)
    out
    else
    out[c("Sid","Sn","SYear","name","Yr","Mo","Da","hr","Shour","lat","lon","Wmax","pmin","Type")]

}

get.hours<- function(y,maxhours=24,worldtrack=T)
{
    ##Simple function to get hours.
    ##inputs y:, data frame with fields Yr, Mo, Da and  hour.minutes (e.g. 6.00 is 6 am, 18.15 is 6:15pm)
    ##output days:, vector of days, from first data point. Called in many places to add days 
    ##For interpretation, y0 is set to the first 6 hour period (00, 06, 12, 18 UT) Thus y-y0 is always between 0 and 6 hours 
        
    y <- lapply(y, function(x,cleanup)
    {
        y <- makeTimeBase(x$dateTime)
        y0<- y[1]
        y0 <- makeTime(y = x$Yr[1], m = x$Mo[1], d = x$Da[1], h = 6*(x$hr[1]%/%6),FinCenter="GMT")
        hours <- as.numeric(y- y0, units = "hours")
        if(cleanup) #Clean up tracks, else return hours... Skip for best track (Should not have gaps)
        {
            badd<- T  ##We will remove all records with missing Wmax in cleaning 
            if(length(hours) > 1)
            {
            bigd<-((diff(hours))>maxhours)
            badd<-(c(bigd,F) | c(F,bigd))   ###Must be missing or bad
            }           
            if(any(badd))
            {
                if(length(badd)==1)
                    warning(paste("      Single record for: ",x$Sid))
                else
                    warning(paste("Missing or Bad data for: ",x$Sid[1]))
                baddata<-list(x[badd,] )
                names(baddata)<-x$Sid[1]
                if(exists(".baddata"))
                    .baddata<<-c(.baddata,baddata)
                else
                .baddata<<-baddata
            }
        } 
        return(hours)
    }
    ,cleanup=worldtrack)
    y
}

latlon.combine<-function(x)
{
   ###x is a data frame with four columns, lat, latdir, lon, londir
   ###if latdir is "S" lat<--lat, if londir is "W" lon<-londir
   x$lat<-ifelse(x$latd=="N",x$lat ,-x$lat)
   x$lon<-ifelse(x$lond=="E",x$lon, -x$lon)
   x$lond<-NULL
   x$latd<-NULL
   return(x)
}

splitWMO<-function(x)
{   
   ###x is a data frame that contains a column "WMO", this function adds four new columns "Wtype", "Region","PN","SN"
   ###where PN is product Number, SN is storm number 1-5 rotates for each region, and Wtype is the WMO type, 
   x$Wtype<-substring(x$WMO,1,2)
   x$Region<-substring(x$WMO,3,4)
   x$PN<-as.integer(substring(x$WMO,5,5))
   x$SN<-as.integer(substring(x$WMO,6,6))
   x$WMO<-NULL
   x
}


format0<-function(x,signs=c("-","",""),digits=0,isize=3)
{ #x is a vector which is formated to have leading zeros in place of characters
  #isize is the integer size, so size is at least isize 
  #get sign  
    signx<-signs[sign(x)+2]
    x<-abs(x)
  #split for better precision...
    if(digits>0)
    {
    xd<-x%%1
    x<-x%/%1
    xds<-paste(".",format00(as.integer(round(xd*10^digits)),digits),sep="")
    }
    else
    xds<-""
    out<-paste(signx,format00(x,isize),xds,sep="")
    out
}

format00<-function(x,n)
{
    xx <- paste(x)
    nn <- nchar(xx)
    n <- max(c(n, nn))
    w <- rep(paste(rep(0, n), collapse = ""), length(x))
    substr(w, n - nn + 1, n) <- xx
    w
}

importBest<-function(bestfile="h:/data/tracks2005.csv")
{   
    best <- read.csv(file = bestfile, as.is=TRUE)
    ###emove nas and names
    best <- best[!is.na(best[, 1]),]
    best$hr<-best$hr%/%100 #Keep hours as 0,6,12,18
        ##Add hours field and fix Year 
    best <- addHour(best)
        best$lon <- -best$lon ##data in best track is in West Longitude...  
        east<-best$lon < -180
        best$lon[east]<-360+best$lon[east]
        best
}


####
BesselDensity<-function (k1, k12, k2, m1, m2, g, x, N = 1000) 
{
    exp(k1 * cos(x - m1)) * integrate(function(y) exp((k2 + k12 * 
        cos(x - m1)) * cos(y - m2)), g, pi, subdivisions = N)$value/(2 * 
        pi * integrate(function(y) besselI(k1 + k12 * cos(y - 
        m2), 0) * exp(k2 * cos(y - m2)), g, pi, subdivisions = N)$value)
}