## These functions augment the ismev routines, extended to support the
## hurRisk functionality.
### pp.rate calculates the exceedance rate given GEV parameters
### mu = x[1], sigma = x[2], xi = x[3]; evaluated at y = threshold it gives
### the threshold crossing rate.
### Updated April 2, 2009 to make independent of ismev. Latest version.

### A wrapper function for a single call, for consistency; int.df is taken to
### have the year in column 1 and the wind speed in column 2.
"epp" <- function(int.df, ...)
{
    exceed.pp.plot(wind = int.df[, 2],
        npy = nrow(int.df)/(diff(range(int.df[, 1])) + 1), ...)
}

require(MASS)    # for ginv() in gpd.fit below

"get.gpd.fit" <- function(x, y, threshold = NULL, type = c("rl", "rp"))
{
    ## Simple utility to get either a return level ("rl") or a return period
    ## ("rp") from a GPD parameter vector x = (rate, sigma, xi).
    if(is.null(threshold)) stop("need valid threshold")
    type <- match.arg(type)
    xx <- c(threshold, x[2:3])
    rate <- x[1]
    if(type == "rl") return(pp.rl(x = xx, y = y, rate = rate))
    else return(1/pp.rate(x = xx, y = y, rate = rate))
}

"pp.rate" <- function(x, y, rate = 1)
{
    ## The rate term is added for compatibility with the GPD, where
    ## rate = threshold crossing rate, so that
    ## rate * P(X exceeds y | X exceeds threshold) is a thinned Poisson process.
    ## In this format x[1] is the location (or threshold), x[2] the scale,
    ## x[3] the shape.
    denom <- 1 + x[3] * (y - x[1])/x[2]
    ifelse(denom <= 0, 0, rate * denom^(-1/x[3]))
}

"pp.rl" <- function(x, y, rate = 1)
{
    ## rate term added; see pp.rate above.
    ## y is the return period; rate*y is the expected event count, so
    ## 1/(rate*y) is the per-event exceedance probability.
    lambda <- 1/(rate * y)
    if(x[3] != 0) x[1] + x[2]/x[3] * (lambda^(-x[3]) - 1)
    else x[1] - x[2] * log(lambda)
}

### pp.sigma calculates the GPD scale parameter for a vector of thresholds y
### as y increases above x[1].
"pp.sigma" <- function(x, y) {x[2] + x[3] * (y - x[1])}

### For completeness: the shape parameter is constant in the threshold.
"pp.xi" <- function(x, y) {rep(x[3], length(y))}

"derivparam" <- function(x, f, delta = 1.0e-6, y = 1)
{
    ## Central-difference derivative of f with respect to each parameter in x.
    lx <- length(x)
    xp <- xn <- x0 <- array(rep(x, each = lx), c(lx, lx))
    xd <- row(x0) == col(x0)
    xp[xd] <- x0[xd] + delta/2
    xn[xd] <- x0[xd] - delta/2
    yp <- apply(xp, 1, f, y)
    yn <- apply(xn, 1, f, y)
    (yp - yn)/delta
}

get.conf <- function(y, model, p = c(.05, .10, .5, .90, .95),
    f = c("pp.rate", "pp.rl"), digits = 0, units = "kt", rate = 1)
{
    ### y is the return level of interest.
    ### Returns a delta-method confidence interval on the return period for
    ### storms of a given strength or stronger (f = "pp.rate"), or on the
    ### return level (f = "pp.rl").
    f <- match.arg(f)
    ff <- get(f)
    p0 <- ff(x = model$coef, y = y, rate = rate)
    sigma <- get.sigma(mle = model$coef, cov = model$cov, y = y, f = ff)
    nr <- length(p0)
    nc <- length(p)
    if(f == "pp.rate") {
        ## Apply the delta method to the return rate (not the return period),
        ## then invert; negative rates map to an infinite return period.
        q <- round(1/qnorm(rep(1 - p, each = nr), mean = rep(p0, nc),
            sd = rep(sigma, nc)), digits = digits)
        q <- ifelse(q < 0, Inf, q)
    }
    else q <- round(qnorm(rep(p, each = nr), mean = rep(p0, nc),
        sd = rep(sigma, nc)), digits = digits)
    dim(q) <- c(nr, nc)
    colnames(q) <- paste(round(p, 3) * 100, "%", sep = "")
    rownames(q) <- paste(format(y), units, sep = " ")
    return(q)
}

get.sigma <- function(mle, cov, y, f)
{
    dd <- derivparam(x = mle, f = f, y = y)
    return(sqrt(apply(dd, 1, q.form, m = cov)))
}

"np.fit" <- function(x, rate)
{
    ## Empirical exceedance probabilities from plotting positions, thinned by
    ## the event rate.
    return(1 - exp( - rate * (1 - ((rank(x) - 0.5)/length(x)))))
}
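### A minimal usage sketch for the pp.* helpers (hypothetical parameter
### values, not from any fit here; commented out so sourcing this file has no
### side effects):
# gev.par <- c(50, 10, 0.1)     # mu, sigma, xi on the point-process scale
# pp.rate(gev.par, y = 64)      # expected annual rate of winds >= 64 kt
# pp.rl(gev.par, y = 100)       # 100-year return level, in kt
# pp.rl(gev.par, y = 1/pp.rate(gev.par, y = 64))  # recovers 64: rl and rate are inverses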
"pp.pois.rl" <- function(a, mat = NULL, dat, rate = 1, minq = 0,
    addtext = "for 29N 89W", plot.lower = NULL, plot.upper = NULL,
    rp = NULL, plotrl = TRUE, eps = 1e-06, ...)
{
    ### Called by exceed.pp.plot, which is called in turn by
    ### exceed.model.plot, which is called by exceed.model.climates, used in
    ### exceed.ssc. See exceed.pp.plot for details.
    ### Produces the return level curve and 95% confidence intervals on the
    ### usual scale, modified to handle return periods
    ### 1, 2, ..., 9, 10, 20, ..., 90, 100, 200, ..., 1000 (rp, the return
    ### periods of interest).
    a1 <- a2 <- a3 <- a
    a1[1] <- a[1] + eps
    a2[2] <- a[2] + eps
    a3[3] <- a[3] + eps
    if(is.null(rp))
        rp <- c(seq(1, 9, by = 1), seq(10, 90, by = 10), seq(100, 1000, by = 100))
    ### Return period = 1/return rate. The return period defined as the
    ### number of years between two events is 1/(1 - f), about 0.5 years
    ### larger than rp; this matches the usual definitions.
    f <- exp(-1/rp)
    # f <- c(seq(0.01, 0.09, by = 0.01), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.995, 0.999)
    q <- gevq(a, 1 - f)
    keep.q <- q >= minq
    f <- f[keep.q]
    q <- q[keep.q]
    ## One-sided difference derivatives of the quantile in each parameter.
    d1 <- (gevq(a1, 1 - f) - q)/eps
    d2 <- (gevq(a2, 1 - f) - q)/eps
    d3 <- (gevq(a3, 1 - f) - q)/eps
    d <- cbind(d1, d2, d3)
    if(is.null(mat)) v <- 0
    else v <- apply(d, 1, q.form, m = mat)
    upper <- q + 1.96 * sqrt(v)
    lower <- 2 * q - upper
    rp <- -1/log(f)
    rp.emp <- -1/log(1 - np.fit(dat, rate = rate))
    rl <- dat
    if(plotrl) {
        if(!is.null(plot.lower)) ymin <- plot.lower else ymin <- min(dat, lower)
        if(!is.null(plot.upper)) ymax <- plot.upper else ymax <- max(dat, upper)
        plot(rp, q, log = "x", type = "n", xlim = c(1, 1000),
            ylim = c(ymin, ymax), xlab = "Return Period",
            ylab = "Return Level", ...)
        title(paste("Return Level Plot", addtext, sep = "\n"))
        lines(rp, q, ...)
        lines(rp, upper, col = 4, ...)
        lines(rp, lower, col = 4, ...)
        points(rp.emp, dat, pch = ".", cex = 3, col = "red")
    }
    return(list(return.level.curves = data.frame(rp = rp, lower = lower,
            q = q, upper = upper),
        empirical.plot = data.frame(rp = rp.emp, rl = rl),
        model = list(coef = a, cov = mat)))
}

exceed.pp.plot <- function(wind, npy = 1, threshold = 34,
    addtext = "Return Level Plot", xunit = "Years", yunit = "kt",
    plotrl = TRUE, ...)
{
    ## Called by exceed.model.plot; documentation provided as needed.
    # model.fit <- try(pp.fit(wind, npy = npy, threshold = threshold, show = FALSE))
    model.fit <- try(gpd.fit.gev(x = wind, thresh = threshold, time = NULL, ratex = npy))
    if(inherits(model.fit, "try-error")) {
        cat(paste("No model fit for ", addtext, ".\n", sep = ""))
        return(NULL)
    }
    wind.data <- pp.pois.rl(model.fit$pp.mle, model.fit$pp.cov, dat = wind,
        rate = npy, addtext = addtext, plotrl = plotrl, ...)
    wind.data$gpd.coef <- model.fit$gpd.mle
    wind.data$gpd.cov <- model.fit$gpd.cov
    if(plotrl) {
        mtext(side = 1, line = 2, xunit)
        mtext(side = 2, line = 2, yunit)
        abline(h = threshold, col = 6)
    }
    return(wind.data)
}

### The functions gpd.to.gev, grad, gpd.to.gev.cov, and gpd.fit.gev were
### created to convert stable fits obtained with gpd.fit into a GEV
### points-over-threshold fit, under the assumption that the threshold is
### constant. These functions work only when sigma, xi, and rate do not
### depend on covariates. Provided "as is" without further documentation.
"gpd.to.gev" <- function(parameters = NULL, sigma, xi, rate, thresh)
{
    if(!is.null(parameters)) {
        rate <- parameters[1]
        sigma <- parameters[2]
        xi <- parameters[3]
    }
    if(xi == 0) mu <- sigma * log(rate) + thresh
    else mu <- thresh + sigma/xi * (rate^xi - 1)
    sigma <- sigma + xi * (mu - thresh)
    return(c(mu = mu, sigma = sigma, xi = xi))
}
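### A quick consistency check for gpd.to.gev (hypothetical values, commented
### out): the GEV parameterization should reproduce the threshold crossing
### rate, i.e. pp.rate(gev, y = thresh) equals rate.
# gev <- gpd.to.gev(parameters = c(2, 8, 0.1), thresh = 34)  # rate = 2/yr, sigma = 8, xi = 0.1
# pp.rate(gev, y = 34)                                       # should return 2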
"grad" <- function(f, x, ep = 10^-6, ...)
{
    ### Numerical gradient: where f(x) returns a vector, grad(f, x) is a
    ### matrix with one column per element of x and one row per element of
    ### f(x), so that delta(f(x)) = grad(f, x) %*% delta.x, where delta.x is
    ### a column vector. That is, if y = f(x), then
    ### grad(f)[i, j] = partial(y[i])/partial(x[j]).
    eps <- ep * x
    n <- length(x)
    x0 <- f(x, ...)
    m <- matrix(0, length(x0), n)
    colnames(m) <- names(x)
    rownames(m) <- names(x0)
    for(i in 1:n) {
        x1 <- x
        x1[i] <- x[i] + eps[i]
        x2 <- x
        x2[i] <- x[i] - eps[i]
        m[, i] <- (f(x1, ...) - f(x2, ...))/(2 * eps[i])
    }
    m
}

"gpd.to.gev.cov" <- function(parameters, gpd.cov, thresh, time)
{
    ## Delta-method covariance of the GEV parameters implied by the GPD fit.
    grad.gpd.gev <- grad(gpd.to.gev, parameters, thresh = thresh)
    sigma <- matrix(0, 3, 3)
    sigma[2:3, 2:3] <- gpd.cov
    sigma[1, 1] <- parameters[1]/time
    return(grad.gpd.gev %*% sigma %*% t(grad.gpd.gev))
}

gpd.fit.gev <- function(x, thresh, time = NULL, ratex = NULL, show = FALSE)
{
    ## This assumes that the count per unit time is Poisson with lambda = rate.
    xt <- x[x > thresh]
    if(is.null(time)) {
        time <- length(x)/ratex
    }
    rate <- length(xt)/time
    ## Assume time is known and xt is Poisson; then var(xt/time) =
    ## var(xt)/time^2, approximately n/time^2 = rate/time.
    var.rate <- rate/time
    gpd.dat <- gpd.fit(x, threshold = thresh, show = FALSE)
    gpd.mle <- c(rate, gpd.dat$mle)
    names(gpd.mle) <- c("rate", "sigma", "xi")
    gpd.cov <- gpd.dat$cov
    gpd.cov.out <- matrix(0, 3, 3)
    gpd.cov.out[2:3, 2:3] <- gpd.cov
    gpd.cov.out[1, 1] <- var.rate
    dimnames(gpd.cov.out) <- rep(list(c("rate", "sigma", "xi")), 2)
    gpd.mlev <- as.vector(gpd.mle)
    pp.mle <- gpd.to.gev(parameters = gpd.mlev, thresh = thresh)
    pp.cov <- gpd.to.gev.cov(parameters = gpd.mlev, gpd.cov = gpd.cov,
        thresh = thresh, time = time)
    return(list(gpd.mle = gpd.mle, gpd.cov = gpd.cov.out,
        pp.mle = pp.mle, pp.cov = pp.cov))
}

gevq <- function(a, p)
{
    ## GEV quantile function for upper-tail probability p.
    if(a[3] != 0) a[1] + (a[2] * ((-log(1 - p))^(-a[3]) - 1))/a[3]
    else a[1] - a[2] * log(-log(1 - p))
}

q.form <- function(d, m)
{
    ## Quadratic form d' m d, used for delta-method variances.
    t(as.matrix(d)) %*% m %*% as.matrix(d)
}

mrl.plot <- function(data, umin = min(data), umax = max(data) - 0.1,
    conf = 0.95, nint = 100)
{
    ## Mean residual life plot with pointwise confidence bands.
    x <- xu <- xl <- numeric(nint)
    u <- seq(umin, umax, length = nint)
    for(i in 1:nint) {
        data <- data[data > u[i]]
        x[i] <- mean(data - u[i])
        sdev <- sqrt(var(data))
        n <- length(data)
        xu[i] <- x[i] + (qnorm((1 + conf)/2) * sdev)/sqrt(n)
        xl[i] <- x[i] - (qnorm((1 + conf)/2) * sdev)/sqrt(n)
    }
    plot(u, x, type = "l", xlab = "u", ylab = "Mean Excess",
        ylim = c(min(xl[!is.na(xl)]), max(xu[!is.na(xu)])))
    lines(u[!is.na(xl)], xl[!is.na(xl)], lty = 2)
    lines(u[!is.na(xu)], xu[!is.na(xu)], lty = 2)
}
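### Workflow sketch (hypothetical simulated data, commented out): fit the GPD
### above a threshold, convert to the point-process (GEV) scale, and read off
### return levels and a confidence interval on the return period.
# set.seed(1)
# wind <- 34 + rexp(200, rate = 0.1)    # fake exceedances in kt
# fit <- gpd.fit.gev(x = wind, thresh = 34, ratex = 4)   # ~4 storms per year
# pp.rl(fit$pp.mle, y = 50)             # 50-year return level
# get.conf(y = 64, model = list(coef = fit$pp.mle, cov = fit$pp.cov), f = "pp.rate")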
"gpd.fit" <- function(xdat, threshold, npy = 365, ydat = NULL, sigl = NULL,
    shl = NULL, siglink = identity, shlink = identity, show = TRUE,
    method = "Nelder-Mead", maxit = 10000, init = NULL, ...)
{
    ## Modified version of the ismev gpd.fit: named parameters, covariance via
    ## the generalized inverse, t-statistics, and optional user initial values.
    z <- list()
    npsc <- length(sigl) + 1
    npsh <- length(shl) + 1
    n <- length(xdat)
    z$trans <- FALSE
    if(is.function(threshold))
        stop("`threshold' cannot be a function")
    u <- rep(threshold, length.out = n)
    if(length(unique(u)) > 1) z$trans <- TRUE
    xdatu <- xdat[xdat > u]
    xind <- (1:n)[xdat > u]
    u <- u[xind]
    in2 <- sqrt(6 * var(xdat))/pi
    in1 <- mean(xdat, na.rm = TRUE) - 0.57722 * in2
    if(is.null(sigl)) {
        sigmat <- as.matrix(rep(1, length(xdatu)))
        siginit <- in2
        cnames <- c("sigma")
    }
    else {
        z$trans <- TRUE
        sigmat <- cbind(rep(1, length(xdatu)), ydat[xind, sigl, drop = F])
        siginit <- c(in2, rep(0, length(sigl)))
        cnames <- paste("sigma", colnames(sigmat), sep = ".")
    }
    if(is.null(shl)) {
        shmat <- as.matrix(rep(1, length(xdatu)))
        shinit <- 0.1
        cnames <- c(cnames, "xi")
    }
    else {
        z$trans <- TRUE
        shmat <- cbind(rep(1, length(xdatu)), ydat[xind, shl, drop = F])
        shinit <- c(0.1, rep(0, length(shl)))
        cnames <- c(cnames, paste("xi", colnames(shmat), sep = "."))
    }
    if(is.null(init)) init <- c(siginit, shinit)
    z$model <- list(sigl, shl)
    z$link <- deparse(substitute(c(siglink, shlink)))
    z$threshold <- threshold
    z$nexc <- length(xdatu)
    z$data <- xdatu
    gpd.lik <- function(a) {
        ## Negative log-likelihood of the GPD for exceedances over u.
        sc <- siglink(sigmat %*% (a[seq(1, length = npsc)]))
        xi <- shlink(shmat %*% (a[seq(npsc + 1, length = npsh)]))
        y <- (xdatu - u)/sc
        y <- 1 + xi * y
        if(min(sc) <= 0) l <- 10^6
        else {
            if(min(y) <= 0) l <- 10^6
            else {
                l <- sum(log(sc)) + sum(log(y) * (1/xi + 1))
            }
        }
        l
    }
    x <- optim(init, gpd.lik, hessian = TRUE, method = method,
        control = list(maxit = maxit, ...))
    sc <- siglink(sigmat %*% (x$par[seq(1, length = npsc)]))
    xi <- shlink(shmat %*% (x$par[seq(npsc + 1, length = npsh)]))
    z$conv <- x$convergence
    z$nllh <- x$value
    z$vals <- cbind(sc, xi, u)
    if(z$trans) {
        z$data <- -log(as.vector((1 + (xi * (xdatu - u))/sc)^(-1/xi)))
    }
    names(x$par) <- cnames
    dimnames(x$hessian) <- list(cnames, cnames)
    z$mle <- x$par
    z$rate <- length(xdatu)/n
    z$cov <- ginv(x$hessian)    # generalized inverse from MASS
    dimnames(z$cov) <- dimnames(x$hessian)
    z$se <- sqrt(diag(z$cov))
    z$ttest <- z$mle/z$se
    z$n <- n
    z$npy <- npy
    z$xdata <- xdat
    if(show) {
        if(z$trans) print(z[c(2, 3)])
        if(length(z[[4]]) == 1) print(z[4])
        print(z[c(5, 7)])
        if(!z$conv) print(z[c(8, 10, 11, 13, 14)])
    }
    z$test <- (sc + xi * (xdatu - u))/abs(xi)
    invisible(z)
}

identity <- function(x) x

gpd.fitrange2 <- function(data, umin, umax, nint = 10, show = FALSE)
{
    ## Fits the GPD over a range of thresholds and plots the modified scale
    ## and shape with 95% confidence bars, for threshold selection.
    m <- s <- up <- ul <- matrix(0, nrow = nint, ncol = 2)
    u <- seq(umin, umax, length = nint)
    for(i in 1:nint) {
        z <- gpd.fit(data, u[i], show = show)
        m[i, ] <- z$mle
        m[i, 1] <- m[i, 1] - m[i, 2] * u[i]
        d <- matrix(c(1, -u[i]), ncol = 1)
        v <- t(d) %*% z$cov %*% d
        s[i, ] <- z$se
        s[i, 1] <- sqrt(v)
        up[i, ] <- m[i, ] + 1.96 * s[i, ]
        ul[i, ] <- m[i, ] - 1.96 * s[i, ]
    }
    names <- c("Modified Scale", "Shape")
    for(i in 2:1) {
        um <- max(up[, i])
        ud <- min(ul[, i])
        plot(u, m[, i], ylim = c(ud, um), xlab = "Threshold",
            ylab = names[i], type = "b")
        for(j in 1:nint) lines(c(u[j], u[j]), c(ul[j, i], up[j, i]))
    }
    invisible()
}
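### Threshold-selection sketch (commented out), using the hypothetical wind
### series from the sketch above: the mean residual life plot should be
### roughly linear, and the modified scale and shape roughly constant, above
### an adequate threshold.
# mrl.plot(wind)
# gpd.fitrange2(wind, umin = 34, umax = 64, nint = 10)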
### From HurRisk, but useful widely.
PredictHurRiskFun <- function(theta, thetaref = NULL, vthetaref, threshold,
    loglink = FALSE)
{
    ## Generates the POT survival function of (time, velocity): the output is
    ## a function which estimates P(Max_{t <= Time} Wmax(t) > v). This assumes
    ## we see N[T] storms in T years with maxima Wmax[i], so the quantity of
    ## interest is P(Max_{i = 1, ..., N[T]} Wmax[i] > v).
    ## theta is a vector of parameters, either (lograte, logsigma, xi) when
    ## loglink = TRUE or (rate, sigma, xi) otherwise.
    ## The expectation over parameter uncertainty is an integral over R^3,
    ## mapped to [0,1]^3: if z is uniform on [0,1]^3, then
    ## x = g(z) = theta + chol(vtheta) %*% qnorm(z) is N(theta, vtheta), so
    ## E(f(x)) = E(f(g(z))) is an integral over [0,1]^3, evaluated here by
    ## simple Monte Carlo. (The same transform works with adaptive
    ## integration, e.g. via package cubature.)
    ## The change from rate and sigma to lograte and logsigma prevents
    ## negative values for sigma and rate.
    ## This returns a function (not a value), which is then evaluated at
    ## different times (in years) and Wmax values; note that all the
    ## variables are stored within the returned function's environment.
    ## Correction for logarithm?
    ## Model assumptions:
    ##  - maximum storm strength follows a GPD distribution;
    ##  - storm frequency above any threshold follows a Poisson distribution.
    if(!loglink) {
        if(is.null(thetaref)) thetaref <- theta
        mult <- 1/c(thetaref[1:2], 1)
        vthetaref <- vthetaref * outer(mult, mult, "*")    # delta method
        ## Assume z = exp(y), where y is normal with mean my and variance s2y,
        ## and the parameters have mean mx and variance s2x. Matching moments,
        ## E(z) = exp(my + s2y/2) = mx and VAR(z) = E(z)^2 * (exp(s2y) - 1)
        ## = s2x, so set s2y = log(s2x/mx^2 + 1) and my = log(mx) - s2y/2.
        ## For the multivariate case, with z[1] = exp(y[1]), z[2] = y[2], and
        ## cov(y[1], y[2]) = sy1y2, we have cov(x[1], x[2]) = sx1x2 =
        ## cov(exp(y[1]), y[2]) = E(exp(y[1])) * sy1y2 = mx1 * sy1y2, so
        ## sy1y2 = sx1x2/mx1; with both z = exp(y),
        ## sx1x2 = mx1 * mx2 * (exp(sy1y2) - 1), so
        ## sy1y2 = log(sx1x2/(mx1*mx2) + 1).
        ## Correction to the delta method gives the correct variance for exp(x):
        vthetaref[c(1:2), c(1:2)] <- log(vthetaref[c(1:2), c(1:2)] + 1)
        ## Moment correction subtracted so the log-scale means match:
        ltheta <- c(log(theta[1:2]) - diag(vthetaref)[1:2]/2, theta[3])
    }
    else ltheta <- theta    # already supplied as (lograte, logsigma, xi)
    vthetachol <- t(chol(vthetaref))
    out <- function(Wmax, time, N = 10000, meanonly = TRUE)
    {
        ## We use the reference parameter values to determine a relative rate
        ## adjustment. In most cases this is the ratio D/Dref, where Dref is
        ## the reference distance and D the desired distance; for example, if
        ## D = 50 and Dref = 100, this should be 2. We incorporate the
        ## sensitivity of the model by fitting a model and translating the
        ## parameters (log(rate), log(sigma), xi) to match the fitted values
        ## for the desired radius, taking the estimated parameter covariances
        ## at the reference radius to be the same as those for the desired
        ## radius. Thus the model parameters are assumed
        ## (log(rate), log(sigma), xi) ~ N(ltheta, vthetaref).
        ## Not used:
        # relrate <- theta[1]/thetaref[1]
        ## This attempted adjustment for nonlinearities failed:
        # WmaxM <- pp.rl(x = c(threshold, thetaref[2:3]),
        #     y = 1/pp.rate(x = c(threshold, theta[2:3]), y = Wmax))
        yy <- rep(Wmax, length(time))
        times <- rep(time, each = length(Wmax))
        tmp <- function(x)
        {
            z <- ltheta + vthetachol %*% qnorm(x)
            value <- 1 - exp(-pp.vec.rate(x = cbind(rate = exp(z[1, ]),
                sigma = exp(z[2, ]), xi = z[3, ]),
                u = threshold, y = yy, time = times))
            dim(value) <- c(N, length(Wmax), length(time))
            dimnames(value) <- list(sample = 1:N, Wmax = Wmax, time = time)
            if(meanonly) return(apply(value, c(2, 3), mean))
            else return(value)
        }
        s <- runif(3 * N)
        dim(s) <- c(3, N)
        tmp(s)
    }
    return(out)
}
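### Usage sketch for PredictHurRiskFun (hypothetical inputs, commented out):
### build the survival function from a fitted (rate, sigma, xi) and its
### covariance, then evaluate it on a grid of wind speeds and horizons.
# fit <- gpd.fit.gev(x = wind, thresh = 34, ratex = 4)
# S <- PredictHurRiskFun(theta = fit$gpd.mle, vthetaref = fit$gpd.cov,
#     threshold = 34)
# S(Wmax = c(64, 96), time = c(10, 30, 50))  # P(some storm exceeds Wmax within time)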
pp.vec.rate <- function(x, u, y, time = NULL, Dimnames = FALSE)
{
    ## Vectorized version of pp.rate: the parameters rate, sigma, xi are the
    ## columns of the matrix x (x[,1] is the rate, x[,2] the scale, x[,3] the
    ## shape), u is the threshold, and y the wind speeds. The rate term is
    ## added for compatibility with the GPD, where rate = threshold crossing
    ## rate, giving a thinned Poisson process.
    ## time is a vector with one value for each y value; this allows us to
    ## find the expected count for a fixed time span in years.
    if(!is.null(time)) {
        if(length(time) == 1) time <- rep(time, length(y))
        if(length(time) != length(y))
            stop("time length should be 1 or length of y")
    }
    denom <- 1 + outer(x[, 3]/x[, 2], (y - u), "*")
    ones <- rep(1, length(y))
    test <- denom <= 0
    out <- outer(x[, 1], ones, "*") * denom^(outer(-1/x[, 3], ones, "*"))
    out[test] <- 0
    if(!is.null(time)) out <- out * outer(rep(1, nrow(x)), time, "*")
    if(Dimnames) {
        colnames(out) <- paste(y, round(time), sep = ".")
        rownames(out) <- rownames(x)
    }
    out
}
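### Quick consistency check (hypothetical values, commented out): each row of
### the pp.vec.rate output is pp.rate evaluated with that row's parameters,
### scaled by time.
# par.mat <- cbind(rate = c(1, 2), sigma = c(8, 10), xi = c(0.1, 0.2))
# pp.vec.rate(par.mat, u = 34, y = c(64, 96), time = 10)  # 2 x 2 expected counts
# 10 * pp.rate(c(34, 8, 0.1), y = 64, rate = 1)           # matches entry [1, 1]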