## These functions augment the ismev routines, extended to support the
## hurRisk functionality.
### pp.rate calculates the exceedance rate given GEV parameters
### mu = x[1], sigma = x[2], xi = x[3]; evaluated at y = threshold it gives
### the threshold crossing rate.
### Updated April 2, 2009 to make independent of ismev. Latest version.

### A wrapper function for a single call, for consistency; int.df is taken to
### have the year in column 1 and the wind speed in column 2.
"epp" <- function(int.df, ...)
{
    exceed.pp.plot(wind = int.df[, 2],
        npy = nrow(int.df)/(diff(range(int.df[, 1])) + 1), ...)
}

require(MASS)    # for ginv() in gpd.fit below

"get.gpd.fit" <- function(x, y, threshold = NULL, type = c("rl", "rp"))
{
    ## Simple utility to get either a return level ("rl") or a return period
    ## ("rp") from a GPD parameter vector x = (rate, sigma, xi).
    if(is.null(threshold)) stop("need valid threshold")
    type <- match.arg(type)
    xx <- c(threshold, x[2:3])
    rate <- x[1]
    if(type == "rl") return(pp.rl(x = xx, y = y, rate = rate))
    else return(1/pp.rate(x = xx, y = y, rate = rate))
}

"pp.rate" <- function(x, y, rate = 1)
{
    ## The rate term is added for compatibility with the GPD, where
    ## rate = threshold crossing rate, so that
    ## rate * P(X exceeds y | X exceeds threshold) is a thinned Poisson process.
    ## In this format x[1] is the location (or threshold), x[2] the scale,
    ## x[3] the shape.
    denom <- 1 + x[3] * (y - x[1])/x[2]
    ifelse(denom <= 0, 0, rate * denom^(-1/x[3]))
}

"pp.rl" <- function(x, y, rate = 1)
{
    ## rate term added; see pp.rate above.
    ## y is the return period; rate*y is the expected event count, so
    ## 1/(rate*y) is the per-event exceedance probability.
    lambda <- 1/(rate * y)
    if(x[3] != 0) x[1] + x[2]/x[3] * (lambda^(-x[3]) - 1)
    else x[1] - x[2] * log(lambda)
}

### pp.sigma calculates the GPD scale parameter for a vector of thresholds y
### as y increases above x[1].
"pp.sigma" <- function(x, y) {x[2] + x[3] * (y - x[1])}

### For completeness: the shape parameter is constant in the threshold.
"pp.xi" <- function(x, y) {rep(x[3], length(y))}

"derivparam" <- function(x, f, delta = 1.0e-6, y = 1)
{
    ## Central-difference derivative of f with respect to each parameter in x.
    lx <- length(x)
    xp <- xn <- x0 <- array(rep(x, each = lx), c(lx, lx))
    xd <- row(x0) == col(x0)
    xp[xd] <- x0[xd] + delta/2
    xn[xd] <- x0[xd] - delta/2
    yp <- apply(xp, 1, f, y)
    yn <- apply(xn, 1, f, y)
    (yp - yn)/delta
}

get.conf <- function(y, model, p = c(.05, .10, .5, .90, .95),
    f = c("pp.rate", "pp.rl"), digits = 0, units = "kt", rate = 1)
{
    ### y is the return level of interest.
    ### Returns a delta-method confidence interval on the return period for
    ### storms of a given strength or stronger (f = "pp.rate"), or on the
    ### return level (f = "pp.rl").
    f <- match.arg(f)
    ff <- get(f)
    p0 <- ff(x = model$coef, y = y, rate = rate)
    sigma <- get.sigma(mle = model$coef, cov = model$cov, y = y, f = ff)
    nr <- length(p0)
    nc <- length(p)
    if(f == "pp.rate") {
        ## Apply the delta method to the return rate (not the return period),
        ## then invert; negative rates map to an infinite return period.
        q <- round(1/qnorm(rep(1 - p, each = nr), mean = rep(p0, nc),
            sd = rep(sigma, nc)), digits = digits)
        q <- ifelse(q < 0, Inf, q)
    }
    else q <- round(qnorm(rep(p, each = nr), mean = rep(p0, nc),
        sd = rep(sigma, nc)), digits = digits)
    dim(q) <- c(nr, nc)
    colnames(q) <- paste(round(p, 3) * 100, "%", sep = "")
    rownames(q) <- paste(format(y), units, sep = " ")
    return(q)
}

get.sigma <- function(mle, cov, y, f)
{
    dd <- derivparam(x = mle, f = f, y = y)
    return(sqrt(apply(dd, 1, q.form, m = cov)))
}

"np.fit" <- function(x, rate)
{
    ## Empirical exceedance probabilities from plotting positions, thinned by
    ## the event rate.
    return(1 - exp( - rate * (1 - ((rank(x) - 0.5)/length(x)))))
}
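### A minimal usage sketch for the pp.* helpers (hypothetical parameter
### values, not from any fit here; commented out so sourcing this file has no
### side effects):
# gev.par <- c(50, 10, 0.1)     # mu, sigma, xi on the point-process scale
# pp.rate(gev.par, y = 64)      # expected annual rate of winds >= 64 kt
# pp.rl(gev.par, y = 100)       # 100-year return level, in kt
# pp.rl(gev.par, y = 1/pp.rate(gev.par, y = 64))  # recovers 64: rl and rate are inverses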
"pp.pois.rl" <- function(a, mat = NULL, dat, rate = 1, minq = 0,
    addtext = "for 29N 89W", plot.lower = NULL, plot.upper = NULL,
    rp = NULL, plotrl = TRUE, eps = 1e-06, ...)
{
    ### Called by exceed.pp.plot, which is called in turn by
    ### exceed.model.plot, which is called by exceed.model.climates, used in
    ### exceed.ssc. See exceed.pp.plot for details.
    ### Produces the return level curve and 95% confidence intervals on the
    ### usual scale, modified to handle return periods
    ### 1, 2, ..., 9, 10, 20, ..., 90, 100, 200, ..., 1000 (rp, the return
    ### periods of interest).
    a1 <- a2 <- a3 <- a
    a1[1] <- a[1] + eps
    a2[2] <- a[2] + eps
    a3[3] <- a[3] + eps
    if(is.null(rp))
        rp <- c(seq(1, 9, by = 1), seq(10, 90, by = 10), seq(100, 1000, by = 100))
    ### Return period = 1/return rate. The return period defined as the
    ### number of years between two events is 1/(1 - f), about 0.5 years
    ### larger than rp; this matches the usual definitions.
    f <- exp(-1/rp)
    # f <- c(seq(0.01, 0.09, by = 0.01), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.995, 0.999)
    q <- gevq(a, 1 - f)
    keep.q <- q >= minq
    f <- f[keep.q]
    q <- q[keep.q]
    ## One-sided difference derivatives of the quantile in each parameter.
    d1 <- (gevq(a1, 1 - f) - q)/eps
    d2 <- (gevq(a2, 1 - f) - q)/eps
    d3 <- (gevq(a3, 1 - f) - q)/eps
    d <- cbind(d1, d2, d3)
    if(is.null(mat)) v <- 0
    else v <- apply(d, 1, q.form, m = mat)
    upper <- q + 1.96 * sqrt(v)
    lower <- 2 * q - upper
    rp <- -1/log(f)
    rp.emp <- -1/log(1 - np.fit(dat, rate = rate))
    rl <- dat
    if(plotrl) {
        if(!is.null(plot.lower)) ymin <- plot.lower else ymin <- min(dat, lower)
        if(!is.null(plot.upper)) ymax <- plot.upper else ymax <- max(dat, upper)
        plot(rp, q, log = "x", type = "n", xlim = c(1, 1000),
            ylim = c(ymin, ymax), xlab = "Return Period",
            ylab = "Return Level", ...)
        title(paste("Return Level Plot", addtext, sep = "\n"))
        lines(rp, q, ...)
        lines(rp, upper, col = 4, ...)
        lines(rp, lower, col = 4, ...)
        points(rp.emp, dat, pch = ".", cex = 3, col = "red")
    }
    return(list(return.level.curves = data.frame(rp = rp, lower = lower,
            q = q, upper = upper),
        empirical.plot = data.frame(rp = rp.emp, rl = rl),
        model = list(coef = a, cov = mat)))
}

exceed.pp.plot <- function(wind, npy = 1, threshold = 34,
    addtext = "Return Level Plot", xunit = "Years", yunit = "kt",
    plotrl = TRUE, ...)
{
    ## Called by exceed.model.plot; documentation provided as needed.
    # model.fit <- try(pp.fit(wind, npy = npy, threshold = threshold, show = FALSE))
    model.fit <- try(gpd.fit.gev(x = wind, thresh = threshold, time = NULL, ratex = npy))
    if(inherits(model.fit, "try-error")) {
        cat(paste("No model fit for ", addtext, ".\n", sep = ""))
        return(NULL)
    }
    wind.data <- pp.pois.rl(model.fit$pp.mle, model.fit$pp.cov, dat = wind,
        rate = npy, addtext = addtext, plotrl = plotrl, ...)
    wind.data$gpd.coef <- model.fit$gpd.mle
    wind.data$gpd.cov <- model.fit$gpd.cov
    if(plotrl) {
        mtext(side = 1, line = 2, xunit)
        mtext(side = 2, line = 2, yunit)
        abline(h = threshold, col = 6)
    }
    return(wind.data)
}

### The functions gpd.to.gev, grad, gpd.to.gev.cov, and gpd.fit.gev were
### created to convert stable fits obtained with gpd.fit into a GEV
### points-over-threshold fit, under the assumption that the threshold is
### constant. These functions work only when sigma, xi, and rate do not
### depend on covariates. Provided "as is" without further documentation.
"gpd.to.gev" <- function(parameters = NULL, sigma, xi, rate, thresh)
{
    if(!is.null(parameters)) {
        rate <- parameters[1]
        sigma <- parameters[2]
        xi <- parameters[3]
    }
    if(xi == 0) mu <- sigma * log(rate) + thresh
    else mu <- thresh + sigma/xi * (rate^xi - 1)
    sigma <- sigma + xi * (mu - thresh)
    return(c(mu = mu, sigma = sigma, xi = xi))
}
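### A quick consistency check for gpd.to.gev (hypothetical values, commented
### out): the GEV parameterization should reproduce the threshold crossing
### rate, i.e. pp.rate(gev, y = thresh) equals rate.
# gev <- gpd.to.gev(parameters = c(2, 8, 0.1), thresh = 34)  # rate = 2/yr, sigma = 8, xi = 0.1
# pp.rate(gev, y = 34)                                       # should return 2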
"grad" <- function(f, x, ep = 10^-6, ...)
{
    ### Numerical gradient: where f(x) returns a vector, grad(f, x) is a
    ### matrix with one column per element of x and one row per element of
    ### f(x), so that delta(f(x)) = grad(f, x) %*% delta.x, where delta.x is
    ### a column vector. That is, if y = f(x), then
    ### grad(f)[i, j] = partial(y[i])/partial(x[j]).
    eps <- ep * x
    n <- length(x)
    x0 <- f(x, ...)
    m <- matrix(0, length(x0), n)
    colnames(m) <- names(x)
    rownames(m) <- names(x0)
    for(i in 1:n) {
        x1 <- x
        x1[i] <- x[i] + eps[i]
        x2 <- x
        x2[i] <- x[i] - eps[i]
        m[, i] <- (f(x1, ...) - f(x2, ...))/(2 * eps[i])
    }
    m
}

"gpd.to.gev.cov" <- function(parameters, gpd.cov, thresh, time)
{
    ## Delta-method covariance of the GEV parameters implied by the GPD fit.
    grad.gpd.gev <- grad(gpd.to.gev, parameters, thresh = thresh)
    sigma <- matrix(0, 3, 3)
    sigma[2:3, 2:3] <- gpd.cov
    sigma[1, 1] <- parameters[1]/time
    return(grad.gpd.gev %*% sigma %*% t(grad.gpd.gev))
}

gpd.fit.gev <- function(x, thresh, time = NULL, ratex = NULL, show = FALSE)
{
    ## This assumes that the count per unit time is Poisson with lambda = rate.
    xt <- x[x > thresh]
    if(is.null(time)) {
        time <- length(x)/ratex
    }
    rate <- length(xt)/time
    ## Assume time is known and xt is Poisson; then var(xt/time) =
    ## var(xt)/time^2, approximately n/time^2 = rate/time.
    var.rate <- rate/time
    gpd.dat <- gpd.fit(x, threshold = thresh, show = FALSE)
    gpd.mle <- c(rate, gpd.dat$mle)
    names(gpd.mle) <- c("rate", "sigma", "xi")
    gpd.cov <- gpd.dat$cov
    gpd.cov.out <- matrix(0, 3, 3)
    gpd.cov.out[2:3, 2:3] <- gpd.cov
    gpd.cov.out[1, 1] <- var.rate
    dimnames(gpd.cov.out) <- rep(list(c("rate", "sigma", "xi")), 2)
    gpd.mlev <- as.vector(gpd.mle)
    pp.mle <- gpd.to.gev(parameters = gpd.mlev, thresh = thresh)
    pp.cov <- gpd.to.gev.cov(parameters = gpd.mlev, gpd.cov = gpd.cov,
        thresh = thresh, time = time)
    return(list(gpd.mle = gpd.mle, gpd.cov = gpd.cov.out,
        pp.mle = pp.mle, pp.cov = pp.cov))
}

gevq <- function(a, p)
{
    ## GEV quantile function for upper-tail probability p.
    if(a[3] != 0) a[1] + (a[2] * ((-log(1 - p))^(-a[3]) - 1))/a[3]
    else a[1] - a[2] * log(-log(1 - p))
}

q.form <- function(d, m)
{
    ## Quadratic form d' m d, used for delta-method variances.
    t(as.matrix(d)) %*% m %*% as.matrix(d)
}

mrl.plot <- function(data, umin = min(data), umax = max(data) - 0.1,
    conf = 0.95, nint = 100)
{
    ## Mean residual life plot with pointwise confidence bands.
    x <- xu <- xl <- numeric(nint)
    u <- seq(umin, umax, length = nint)
    for(i in 1:nint) {
        data <- data[data > u[i]]
        x[i] <- mean(data - u[i])
        sdev <- sqrt(var(data))
        n <- length(data)
        xu[i] <- x[i] + (qnorm((1 + conf)/2) * sdev)/sqrt(n)
        xl[i] <- x[i] - (qnorm((1 + conf)/2) * sdev)/sqrt(n)
    }
    plot(u, x, type = "l", xlab = "u", ylab = "Mean Excess",
        ylim = c(min(xl[!is.na(xl)]), max(xu[!is.na(xu)])))
    lines(u[!is.na(xl)], xl[!is.na(xl)], lty = 2)
    lines(u[!is.na(xu)], xu[!is.na(xu)], lty = 2)
}
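### Workflow sketch (hypothetical simulated data, commented out): fit the GPD
### above a threshold, convert to the point-process (GEV) scale, and read off
### return levels and a confidence interval on the return period.
# set.seed(1)
# wind <- 34 + rexp(200, rate = 0.1)    # fake exceedances in kt
# fit <- gpd.fit.gev(x = wind, thresh = 34, ratex = 4)   # ~4 storms per year
# pp.rl(fit$pp.mle, y = 50)             # 50-year return level
# get.conf(y = 64, model = list(coef = fit$pp.mle, cov = fit$pp.cov), f = "pp.rate")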
"gpd.fit" <- function(xdat, threshold, npy = 365, ydat = NULL, sigl = NULL,
    shl = NULL, siglink = identity, shlink = identity, show = TRUE,
    method = "Nelder-Mead", maxit = 10000, init = NULL, ...)
{
    ## Modified version of the ismev gpd.fit: named parameters, covariance via
    ## the generalized inverse, t-statistics, and optional user initial values.
    z <- list()
    npsc <- length(sigl) + 1
    npsh <- length(shl) + 1
    n <- length(xdat)
    z$trans <- FALSE
    if(is.function(threshold))
        stop("`threshold' cannot be a function")
    u <- rep(threshold, length.out = n)
    if(length(unique(u)) > 1) z$trans <- TRUE
    xdatu <- xdat[xdat > u]
    xind <- (1:n)[xdat > u]
    u <- u[xind]
    in2 <- sqrt(6 * var(xdat))/pi
    in1 <- mean(xdat, na.rm = TRUE) - 0.57722 * in2
    if(is.null(sigl)) {
        sigmat <- as.matrix(rep(1, length(xdatu)))
        siginit <- in2
        cnames <- c("sigma")
    }
    else {
        z$trans <- TRUE
        sigmat <- cbind(rep(1, length(xdatu)), ydat[xind, sigl, drop = F])
        siginit <- c(in2, rep(0, length(sigl)))
        cnames <- paste("sigma", colnames(sigmat), sep = ".")
    }
    if(is.null(shl)) {
        shmat <- as.matrix(rep(1, length(xdatu)))
        shinit <- 0.1
        cnames <- c(cnames, "xi")
    }
    else {
        z$trans <- TRUE
        shmat <- cbind(rep(1, length(xdatu)), ydat[xind, shl, drop = F])
        shinit <- c(0.1, rep(0, length(shl)))
        cnames <- c(cnames, paste("xi", colnames(shmat), sep = "."))
    }
    if(is.null(init)) init <- c(siginit, shinit)
    z$model <- list(sigl, shl)
    z$link <- deparse(substitute(c(siglink, shlink)))
    z$threshold <- threshold
    z$nexc <- length(xdatu)
    z$data <- xdatu
    gpd.lik <- function(a) {
        ## Negative log-likelihood of the GPD for exceedances over u.
        sc <- siglink(sigmat %*% (a[seq(1, length = npsc)]))
        xi <- shlink(shmat %*% (a[seq(npsc + 1, length = npsh)]))
        y <- (xdatu - u)/sc
        y <- 1 + xi * y
        if(min(sc) <= 0) l <- 10^6
        else {
            if(min(y) <= 0) l <- 10^6
            else {
                l <- sum(log(sc)) + sum(log(y) * (1/xi + 1))
            }
        }
        l
    }
    x <- optim(init, gpd.lik, hessian = TRUE, method = method,
        control = list(maxit = maxit, ...))
    sc <- siglink(sigmat %*% (x$par[seq(1, length = npsc)]))
    xi <- shlink(shmat %*% (x$par[seq(npsc + 1, length = npsh)]))
    z$conv <- x$convergence
    z$nllh <- x$value
    z$vals <- cbind(sc, xi, u)
    if(z$trans) {
        z$data <- -log(as.vector((1 + (xi * (xdatu - u))/sc)^(-1/xi)))
    }
    names(x$par) <- cnames
    dimnames(x$hessian) <- list(cnames, cnames)
    z$mle <- x$par
    z$rate <- length(xdatu)/n
    z$cov <- ginv(x$hessian)    # generalized inverse from MASS
    dimnames(z$cov) <- dimnames(x$hessian)
    z$se <- sqrt(diag(z$cov))
    z$ttest <- z$mle/z$se
    z$n <- n
    z$npy <- npy
    z$xdata <- xdat
    if(show) {
        if(z$trans) print(z[c(2, 3)])
        if(length(z[[4]]) == 1) print(z[4])
        print(z[c(5, 7)])
        if(!z$conv) print(z[c(8, 10, 11, 13, 14)])
    }
    z$test <- (sc + xi * (xdatu - u))/abs(xi)
    invisible(z)
}

identity <- function(x) x

gpd.fitrange2 <- function(data, umin, umax, nint = 10, show = FALSE)
{
    ## Fits the GPD over a range of thresholds and plots the modified scale
    ## and shape with 95% confidence bars, for threshold selection.
    m <- s <- up <- ul <- matrix(0, nrow = nint, ncol = 2)
    u <- seq(umin, umax, length = nint)
    for(i in 1:nint) {
        z <- gpd.fit(data, u[i], show = show)
        m[i, ] <- z$mle
        m[i, 1] <- m[i, 1] - m[i, 2] * u[i]
        d <- matrix(c(1, -u[i]), ncol = 1)
        v <- t(d) %*% z$cov %*% d
        s[i, ] <- z$se
        s[i, 1] <- sqrt(v)
        up[i, ] <- m[i, ] + 1.96 * s[i, ]
        ul[i, ] <- m[i, ] - 1.96 * s[i, ]
    }
    names <- c("Modified Scale", "Shape")
    for(i in 2:1) {
        um <- max(up[, i])
        ud <- min(ul[, i])
        plot(u, m[, i], ylim = c(ud, um), xlab = "Threshold",
            ylab = names[i], type = "b")
        for(j in 1:nint) lines(c(u[j], u[j]), c(ul[j, i], up[j, i]))
    }
    invisible()
}
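### Threshold-selection sketch (commented out), using the hypothetical wind
### series from the sketch above: the mean residual life plot should be
### roughly linear, and the modified scale and shape roughly constant, above
### an adequate threshold.
# mrl.plot(wind)
# gpd.fitrange2(wind, umin = 34, umax = 64, nint = 10)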
### From HurRisk, but useful widely.
PredictHurRiskFun <- function(theta, thetaref = NULL, vthetaref, threshold,
    loglink = FALSE)
{
    ## Generates the POT survival function of (time, velocity): the output is
    ## a function which estimates P(Max_{t <= Time} Wmax(t) > v). This assumes
    ## we see N[T] storms in T years with maxima Wmax[i], so the quantity of
    ## interest is P(Max_{i = 1, ..., N[T]} Wmax[i] > v).
    ## theta is a vector of parameters, either (lograte, logsigma, xi) when
    ## loglink = TRUE or (rate, sigma, xi) otherwise.
    ## The expectation over parameter uncertainty is an integral over R^3,
    ## mapped to [0,1]^3: if z is uniform on [0,1]^3, then
    ## x = g(z) = theta + chol(vtheta) %*% qnorm(z) is N(theta, vtheta), so
    ## E(f(x)) = E(f(g(z))) is an integral over [0,1]^3, evaluated here by
    ## simple Monte Carlo. (The same transform works with adaptive
    ## integration, e.g. via package cubature.)
    ## The change from rate and sigma to lograte and logsigma prevents
    ## negative values for sigma and rate.
    ## This returns a function (not a value), which is then evaluated at
    ## different times (in years) and Wmax values; note that all the
    ## variables are stored within the returned function's environment.
    ## Correction for logarithm?
    ## Model assumptions:
    ##  - maximum storm strength follows a GPD distribution;
    ##  - storm frequency above any threshold follows a Poisson distribution.
    if(!loglink) {
        if(is.null(thetaref)) thetaref <- theta
        mult <- 1/c(thetaref[1:2], 1)
        vthetaref <- vthetaref * outer(mult, mult, "*")    # delta method
        ## Assume z = exp(y), where y is normal with mean my and variance s2y,
        ## and the parameters have mean mx and variance s2x. Matching moments,
        ## E(z) = exp(my + s2y/2) = mx and VAR(z) = E(z)^2 * (exp(s2y) - 1)
        ## = s2x, so set s2y = log(s2x/mx^2 + 1) and my = log(mx) - s2y/2.
        ## For the multivariate case, with z[1] = exp(y[1]), z[2] = y[2], and
        ## cov(y[1], y[2]) = sy1y2, we have cov(x[1], x[2]) = sx1x2 =
        ## cov(exp(y[1]), y[2]) = E(exp(y[1])) * sy1y2 = mx1 * sy1y2, so
        ## sy1y2 = sx1x2/mx1; with both z = exp(y),
        ## sx1x2 = mx1 * mx2 * (exp(sy1y2) - 1), so
        ## sy1y2 = log(sx1x2/(mx1*mx2) + 1).
        ## Correction to the delta method gives the correct variance for exp(x):
        vthetaref[c(1:2), c(1:2)] <- log(vthetaref[c(1:2), c(1:2)] + 1)
        ## Moment correction subtracted so the log-scale means match:
        ltheta <- c(log(theta[1:2]) - diag(vthetaref)[1:2]/2, theta[3])
    }
    else ltheta <- theta    # already supplied as (lograte, logsigma, xi)
    vthetachol <- t(chol(vthetaref))
    out <- function(Wmax, time, N = 10000, meanonly = TRUE)
    {
        ## We use the reference parameter values to determine a relative rate
        ## adjustment. In most cases this is the ratio D/Dref, where Dref is
        ## the reference distance and D the desired distance; for example, if
        ## D = 50 and Dref = 100, this should be 2. We incorporate the
        ## sensitivity of the model by fitting a model and translating the
        ## parameters (log(rate), log(sigma), xi) to match the fitted values
        ## for the desired radius, taking the estimated parameter covariances
        ## at the reference radius to be the same as those for the desired
        ## radius. Thus the model parameters are assumed
        ## (log(rate), log(sigma), xi) ~ N(ltheta, vthetaref).
        ## Not used:
        # relrate <- theta[1]/thetaref[1]
        ## This attempted adjustment for nonlinearities failed:
        # WmaxM <- pp.rl(x = c(threshold, thetaref[2:3]),
        #     y = 1/pp.rate(x = c(threshold, theta[2:3]), y = Wmax))
        yy <- rep(Wmax, length(time))
        times <- rep(time, each = length(Wmax))
        tmp <- function(x)
        {
            z <- ltheta + vthetachol %*% qnorm(x)
            value <- 1 - exp(-pp.vec.rate(x = cbind(rate = exp(z[1, ]),
                sigma = exp(z[2, ]), xi = z[3, ]),
                u = threshold, y = yy, time = times))
            dim(value) <- c(N, length(Wmax), length(time))
            dimnames(value) <- list(sample = 1:N, Wmax = Wmax, time = time)
            if(meanonly) return(apply(value, c(2, 3), mean))
            else return(value)
        }
        s <- runif(3 * N)
        dim(s) <- c(3, N)
        tmp(s)
    }
    return(out)
}
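### Usage sketch for PredictHurRiskFun (hypothetical inputs, commented out):
### build the survival function from a fitted (rate, sigma, xi) and its
### covariance, then evaluate it on a grid of wind speeds and horizons.
# fit <- gpd.fit.gev(x = wind, thresh = 34, ratex = 4)
# S <- PredictHurRiskFun(theta = fit$gpd.mle, vthetaref = fit$gpd.cov,
#     threshold = 34)
# S(Wmax = c(64, 96), time = c(10, 30, 50))  # P(some storm exceeds Wmax within time)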
pp.vec.rate <- function(x, u, y, time = NULL, Dimnames = FALSE)
{
    ## Vectorized version of pp.rate: the parameters rate, sigma, xi are the
    ## columns of the matrix x (x[,1] is the rate, x[,2] the scale, x[,3] the
    ## shape), u is the threshold, and y the wind speeds. The rate term is
    ## added for compatibility with the GPD, where rate = threshold crossing
    ## rate, giving a thinned Poisson process.
    ## time is a vector with one value for each y value; this allows us to
    ## find the expected count for a fixed time span in years.
    if(!is.null(time)) {
        if(length(time) == 1) time <- rep(time, length(y))
        if(length(time) != length(y))
            stop("time length should be 1 or length of y")
    }
    denom <- 1 + outer(x[, 3]/x[, 2], (y - u), "*")
    ones <- rep(1, length(y))
    test <- denom <= 0
    out <- outer(x[, 1], ones, "*") * denom^(outer(-1/x[, 3], ones, "*"))
    out[test] <- 0
    if(!is.null(time)) out <- out * outer(rep(1, nrow(x)), time, "*")
    if(Dimnames) {
        colnames(out) <- paste(y, round(time), sep = ".")
        rownames(out) <- rownames(x)
    }
    out
}
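### Quick consistency check (hypothetical values, commented out): each row of
### the pp.vec.rate output is pp.rate evaluated with that row's parameters,
### scaled by time.
# par.mat <- cbind(rate = c(1, 2), sigma = c(8, 10), xi = c(0.1, 0.2))
# pp.vec.rate(par.mat, u = 34, y = c(64, 96), time = 10)  # 2 x 2 expected counts
# 10 * pp.rate(c(34, 8, 0.1), y = 64, rate = 1)           # matches entry [1, 1]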