Kinetic Energy of Tornadoes in the United States

James B. Elsner & Tyler Fricker

Code in support of our paper in review with PLoS ONE.

Set working directory and load packages.

setwd("~/Dropbox/Tornadoes")
library(ggplot2)
library(ggmap)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rgdal)
## Loading required package: sp
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj

NRC EF3 model

library(rgeos)
## rgeos version: 0.3-4, (SVN revision 438)
##  GEOS runtime version: 3.3.3-CAPI-1.7.4 
##  Polygon checking: TRUE
EF0 = matrix(
     c(0, 0, 200, 200, 0, 0, 50, 50, 0, 0),
     nrow = 5,
     ncol = 2)
EF1 = matrix(
     c(52.9, 52.9, 147.1, 147.1, 52.9, 13.225, 36.775, 36.775, 13.225, 13.225),
     nrow = 5,
     ncol = 2)
EF2 = matrix(
     c(80, 80, 120, 120, 80, 20, 30, 30, 20, 20),
     nrow = 5,
     ncol = 2)
EF3 = matrix(
     c(93.3, 93.3, 106.7, 106.7, 93.3, 23.325, 26.675, 26.675, 23.325, 23.325),
     nrow = 5,
     ncol = 2)
p0 = Polygon(EF0)
ps0 = Polygons(list(p0), 0)
p1 = Polygon(EF1)
ps1 = Polygons(list(p1), 1)
p2 = Polygon(EF2)
ps2 = Polygons(list(p2), 2)
p3 = Polygon(EF3)
ps3 = Polygons(list(p3), 3)
spsNRC3 = SpatialPolygons(list(ps0, ps1, ps2, ps3))
spsNRC3.df = fortify(spsNRC3)
spsNRC3.df$EF = paste("EF", spsNRC3.df$id, sep = "")
gArea(spsNRC3, byid = TRUE)
##        0        1        2        3 
## 10000.00  2218.41   400.00    44.89
library("wesanderson")
pal = wes.palette(3, name = "Zissou")
pal = c(grey(.75), pal)
ggplot(spsNRC3.df, aes(x = long, y = lat, fill = EF)) +
  geom_polygon() + 
  coord_fixed() + 
  xlab("") + ylab("") +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        strip.background = element_blank(),
        legend.position = "right") +
#  theme_tufte() +
  scale_fill_manual(values = pal) +
  theme(legend.title = element_blank()) 

plot of chunk unnamed-chunk-2

Tornado data

Data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/.

download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
              "tornado.zip", mode = "wb")
unzip("tornado.zip")

Read the tornado data.

TornL = readOGR(dsn = ".", layer = "tornado", 
                stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
TornL$OM = as.integer(TornL$OM)
TornL$Year = as.integer(TornL$YR)
TornL$Month = as.integer(TornL$MO)
TornL$EF = as.integer(TornL$MAG)
TornL$Date = as.Date(TornL$DATE)
TornL$Length = as.numeric(TornL$LEN) * 1609.34
TornL$Width = as.numeric(TornL$WID) * .9144
TornL$FAT = as.integer(TornL$FAT)
TornL$SLON = as.numeric(TornL$SLON)
TornL$SLAT = as.numeric(TornL$SLAT)
TornL$ELON = as.numeric(TornL$ELON)
TornL$ELAT = as.numeric(TornL$ELAT)
TornL$INJ = as.numeric(TornL$INJ)
TornL$LOSS = as.numeric(TornL$LOSS)
Torn.df = as.data.frame(TornL)
#write.table(Torn.df, file = "Tornadoes.txt", row.names = FALSE)

Total kinetic energy (TKE)

Torn.df = Torn.df %>%
  filter(Year >= 2007) %>%
  mutate(Area = .5 * Length * .5 * Width * pi)
dim(Torn.df)
## [1] 8752   28

Empirical model from Table 3-1 of NRC 2007. Percent area by EF rating for each EF category. Threshold wind speeds (m/s) are lower bound 3 sec gusts on the operational EF Scale (Table 2-1 of NRC2007). Area * 1000 = volume in cubic meters.

perc = c(1, 0, 0, 0, 0, 0, 
         .772, .228, 0, 0, 0, 0,
         .616, .268, .115, 0, 0, 0,
         .529, .271, .133, .067, 0, 0,
         .543, .238, .131, .056, .032, 0,
         .538, .223, .119, .07, .033, .017)
percM = matrix(perc, ncol = 6, byrow = TRUE)
threshW = c(29.06, 38.45, 49.62, 60.8, 74.21, 89.41)
midptW = c(diff(threshW)/2 + threshW[-length(threshW)], threshW[length(threshW)] + 7.5)
midptW
## [1] 33.76 44.03 55.21 67.50 81.81 96.91
ef = Torn.df$EF + 1
EW2 = numeric()
for(i in 1:length(ef)){
  EW2[i] = midptW^2 %*% percM[ef[i], ]
  }
Torn.df = Torn.df %>%
  mutate(TKE = .5 * EW2 * Area * 1000,
         DPI = Area * (EF + 1),
         TDI = (midptW[ef] * Width)^2)
summary(Torn.df$TKE)/10^9 # gigajoules
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       9      62    1650     383  517000
goftest::ad.test(log10(Torn.df$TKE), 
                 null = 'pnorm',
                 mean = mean(log10(Torn.df$TKE)), 
                 sd = sd(log10(Torn.df$TKE)))
## 
##  Anderson-Darling test of goodness-of-fit
##  Null hypothesis: Normal distribution
##  with parameters mean = 10.7927730358991, sd = 1.16499679732759
## 
## data:  log10(Torn.df$TKE)
## An = 3.479, p-value = 0.01573
quantile(Torn.df$TKE/10^12, prob = c(.9, .95, .99))
##    90%    95%    99% 
##  1.975  5.542 31.916
Torn.df[which.max(Torn.df$TKE), ]
##       OM   YR MO DY       DATE     TIME TZ ST STF STN MAG INJ FAT LOSS
## 4079 140 2010  4 24 2010-04-24 10:09:00  3 LA  22   6   4 146  10  386
##      CLOSS SLAT  SLON  ELAT   ELON    LEN  WID Year Month EF       Date
## 4079 23.52 32.4 -91.3 33.43 -89.04 148.97 3080 2010     4  4 2010-04-24
##      Length Width      Area       TKE       DPI       TDI
## 4079 239743  2816 530302212 5.167e+14 2.652e+09 5.309e+10

Reject the null hypothesis of a lognormal distribution.

Distribution of TKE

library(scales)
ggplot(Torn.df, aes(TKE)) +
  geom_histogram(binwidth = .5, color = "white") +
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                     labels = trans_format("log10", math_format(10^.x))) +
  xlab("Total Kinetic Energy (J)") +
  ylab("Number of Tornadoes")

plot of chunk unnamed-chunk-10

Validation

Compare with DPI and TDI

Torn.df = Torn.df %>%
  mutate(TKErank = length(TKE) - rank(TKE, ties.method = "min") + 1,
         DPIrank = length(DPI) - rank(DPI, ties.method = "min") + 1,
         TDIrank = length(TDI) - rank(TDI, ties.method = "min") + 1)
cor.test(Torn.df$TKE, Torn.df$DPI, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKE and Torn.df$DPI
## t = 795.3, df = 8750, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.9929 0.9934
## sample estimates:
##    cor 
## 0.9932
cor.test(Torn.df$TKE, Torn.df$TDI, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKE and Torn.df$TDI
## t = 98.89, df = 8750, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.7181 0.7347
## sample estimates:
##    cor 
## 0.7265
cor.test(Torn.df$TKE[Torn.df$EF >= 2], Torn.df$DPI[Torn.df$EF >= 2], conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKE[Torn.df$EF >= 2] and Torn.df$DPI[Torn.df$EF >= 2]
## t = 299.9, df = 1114, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.9932 0.9944
## sample estimates:
##    cor 
## 0.9939
cor.test(Torn.df$TKE[Torn.df$EF >= 4], Torn.df$TDI[Torn.df$EF >= 4], conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKE[Torn.df$EF >= 4] and Torn.df$TDI[Torn.df$EF >= 4]
## t = 6.843, df = 64, p-value = 3.502e-09
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.5140 0.7542
## sample estimates:
##  cor 
## 0.65
cor.test(Torn.df$TKErank, Torn.df$DPIrank)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKErank and Torn.df$DPIrank
## t = 1078, df = 8750, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9961 0.9964
## sample estimates:
##    cor 
## 0.9963
cor.test(Torn.df$TKErank, Torn.df$TDIrank)
## 
##  Pearson's product-moment correlation
## 
## data:  Torn.df$TKErank and Torn.df$TDIrank
## t = 169.1, df = 8750, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8701 0.8799
## sample estimates:
##    cor 
## 0.8751
df = tbl_df(Torn.df) %>%
  select(Date, EF, TKE, DPI, TDI, TKErank, DPIrank, TDIrank) %>%
  arrange(desc(TKE))
df
## Source: local data frame [8,752 x 8]
## 
##          Date EF       TKE       DPI       TDI TKErank DPIrank TDIrank
## 1  2010-04-24  4 5.167e+14 2.652e+09 5.309e+10       1       1       4
## 2  2011-04-27  5 3.537e+14 2.014e+09 3.801e+10       2       2       6
## 3  2011-04-27  4 2.362e+14 1.212e+09 3.783e+10       3       3       8
## 4  2011-04-27  4 2.027e+14 1.040e+09 1.109e+10       4       4      45
## 5  2011-04-27  4 1.929e+14 9.899e+08 1.733e+10       5       5      26
## 6  2008-02-05  4 1.811e+14 9.294e+08 9.751e+09       6       6      55
## 7  2011-04-25  2 1.794e+14 6.848e+08 2.143e+10       7      10      20
## 8  2008-05-10  4 1.497e+14 7.680e+08 1.733e+10       8       8      26
## 9  2011-04-27  4 1.443e+14 7.405e+08 6.170e+09       9       9     102
## 10 2012-03-02  3 1.427e+14 6.208e+08 9.512e+09      10      11      57
## ..        ... ..       ...       ...       ...     ...     ...     ...

Correlated with losses, injuries, fatalities

cor.test(Torn.df$TKE, Torn.df$FAT, conf.level = .9)$estimate
##    cor 
## 0.4152
cor.test(Torn.df$TKE, Torn.df$FAT, conf.level = .9)$conf.int
## [1] 0.4005 0.4296
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$DPI, Torn.df$FAT, conf.level = .9)$estimate
##    cor 
## 0.4446
cor.test(Torn.df$DPI, Torn.df$FAT, conf.level = .9)$conf.int
## [1] 0.4304 0.4586
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$TDI, Torn.df$FAT, conf.level = .9)$estimate
##    cor 
## 0.3498
cor.test(Torn.df$TDI, Torn.df$FAT, conf.level = .9)$conf.int
## [1] 0.3343 0.3651
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$TKE, Torn.df$INJ, conf.level = .9)$estimate
##   cor 
## 0.394
cor.test(Torn.df$TKE, Torn.df$INJ, conf.level = .9)$conf.int
## [1] 0.3791 0.4088
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$DPI, Torn.df$INJ, conf.level = .9)$estimate
##    cor 
## 0.4083
cor.test(Torn.df$DPI, Torn.df$INJ, conf.level = .9)$conf.int
## [1] 0.3936 0.4229
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$TDI, Torn.df$INJ, conf.level = .9)$estimate
##    cor 
## 0.3318
cor.test(Torn.df$TDI, Torn.df$INJ, conf.level = .9)$conf.int
## [1] 0.3161 0.3474
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$TKE, Torn.df$LOSS, conf.level = .9)$estimate
##    cor 
## 0.3662
cor.test(Torn.df$TKE, Torn.df$LOSS, conf.level = .9)$conf.int
## [1] 0.3509 0.3814
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$DPI, Torn.df$LOSS, conf.level = .9)$estimate
##    cor 
## 0.3903
cor.test(Torn.df$DPI, Torn.df$LOSS, conf.level = .9)$conf.int
## [1] 0.3753 0.4051
## attr(,"conf.level")
## [1] 0.9
cor.test(Torn.df$TDI, Torn.df$LOSS, conf.level = .9)$estimate
##    cor 
## 0.3387
cor.test(Torn.df$TDI, Torn.df$LOSS, conf.level = .9)$conf.int
## [1] 0.3230 0.3541
## attr(,"conf.level")
## [1] 0.9
Torn.df %>%
  group_by(EF) %>%
  summarize(nT = n())
## Source: local data frame [6 x 2]
## 
##   EF   nT
## 1  0 4994
## 2  1 2642
## 3  2  818
## 4  3  232
## 5  4   57
## 6  5    9

Variation plots

By EF category

df = Torn.df %>%
  group_by(EF) %>%
  summarize(Count = n(),
            TKEef = sum(TKE),
            avgTKE = mean(TKE),
            sd = sd(TKE),
            se = sd/sqrt(Count),
            ciMult = qt(.95/2 + .5, Count - 1),
            ci = se * ciMult)

ggplot(df, aes(x = factor(EF), y = avgTKE/10^12, fill = EF)) + 
  geom_histogram(stat = "identity") + 
  xlab("EF Category") + 
  ylab("Average Kinetic Energy (TJ)") + 
  scale_fill_continuous(low = "#fdd49e", high = "#990000", guide = "none") +
  geom_errorbar(aes(ymin = (avgTKE - se)/10^12, 
                    ymax = (avgTKE + se)/10^12), 
                width = .1) +
  geom_text(aes(label = Count, x = factor(EF), y = 0), 
            data = df, 
            vjust = 1.3, size = 4)

plot of chunk unnamed-chunk-13

Top ten days by TKE

df = Torn.df %>%
  group_by(Date) %>%
  summarize(Count = n(),
            TKEdaily = sum(TKE)/10^12,
            DPIdaily = sum(DPI),
            TDIdaily = sum(TDI)) %>% # terajoules
  mutate(TKEdailyRank = length(TKEdaily) - rank(TKEdaily, ties.method = "min") + 1,
         DPIdailyRank = length(DPIdaily) - rank(DPIdaily, ties.method = "min") + 1,
         TDIdailyRank = length(TDIdaily) - rank(TDIdaily, ties.method = "min") + 1) %>%
  arrange(desc(TKEdaily))
dim(df)
## [1] 1185    8
df
## Source: local data frame [1,185 x 8]
## 
##          Date Count TKEdaily  DPIdaily  TDIdaily TKEdailyRank DPIdailyRank
## 1  2011-04-27   207   2627.0 1.269e+10 3.421e+11            1            1
## 2  2010-04-24    37    655.0 3.224e+09 7.800e+10            2            2
## 3  2011-05-24    48    437.0 2.156e+09 9.209e+10            3            3
## 4  2012-03-02    69    419.6 1.775e+09 3.727e+10            4            4
## 5  2010-05-10    69    381.6 1.568e+09 8.845e+10            5            5
## 6  2012-04-14    85    349.1 1.425e+09 6.009e+10            6            7
## 7  2008-02-05    62    334.5 1.564e+09 2.871e+10            7            6
## 8  2011-04-15    73    319.4 1.233e+09 4.332e+10            8           10
## 9  2008-05-23    79    318.9 1.275e+09 1.172e+11            9            9
## 10 2007-05-04    28    273.0 1.325e+09 1.536e+11           10            8
## ..        ...   ...      ...       ...       ...          ...          ...
## Variables not shown: TDIdailyRank (dbl)
library(ggthemes)
library(wesanderson)
pal = wes.palette(name = "Zissou", type = "continuous")
top = 10
df2 = df[1:top, ]
or = order(df2$TKEdaily, decreasing = FALSE)
df2$DateF = factor(df2$Date, levels = as.character(df2$Date[or]))
ggplot(df2, aes(x = DateF, y = TKEdaily, fill = Count)) +
  geom_histogram(stat = "identity") + 
  coord_flip() + 
  xlab("Date (Year-Month-Day)") + 
  ylab("Total Kinetic Energy of U.S. Tornadoes (TJ)\nRanked by Day") + 
  scale_fill_gradientn(colours = pal(3), name = "Number of\nTornadoes")

plot of chunk unnamed-chunk-15

cor.test(df$TKEdaily, df$Count, conf.level = .9)
## 
##  Pearson's product-moment correlation
## 
## data:  df$TKEdaily and df$Count
## t = 34.39, df = 1183, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 90 percent confidence interval:
##  0.6823 0.7302
## sample estimates:
##    cor 
## 0.7071
df %>%
  mutate(Eff = TKEdaily/Count) %>%
  arrange(desc(Eff))
## Source: local data frame [1,185 x 9]
## 
##          Date Count TKEdaily  DPIdaily  TDIdaily TKEdailyRank DPIdailyRank
## 1  2010-04-24    37  654.999 3.224e+09 7.800e+10            2            2
## 2  2007-04-20     2   30.049 1.186e+08 5.178e+09           74           74
## 3  2011-04-27   207 2626.995 1.269e+10 3.421e+11            1            1
## 4  2008-07-24     4   40.364 1.540e+08 1.976e+09           65           68
## 5  2007-05-04    28  272.954 1.325e+09 1.536e+11           10            8
## 6  2011-05-24    48  437.007 2.156e+09 9.209e+10            3            3
## 7  2009-08-21     1    8.779 2.657e+07 7.944e+08          160          171
## 8  2013-04-11    14  112.979 4.651e+08 8.746e+09           28           27
## 9  2011-01-01     7   55.122 2.315e+08 1.550e+10           56           51
## 10 2013-10-04    18  141.736 6.978e+08 7.396e+10           24           19
## ..        ...   ...      ...       ...       ...          ...          ...
## Variables not shown: TDIdailyRank (dbl), Eff (dbl)

Cumulative daily aggregate energy by year

df = Torn.df %>%
  group_by(Year) %>%
  mutate(TKEc = cumsum(TKE),
         DoY = as.numeric(Date - as.Date(paste(Year, "-01-01", sep = ""))) + 1,
         Date2 = as.POSIXct(as.Date(DoY, origin = "2015-01-01"))) %>%
  select(Date2, Year, TKEc)
df$Year = as.character(df$Year)

ggplot(df, aes(x = Date2, y = TKEc/10^15, color = Year)) +
  geom_line(size = 2, alpha = .75) +
  scale_x_datetime(labels = date_format("%b"), breaks = date_breaks(width = "1 month")) +
  xlab("") + ylab("Cumulative Tornado Energy (PJ)")

plot of chunk unnamed-chunk-17

Monthly energy and frequency

df = Torn.df %>%
  group_by(Month) %>%
  summarize(TKEm = sum(TKE),
            Count = n()) %>%
  mutate(Ma = factor(month.abb[Month], levels = month.abb[1:12]))

pA = ggplot(df, aes(x = Ma, y = TKEm/10^12, fill = Count)) + 
  geom_histogram(stat = "identity") + 
  xlab("Month") + 
  ylab("Kinetic Energy (TJ)") +
  scale_fill_continuous(low = "#9ecae1", high = "#08519c", name = "Number of \nTornadoes")

pB = ggplot(df, aes(x = Ma, y = Count, fill = TKEm/10^12)) + 
  geom_histogram(stat = "identity") + 
  xlab("Month") + 
  ylab("Number of Tornadoes") +
  scale_fill_continuous(low = "#9ecae1", high = "#08519c", name = "Kinetic\nEnergy (TJ)")

source("multiplot.txt")
mat = matrix(c(1, 2), nrow = 2, byrow = TRUE)
pA = pA + ggtitle("a") + theme(plot.title = element_text(hjust = 0))
pB = pB + ggtitle("b") + theme(plot.title = element_text(hjust = 0))              
multiplot(pA, pB, layout = mat)
## Loading required package: grid