# Fitting a Copula to Data # ________________________ # 1. Data Transformations # _______________________ # R libraries include special commands that are not commonly used. library("copula") library("mFilter") library("stats") library("e1071") # for skewness library(VineCopula) # for BiCopGofTest() library("ggExtra") # for marginal histograms library("ggplot2") # for marginal histograms #mydata <- read.csv("K:/AIMS South Africa 2019/Data/WheatYield.csv", header=TRUE) mydata <- read.csv("../Data/WheatYield.csv", header=TRUE) head(mydata) # Shows the top of the data file. attach(mydata) # Variables in the datafile can be directly accessed # using their names. YieldNSWts <- ts(YieldNSW,frequency=1,start=c(1861,1)) YieldVICts <- ts(YieldVIC,frequency=1,start=c(1861,1)) YieldWAts <- ts(YieldWA,frequency=1,start=c(1861,1)) plot(YieldNSWts, type = "l") abline(h = 1.0, lty=1) plot(YieldVICts, type = "l") abline(h = 1.0, lty=1) plot(YieldWAts, type = "l") abline(h = 1.0, lty=1) # Use a nonlinear trend line (HP filter) and draw two charts showing # the yield data and trend, and the cyclical deviations from the trend. YieldNSW.hp <- hpfilter(YieldNSWts,freq=20000,type=c("lambda"),drift=FALSE) plot(YieldNSW.hp) plot(YieldNSW.hp$cycle, type="l") abline(h = 0, lty=1) YieldVIC.hp <- hpfilter(YieldVICts,freq=20000,type=c("lambda"),drift=FALSE) plot(YieldVIC.hp) plot(YieldVIC.hp$cycle, type = "l") abline(h=0,lty=1) YieldWA.hp <- hpfilter(YieldWAts,freq=20000,type=c("lambda"),drift=FALSE) plot(YieldWA.hp) plot(YieldWA.hp$cycle, type = "l") abline(h=0,lty=1) # Draw scatter plots of wheat yields across states. plot(YieldNSW.hp$cycle, YieldWA.hp$cycle, xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5), xy.labels = FALSE, col = "blue", main = "Wheat Yield: WA and NSW", cex = 0.8, cex.main = 1.0, cex.axis = 1.0, cex.lab = 1.0) plot(YieldVIC.hp$cycle, YieldWA.hp$cycle, xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5), xy.labels = FALSE, col = "blue", main = "Wheat Yield: WA and VIC", cex = 0.8) plot(YieldNSW.hp$cycle, YieldVIC.hp$cycle, xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5), xy.labels = FALSE, col = "blue", main = "Wheat Yield: NSW and VIC", cex = 0.8) # Create data matrices with the detrended wheat yields for each pair # of states. NSWWA <- cbind(YieldNSW.hp$cycle, YieldWA.hp$cycle) # Or: NSWWA <- matrix(c(YieldNSW.hp$cycle, YieldWA.hp$cycle), ncol = 2) head(NSWWA) # Check the data matrix NSWA dim(NSWWA) VICWA <- cbind(YieldVIC.hp$cycle, YieldWA.hp$cycle) head(VICWA) dim(VICWA) NSWVIC <- cbind(YieldNSW.hp$cycle, YieldVIC.hp$cycle) head(NSWVIC) dim(NSWVIC) # Correlations between wheat yields across states: cor(NSWWA, method="pearson") cor(NSWWA, method="kendall") cor(NSWWA, method="spearman") cor(VICWA, method="pearson") cor(VICWA, method="kendall") cor(VICWA, method="spearman") cor(NSWVIC, method="pearson") cor(NSWVIC, method="kendall") cor(NSWVIC, method="spearman") # 2. Fitting a Gaussian Copula # ____________________________ # The following code uses the empirical distribution function for # the pseudo-observations. # Define a bivariate Gaussian (normal) normal copula and fit it to # the pseudo-observations for NSW and WA using maximum likelihood. normal.cop <- normalCopula(dim=2) set.seed(7) fit.cop <- fitCopula(normal.cop,pobs(NSWWA),method="ml") rho <-coef(fit.cop) fit.cop AIC(fit.cop) # AIC information criterion BIC(fit.cop) # Bayesian information criterion # Repeat the preceding steps for VIC and WA. normal.cop <- normalCopula(dim=2) fit.cop <- fitCopula(normal.cop,pobs(VICWA),method="ml") rho <-coef(fit.cop) rho fit.cop AIC(fit.cop) BIC(fit.cop) # Repeat the preceding steps for NSW and VIC. normal.cop <- normalCopula(dim=2) set.seed(7) fit.cop <- fitCopula(normal.cop,pobs(NSWVIC),method="ml") rho <-coef(fit.cop) rho fit.cop AIC(fit.cop) BIC(fit.cop) # 3. Fitting a Student t Copula # _____________________________ t.cop <- tCopula(dim=2) set.seed(7) fit.cop <- fitCopula(t.cop,pobs(NSWWA),method="ml") rho <-coef(fit.cop) rho fit.cop AIC(fit.cop) BIC(fit.cop) t.cop <- tCopula(dim=2,df=3) set.seed(7) fit.cop <- fitCopula(t.cop,pobs(VICWA)) rho <-coef(fit.cop) rho fit.cop AIC(fit.cop) BIC(fit.cop) t.cop <- tCopula(dim=2) set.seed(7) fit.cop <- fitCopula(t.cop,pobs(NSWVIC),method="ml") rho <-coef(fit.cop) rho fit.cop AIC(fit.cop) BIC(fit.cop) # 4. Empirical Distribution Function # __________________________________ # Draw histograms for marginal probability density functions: # hist(YieldNSW.hp$cycle, breaks=20) # hist(YieldVIC.hp$cycle, breaks=20) # hist(YieldWA.hp$cycle, breaks=20) hist(YieldNSW.hp$cycle, breaks=25, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Marginal PDF of Wheat Yield in NSW", xlab="Wheat Yield (t/ha)", ylab="frequency") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) hist(YieldVIC.hp$cycle, breaks=25, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Marginal PDF of Wheat Yield in VIC", xlab="Wheat Yield (t/ha)", ylab="frequency") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) hist(YieldWA.hp$cycle, breaks=25, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Figure 6. Marginal PDF of Wheat Yield in WA", xlab="Wheat Yield (t/ha)", ylab="f(x2)") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) # Draw empirical CDFs or the marginal distributions: Fm <- ecdf(YieldNSW.hp$cycle) summary(Fm) Fi <- ecdf(YieldVIC.hp$cycle) summary(Fm) Fj <- ecdf(YieldWA.hp$cycle) summary(Fi) plot(Fm, verticals = TRUE, do.points = FALSE, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Figure 7. Empirical Marginal CDF of Wheat Yield in NSW", xlab="Wheat Yield in NSW (t/ha)", ylab="Probability") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) plot(Fi, verticals = TRUE, do.points = FALSE, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Figure 7. Empirical Marginal CDF of Wheat Yield in VIC", xlab="Wheat Yield in VIC (t/ha)", ylab="Probability") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) plot(Fj, verticals = TRUE, do.points = FALSE, cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, main="Figure 8. Empirical Marginal CDF of Wheat Yield in WA", xlab="Wheat Yield in WA (t/ha)", ylab="Probability") # mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4) # Draw marginal histograms df1 <- data.frame(x = YieldNSW.hp$cycle, y = YieldWA.hp$cycle) p1 <- ggplot(df1, aes(x, y)) + geom_point() + theme_bw() p1 ggMarginal(p1 + ggtitle("Marginal Distributions", subtitle = "NSW and WA") + theme(plot.title = element_text(hjust = 0.5)) + theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) + xlab("Wheat Yield NSW (t/ha)") + ylab("Wheat Yield WA (t/ha)"), type = "histogram") df2 <- data.frame(x = YieldVIC.hp$cycle, y = YieldWA.hp$cycle) p2 <- ggplot(df2, aes(x, y)) + geom_point() + theme_bw() p2 ggMarginal(p2 + ggtitle("Marginal Distributions", subtitle = "VIC and WA") + theme(plot.title = element_text(hjust = 0.5)) + theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) + xlab("Wheat Yield VIC (t/ha)") + ylab("Wheat Yield WA (t/ha)"), type = "histogram") df3 <- data.frame(x = YieldNSW.hp$cycle, y = YieldVIC.hp$cycle) p3 <- ggplot(df3, aes(x, y)) + geom_point() + theme_bw() p3 ggMarginal(p3 + ggtitle("Marginal Distributions", subtitle = "NSW and VIC") + theme(plot.title = element_text(hjust = 0.5)) + theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) + xlab("Wheat Yield NSW (t/ha)") + ylab("Wheat Yield VIC (t/ha)"), type = "histogram")