# Fitting a Copula to Data
# ________________________


# 1. Data Transformations
# _______________________

# R libraries include special commands that are not commonly used. 

library("copula")      
library("mFilter")     
library("stats")
library("e1071")         # for skewness
library(VineCopula)      # for BiCopGofTest()
library("ggExtra")       # for marginal histograms
library("ggplot2")       # for marginal histograms

#mydata <- read.csv("K:/AIMS South Africa 2019/Data/WheatYield.csv", header=TRUE)
mydata <- read.csv("../Data/WheatYield.csv", header=TRUE)
head(mydata)      # Shows the top of the data file.
attach(mydata)    # Variables in the datafile can be directly accessed 
                  # using their names. 

YieldNSWts <- ts(YieldNSW,frequency=1,start=c(1861,1))
YieldVICts <- ts(YieldVIC,frequency=1,start=c(1861,1))
YieldWAts <- ts(YieldWA,frequency=1,start=c(1861,1))

plot(YieldNSWts, type = "l")
abline(h = 1.0, lty=1)
plot(YieldVICts, type = "l")
abline(h = 1.0, lty=1)
plot(YieldWAts, type = "l")
abline(h = 1.0, lty=1)

# Use a nonlinear trend line (HP filter) and draw two charts showing 
# the yield data and trend, and the cyclical deviations from the trend.

YieldNSW.hp <- hpfilter(YieldNSWts,freq=20000,type=c("lambda"),drift=FALSE) 
plot(YieldNSW.hp)
plot(YieldNSW.hp$cycle, type="l")
abline(h = 0, lty=1)
YieldVIC.hp <- hpfilter(YieldVICts,freq=20000,type=c("lambda"),drift=FALSE) 
plot(YieldVIC.hp)
plot(YieldVIC.hp$cycle, type = "l")
abline(h=0,lty=1)
YieldWA.hp <- hpfilter(YieldWAts,freq=20000,type=c("lambda"),drift=FALSE) 
plot(YieldWA.hp)
plot(YieldWA.hp$cycle, type = "l")
abline(h=0,lty=1)

# Draw scatter plots of wheat yields across states.  

plot(YieldNSW.hp$cycle, YieldWA.hp$cycle,
     xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5),
     xy.labels = FALSE, col = "blue", 
     main = "Wheat Yield: WA and NSW", 
     cex = 0.8, cex.main = 1.0, cex.axis = 1.0, cex.lab = 1.0)
plot(YieldVIC.hp$cycle, YieldWA.hp$cycle,
     xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5),
     xy.labels = FALSE, col = "blue", 
     main = "Wheat Yield: WA and VIC", 
     cex = 0.8)
plot(YieldNSW.hp$cycle, YieldVIC.hp$cycle,
     xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5),
     xy.labels = FALSE, col = "blue", 
     main = "Wheat Yield: NSW and VIC", 
     cex = 0.8)

# Create data matrices with the detrended wheat yields for each pair
# of states. 

NSWWA <- cbind(YieldNSW.hp$cycle, YieldWA.hp$cycle)
# Or: NSWWA <- matrix(c(YieldNSW.hp$cycle, YieldWA.hp$cycle), ncol = 2)
head(NSWWA)             # Check the data matrix NSWA
dim(NSWWA) 

VICWA <- cbind(YieldVIC.hp$cycle, YieldWA.hp$cycle)
head(VICWA)
dim(VICWA) 

NSWVIC <- cbind(YieldNSW.hp$cycle, YieldVIC.hp$cycle)
head(NSWVIC)
dim(NSWVIC) 

# Correlations between wheat yields across states:

cor(NSWWA, method="pearson")
cor(NSWWA, method="kendall") 
cor(NSWWA, method="spearman")

cor(VICWA, method="pearson")
cor(VICWA, method="kendall") 
cor(VICWA, method="spearman")

cor(NSWVIC, method="pearson")
cor(NSWVIC, method="kendall") 
cor(NSWVIC, method="spearman")


# 2. Fitting a Gaussian Copula 
# ____________________________

# The following code uses the empirical distribution function for 
# the pseudo-observations. 

# Define a bivariate Gaussian (normal) normal copula and fit it to 
# the pseudo-observations for NSW and WA using maximum likelihood.

normal.cop <- normalCopula(dim=2)
set.seed(7)
fit.cop <- fitCopula(normal.cop,pobs(NSWWA),method="ml")
rho <-coef(fit.cop)
fit.cop
AIC(fit.cop)          # AIC information criterion
BIC(fit.cop)          # Bayesian information criterion

# Repeat the preceding steps for VIC and WA. 

normal.cop <- normalCopula(dim=2)
fit.cop <- fitCopula(normal.cop,pobs(VICWA),method="ml")
rho <-coef(fit.cop)
rho
fit.cop
AIC(fit.cop)          
BIC(fit.cop)        

# Repeat the preceding steps for NSW and VIC. 

normal.cop <- normalCopula(dim=2)
set.seed(7)
fit.cop <- fitCopula(normal.cop,pobs(NSWVIC),method="ml")
rho <-coef(fit.cop)
rho
fit.cop
AIC(fit.cop)
BIC(fit.cop)


# 3. Fitting a Student t Copula
# _____________________________

t.cop <- tCopula(dim=2)
set.seed(7)
fit.cop <- fitCopula(t.cop,pobs(NSWWA),method="ml")
rho <-coef(fit.cop)
rho
fit.cop
AIC(fit.cop)          
BIC(fit.cop)          

t.cop <- tCopula(dim=2,df=3)
set.seed(7)
fit.cop <- fitCopula(t.cop,pobs(VICWA))
rho <-coef(fit.cop)
rho
fit.cop
AIC(fit.cop)          
BIC(fit.cop)          

t.cop <- tCopula(dim=2)
set.seed(7)
fit.cop <- fitCopula(t.cop,pobs(NSWVIC),method="ml")
rho <-coef(fit.cop)
rho
fit.cop
AIC(fit.cop)          
BIC(fit.cop)          


# 4. Empirical Distribution Function
# __________________________________

# Draw histograms for marginal probability density functions:

# hist(YieldNSW.hp$cycle, breaks=20)                          
# hist(YieldVIC.hp$cycle, breaks=20)
# hist(YieldWA.hp$cycle, breaks=20) 

hist(YieldNSW.hp$cycle, breaks=25,         
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Marginal PDF of Wheat Yield in NSW",
     xlab="Wheat Yield (t/ha)", ylab="frequency")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

hist(YieldVIC.hp$cycle, breaks=25,         
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Marginal PDF of Wheat Yield in VIC",
     xlab="Wheat Yield (t/ha)", ylab="frequency")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

hist(YieldWA.hp$cycle, breaks=25,
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Figure 6. Marginal PDF of Wheat Yield in WA",
     xlab="Wheat Yield (t/ha)", ylab="f(x2)")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

# Draw empirical CDFs or the marginal distributions:

Fm <- ecdf(YieldNSW.hp$cycle)   
summary(Fm) 
Fi <- ecdf(YieldVIC.hp$cycle)   
summary(Fm)
Fj <- ecdf(YieldWA.hp$cycle)
summary(Fi)

plot(Fm, verticals = TRUE, do.points = FALSE,
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Figure 7. Empirical Marginal CDF of Wheat Yield in NSW",
     xlab="Wheat Yield in NSW (t/ha)", ylab="Probability")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

plot(Fi, verticals = TRUE, do.points = FALSE,
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Figure 7. Empirical Marginal CDF of Wheat Yield in VIC",
     xlab="Wheat Yield in VIC (t/ha)", ylab="Probability")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

plot(Fj, verticals = TRUE, do.points = FALSE,
     cex = 0.6, cex.main = 1.0,cex.axis = 0.9, cex.lab = 0.9, 
     main="Figure 8. Empirical Marginal CDF of Wheat Yield in WA",
     xlab="Wheat Yield in WA (t/ha)", ylab="Probability")
# mtext("Add subtitle", cex=0.9, font=3, lheight=10, line=0.4)

# Draw marginal histograms

df1 <- data.frame(x = YieldNSW.hp$cycle, y = YieldWA.hp$cycle)
p1 <- ggplot(df1, aes(x, y)) + geom_point() + theme_bw()
p1
ggMarginal(p1 + 
   ggtitle("Marginal Distributions", subtitle = "NSW and WA") +
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) +
   xlab("Wheat Yield NSW (t/ha)") + ylab("Wheat Yield WA (t/ha)"), type = "histogram")

df2 <- data.frame(x = YieldVIC.hp$cycle, y = YieldWA.hp$cycle)
p2 <- ggplot(df2, aes(x, y)) + geom_point() + theme_bw()
p2
ggMarginal(p2 + 
   ggtitle("Marginal Distributions", subtitle = "VIC and WA") +
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) +
   xlab("Wheat Yield VIC (t/ha)") + ylab("Wheat Yield WA (t/ha)"), type = "histogram")

df3 <- data.frame(x = YieldNSW.hp$cycle, y = YieldVIC.hp$cycle)
p3 <- ggplot(df3, aes(x, y)) + geom_point() + theme_bw()
p3
ggMarginal(p3 + 
   ggtitle("Marginal Distributions", subtitle = "NSW and VIC") +
   theme(plot.title = element_text(hjust = 0.5)) +
   theme(plot.subtitle = element_text(hjust = 0.5, face = "italic")) +
   xlab("Wheat Yield NSW (t/ha)") + ylab("Wheat Yield VIC (t/ha)"), type = "histogram")