This document contains a progression of steps taken for creating a parametric stochastic model of daily precipitation.
We extract data from the original file, downloaded at NASA´s POWER access viewer. In this case, we are using the data given by the user community ‘Agroclimatology’ on Hissar (Haryana, India) between 01/01/1982 and 31/12/2014, containing the ICASA Format’s parameters:
weather <- read.csv("POWER_SinglePoint_Daily_19820101_20181231_029d15N_075d72E_3a7e0417.csv", skip = 17)
barplot(weather$PRECTOT,
ylab = "precipitation (mm)", xlab = "day (in series)")
boxplot(data = weather, PRECTOT ~ DOY, ylab = "precipitation (mm)", xlab = "day (in year)")
hist(weather$PRECTOT[weather$DOY == 201], xlab = "precipitation (mm)")
These plots show the difficulty of creating a parametric model for precipitation:
annualSum <- c()
for (year in levels(factor(weather$YEAR)))
{
annualSum <- c(annualSum, sum(weather$PRECTOT[weather$YEAR == year]))
}
layout(matrix(c(1,2), ncol = 2))
plot(levels(factor(weather$YEAR)), annualSum, type = 'l', xlab = "years", ylab = "annual precipitation (mm)")
abline(h = mean(annualSum), lty = 2)
text(x = levels(factor(weather$YEAR))[0.5 * nlevels(factor(weather$YEAR))],
y = max(annualSum),
labels = paste("mean annual precipitation (mm) = ", round(mean(annualSum), digits = 2) ),
cex = 0.6)
hist(annualSum, xlab = "precipitation (mm)")
The annual precipitation is a good candidate to be a parameter of our model (one that can be randomised every iteration using uniform or normal distribution).
summary_DOY <- function(multiYearSeries, doyName = "DOY", varName = "PRECTOT")
{
meanDailyValues <- c()
sdDailyValues <- c()
minDailyValues <- c()
maxDailyValues <- c()
for (day in levels(factor(multiYearSeries[, doyName])))
{
meanDailyValues <- c(meanDailyValues, mean(multiYearSeries[weather$DOY == day, varName], na.rm = T))
sdDailyValues <- c(sdDailyValues, sd(multiYearSeries[weather$DOY == day, varName], na.rm = T))
minDailyValues <- c(minDailyValues, min(multiYearSeries[weather$DOY == day, varName], na.rm = T))
maxDailyValues <- c(maxDailyValues, max(multiYearSeries[weather$DOY == day, varName], na.rm = T))
}
return(
data.frame(mean = meanDailyValues,
sd = sdDailyValues,
min = minDailyValues,
max = maxDailyValues)
)
}
summaryByDoy_empiric <- summary_DOY(weather)
summary_empiric <- summary(weather$PRECTOT)
summary_empiric$sd <- sd(weather$PRECTOT)
## Warning in summary_empiric$sd <- sd(weather$PRECTOT): Coercing LHS to a
## list
n = 33
maxDOY = 366 # max. days in year. more complex simulation can account better for leap years
dayOfYear <- c()
year_sim <- c()
dailyPrecipitation <- c()
for (i in 1:n)
{
for (day in 1:maxDOY)
{
year_sim <- c(year_sim, i)
dayOfYear <- c(dayOfYear, day)
dailyPrecipitation <- c(dailyPrecipitation,
# exponential model using the mean
(- summaryByDoy_empiric$mean[day]) * log(runif(1))
)
}
}
expModelOutput <- data.frame(year_sim, dayOfYear, dailyPrecipitation)
summaryByDoy_expModel <- summary_DOY(expModelOutput, doyName = "dayOfYear", varName = "dailyPrecipitation")
summary_expModel <- summary(expModelOutput$dailyPrecipitation)
summary_expModel$sd <- sd(expModelOutput$dailyPrecipitation)
## Warning in summary_expModel$sd <- sd(expModelOutput$dailyPrecipitation):
## Coercing LHS to a list
layout(matrix(c(1,2), nrow = 2))
par(mar = c(1,4,2,1))
barplot(weather$PRECTOT,
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(max(summaryByDoy_empiric$max), max(summaryByDoy_expModel$max)))))
text(x = nrow(weather) / 2, y = 0.9 * max(c(max(summaryByDoy_empiric$max), max(summaryByDoy_expModel$max))),
labels = paste(
"min = ", summary_empiric$Min.,
", max = ", round(summary_empiric$Max., digits = 3),
", median = ", summary_empiric$Median,
", mean = ", round(summary_empiric$Mean, digits = 3),
", sd = ", round(summary_empiric$sd, digits = 3)
),
cex = 0.8)
barplot(expModelOutput$dailyPrecipitation,
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(max(summaryByDoy_empiric$max), max(summaryByDoy_expModel$max)))))
text(x = nrow(expModelOutput) / 2, y = 0.9 * max(c(max(summaryByDoy_empiric$max), max(summaryByDoy_expModel$max))),
labels = paste(
"min = ", round(summary_expModel[[1]], digits = 3),
", max = ", round(summary_expModel[[6]], digits = 3),
", median = ", round(summary_expModel[[3]], digits = 3),
", mean = ", round(summary_expModel[[4]], digits = 3),
", sd = ", round(summary_expModel[[7]], digits = 3)
),
cex = 0.8)
layout(matrix(c(1, 2, 3), nrow = 3), heights = c(10, 10, 2))
par(mar = c(3,4,2,1))
plot(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$max, col = "red", type = 'l',
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_expModel$max))))
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$mean)
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$mean + summaryByDoy_empiric$sd, col = "darkorchid4")
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$min, col = "blue")
par(mar = c(4,4,2,1))
plot(1:nrow(summaryByDoy_expModel), summaryByDoy_expModel$max, col = "red", type = 'l',
xlab = "day in year",
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_expModel$max))))
lines(1:nrow(summaryByDoy_expModel), summaryByDoy_expModel$mean)
lines(1:nrow(summaryByDoy_expModel), summaryByDoy_expModel$mean + summaryByDoy_expModel$sd, col = "darkorchid4")
lines(1:nrow(summaryByDoy_expModel), summaryByDoy_expModel$min, col = "blue")
par(mar = c(0,0,0,0))
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.25, y = 0.8,
legend = c('max', 'mean', 'mean+sd', 'min'),
col = c('red', 'black', 'darkorchid4', 'blue'), lty = rep(1, 4),
horiz = T)
FAIL: the simulated data approach the mean but does not reflect the variability of the data, never returning the outliers that characterise the empirical pattern. Results underestimate greatly the overall precipitation and overfit the training data.
Method summary:
First we find the curve of daily cumulative proportion of the annual precipitation for multplie years in the dataset. Then, we use the mean and standard deviation of the values on each day (for multiple years) to create multiple simulated curves using a random normal distribution for each day in a year. Last, we derivate the simulated daily precipitation from the simulated curve of cumulative proportions.
proportionSeries <- c()
cumulativeProportionSeries <- c()
for (year in levels(factor(weather$YEAR)))
{
yearSum = sum(weather$PRECTOT[weather$YEAR == year])
cumPropYear = 0
for (day in levels(factor(weather$DOY)))
{
dayProportion = weather$PRECTOT[weather$YEAR == year & weather$DOY == day] / yearSum
cumPropYear = cumPropYear + dayProportion
proportionSeries <- c(proportionSeries,
dayProportion)
cumulativeProportionSeries <- c(cumulativeProportionSeries,
cumPropYear)
}
}
propToAnnual <- data.frame(proportionSeries, cumulativeProportionSeries)
par(mar = c(4, 4, 1, 1))
barplot(propToAnnual$proportionSeries,
xlab = "day (in series)",
ylab = "proportion of annual precipitation (mm/mm)")
par(mar = c(4, 4, 1, 1))
boxplot(propToAnnual$proportionSeries ~ weather$DOY,
xlab = "day (in year)",
ylab = "proportion of annual precipitation (mm/mm)")
The pattern should be similar to the one observed in precipitation – the difference is that the proportions depend on the value of annual precipitation of the corresponding year.
plot(1:length(propToAnnual$proportionSeries), propToAnnual$cumulativeProportionSeries, type = 'l',
xlab = "day (in series)",
ylab = "cumulative proportion of annual precipitation (mm/mm)")
boxplot(propToAnnual$cumulativeProportionSeries ~ weather$DOY,
xlab = "day (in year)",
ylab = "cumulative proportion of annual precipitation (mm/mm)")
Although limited to the 0-1 interval, the variability of the curve daily values is much more similar to a normal distribution.
cumPropDailyMean <- c()
cumPropDailySD <- c()
cumPropDailyMin <- c()
cumPropDailyMax <- c()
for (day in levels(factor(weather$DOY)))
{
cumPropDailyMean <- c(cumPropDailyMean, mean(propToAnnual$cumulativeProportionSeries[weather$DOY == day]))
cumPropDailySD <- c(cumPropDailySD, sd(propToAnnual$cumulativeProportionSeries[weather$DOY == day]))
cumPropDailyMin <- c(cumPropDailyMin, min(propToAnnual$cumulativeProportionSeries[weather$DOY == day]))
cumPropDailyMax <- c(cumPropDailyMax, max(propToAnnual$cumulativeProportionSeries[weather$DOY == day]))
}
set.seed(0)
n = 33
maxDOY = 366 # max. days in year. more complex simulation can account better for leap years
dayOfYear <- c()
year_sim <- c()
dailyCumPropPrecipitation <- c()
for (i in 1:n)
{
for (day in 1:maxDOY)
{
year_sim <- c(year_sim, i)
dayOfYear <- c(dayOfYear, day)
dailyCumPropPrecipitation <- c(dailyCumPropPrecipitation,
rnorm(1, cumPropDailyMean[day], cumPropDailySD[day])
)
}
}
# limit the values between 0-1
# (this tends to create concentrations in 0 and 1 at the beggining and end of the curve; not undesireble)
dailyCumPropPrecipitation <- sapply(dailyCumPropPrecipitation, function(x) min(1, max(0, x)))
# "correct" negative slopes
# Because the curves represent cumulative values,
# the next value must always be equal or greater than the previous value.
for (i in 1:length(dailyCumPropPrecipitation))
{
if(dayOfYear[i] > 1)
{
if (dailyCumPropPrecipitation[i] < dailyCumPropPrecipitation[i - 1])
{
# adjunst value to the last, if greater
dailyCumPropPrecipitation[i] <- max(dailyCumPropPrecipitation[i], dailyCumPropPrecipitation[i - 1])
# This tends to bend the curve towards the greater values, meaning earlier precipitation events
# (not desirible)
# alternative option: adjust to average between current and last values (UNSOLVED)
#dailyCumPropPrecipitation[i] <- mean(dailyCumPropPrecipitation[i], dailyCumPropPrecipitation[i - 1])
#dailyCumPropPrecipitation[i - 1] <- dailyCumPropPrecipitation[i]
}
}
}
normCumModelOutput <- data.frame(year_sim, dayOfYear, dailyCumPropPrecipitation)
boxplot(propToAnnual$cumulativeProportionSeries ~ weather$DOY, cex = 2,
xlab = "day (in year)",
ylab = "cumulative proportion of annual precipitation (mm/mm)",
main = "EMPIRICAL / SIMULATED")
par(new=TRUE)
boxplot(normCumModelOutput$dailyCumPropPrecipitation ~ normCumModelOutput$dayOfYear, cex = 2,
xlab = "day (in year)",
ylab = "cumulative proportion of annual precipitation (mm/mm)",
border = "blue")
dailyPropPrecipitation <- normCumModelOutput$dailyCumPropPrecipitation
for (i in 1:length(dailyPropPrecipitation))
{
if(normCumModelOutput$dayOfYear[i] > 1) # dailyPropPrecipitation = dailyCumPropPrecipitation, if day = 1
{
dailyPropPrecipitation[i] <- normCumModelOutput$dailyCumPropPrecipitation[i] - normCumModelOutput$dailyCumPropPrecipitation[i - 1]
}
}
normCumModelOutput$dailyPropPrecipitation <- dailyPropPrecipitation
summaryByDoy_normCumModel <- summary_DOY(normCumModelOutput, doyName = "dayOfYear", varName = "dailyPropPrecipitation")
summary_normCumModel <- summary(normCumModelOutput$dailyPropPrecipitation)
summary_normCumModel$sd <- sd(normCumModelOutput$dailyPropPrecipitation)
## Warning in summary_normCumModel$sd <- sd(normCumModelOutput
## $dailyPropPrecipitation): Coercing LHS to a list
annualSum_modelPar = mean(annualSum) # (mm)
summaryPrecip_model <- summary(normCumModelOutput$dailyPropPrecipitation * annualSum_modelPar)
layout(matrix(c(1,2), nrow = 2))
par(mar = c(1,4,2,1))
barplot(weather$PRECTOT,
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar))))
text(x = nrow(weather) / 2, y = 0.9 * max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar)),
labels = paste(
"min = ", round(summary_empiric$Min., digits = 3),
", max = ", round(summary_empiric$Max., digits = 3),
", median = ", round(summary_empiric$Median, digits = 3),
", mean = ", round(summary_empiric$Mean, digits = 3),
", sd = ", round(summary_empiric$sd, digits = 3)
))
barplot(normCumModelOutput$dailyPropPrecipitation * annualSum_modelPar,
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar))))
text(x = nrow(normCumModelOutput) / 2, y = 0.9 * max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar)),
labels = paste(
"min = ", summary_normCumModel$Min. * annualSum_modelPar,
", max = ", round(summary_normCumModel$Max. * annualSum_modelPar, digits = 3),
", median = ", summary_normCumModel$Media * annualSum_modelPar,
", mean = ", round(summary_normCumModel$Mean * annualSum_modelPar, digits = 3),
", sd = ", round(summary_normCumModel$sd, digits = 3)
))
layout(matrix(c(1, 2, 3), nrow = 3), heights = c(10, 10, 2))
par(mar = c(3,4,2,1))
plot(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$max, col = "red", type = 'l',
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar))))
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$mean)
lines(1:nrow(summaryByDoy_empiric),
summaryByDoy_empiric$mean + summaryByDoy_empiric$sd,
col = "darkorchid4")
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$min, col = "blue")
par(mar = c(4,4,2,1))
plot(1:nrow(summaryByDoy_normCumModel), summaryByDoy_normCumModel$max * annualSum_modelPar,
col = "red", type = 'l',
xlab = "day in year",
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_normCumModel$max * annualSum_modelPar))))
lines(1:nrow(summaryByDoy_normCumModel), summaryByDoy_normCumModel$mean * annualSum_modelPar)
lines(1:nrow(summaryByDoy_normCumModel),
(summaryByDoy_normCumModel$mean + summaryByDoy_normCumModel$sd) * annualSum_modelPar,
col = "darkorchid4")
lines(1:nrow(summaryByDoy_normCumModel), summaryByDoy_normCumModel$min * annualSum_modelPar, col = "blue")
par(mar = c(0,0,0,0))
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.25, y = 0.8,
legend = c('max', 'mean', 'mean+sd', 'min'),
col = c('red', 'black', 'darkorchid4', 'blue'), lty = rep(1, 4),
horiz = T)
annualSum_model <- c()
for (year in levels(factor(normCumModelOutput$year_sim)))
{
annualSum_model <- c(annualSum_model, sum(normCumModelOutput$dailyPropPrecipitation[normCumModelOutput$year_sim == year] * annualSum_modelPar))
}
Only to confirm that the daily proportion of annual precipitation always sum up to 1 meaning that the annual precipitation is really controlled as a parameter (i.e. annualSum_modelPar)
layout(matrix(c(1,2,3,4), nrow = 2))
plot(1:length(annualSum), annualSum, type = 'l', xlab = "years", ylab = "precipitation (mm)")
hist(annualSum, xlab = "precipitation (mm)")
plot(1:length(annualSum_model), annualSum_model, type = 'l', xlab = "years", ylab = "precipitation (mm)")
hist(annualSum_model, xlab = "precipitation (mm)")
FAIL: the simulated data reflect well the variability of the data and controls the total amount of precipitation in a year. However, simulated precipitation falls to early and, thus, too concentrated, raising the daily maximum much beyond the empirical maximum while creating longer periods of drought.
Method summary:
The simulated daily precipitation is obtained by using a double logistic curve as the propability function of (Shabani et al. 2018, http://doi.org/10.1017/S0021859617000934).
doubleLogistic <- function(dayOfYear,
plateauValue, inflection1, rate1, inflection2, rate2
#m1, m2, m3, m4, m5, m6, m7
#plateauValue, a, b, c, d
)
{
return(
(plateauValue / (1 + exp((inflection1 - dayOfYear) * rate1))) + ((1 - plateauValue) / (1 + exp((inflection2 - dayOfYear) * rate2)))
# Elmore et al. 2012
# m1 + (m2 - m7 * dayOfYear) * (
# (1 / (1 + exp((m3 - dayOfYear) / m4))) + (1 / (1 + exp((m5 - dayOfYear) / m6)))
# )
# Shabani et al. 2018:
#(plateauValue / (1 + exp(-a * dayOfYear + b))) + ((1 - plateauValue) / (1 + exp(-c * dayOfYear + d)))
)
}
doubleLogisticCurve <- function(start.x, end.x, plateauValue, inflection1, rate1, inflection2, rate2)
{
curve <- c()
for (i in start.x:end.x)
{
curve <- c(curve, doubleLogistic(i, plateauValue, inflection1, rate1, inflection2, rate2))
}
return(curve)
}
Brief parameter exploration:
colours <- rainbow(10)
plateauValues <- round(seq(from = 0.01, to = 0.8, length.out = 10), digits = 2)
inflection1Values <- round(seq(from = 20, to = 100, length.out = 10), digits = 2)
rate1Values <- round(seq(from = 0.1, to = 0.5, length.out = 10), digits = 2)
inflection2Values <- round(seq(from = 150, to = 300, length.out = 10), digits = 2)
rate2Values <- round(seq(from = 0.04, to = 0.08, length.out = 10), digits = 2)
layout(matrix(1:2, nrow = 2), heights = c(8, 1))
# plateau value
plot(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
ylab = "output", main = "parameter: plateau value")
for (i in 1:10)
{
abline(h = plateauValues[i], lty = 2)
lines(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = plateauValues[i], inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
col = colours[i])
}
par(mar = c(0,0,0,0), cex = 0.6)
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.01, y = 0.8,
legend = as.character(plateauValues),
col = colours, lty = rep(1, 4),
horiz = T)
layout(matrix(1:2, nrow = 2), heights = c(8, 1))
# inflection 1
plot(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
ylab = "output", main = "parameter: inflection1")
for (i in 1:10)
{
abline(v = inflection1Values[i], lty = 2)
lines(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = inflection1Values[i], rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
col = colours[i])
}
par(mar = c(0,0,0,0), cex = 0.6)
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.01, y = 0.8,
legend = as.character(inflection1Values),
col = colours, lty = rep(1, 4),
horiz = T)
layout(matrix(1:2, nrow = 2), heights = c(8, 1))
# rate 1
plot(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
ylab = "output", main = "parameter: rate1")
for (i in 1:10)
{
lines(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = rate1Values[i], inflection2 = 200, rate2 = 0.06),
col = colours[i])
}
par(mar = c(0,0,0,0), cex = 0.6)
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.01, y = 0.8,
legend = as.character(rate1Values),
col = colours, lty = rep(1, 4),
horiz = T)
layout(matrix(1:2, nrow = 2), heights = c(8, 1))
# inflection 2
plot(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
ylab = "output", main = "parameter: inflection2")
for (i in 1:10)
{
abline(v = inflection2Values[i], lty = 2)
lines(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = inflection2Values[i], rate2 = 0.06),
col = colours[i])
}
par(mar = c(0,0,0,0), cex = 0.6)
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.01, y = 0.8,
legend = as.character(inflection2Values),
col = colours, lty = rep(1, 4),
horiz = T)
layout(matrix(1:2, nrow = 2), heights = c(8, 1))
# rate 2
plot(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = 0.06),
ylab = "output", main = "parameter: rate2")
for (i in 1:10)
{
lines(1:366,
doubleLogisticCurve(
start.x = 1, end.x = 366,
plateauValue = 0.1, inflection1 = 40, rate1 = 0.2, inflection2 = 200, rate2 = rate2Values[i]),
col = colours[i])
}
par(mar = c(0,0,0,0), cex = 0.6)
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.01, y = 0.8,
legend = as.character(rate2Values),
col = colours, lty = rep(1, 4),
horiz = T)
set.seed(0)
n = 33
maxDOY = 366 # max. days in year. more complex simulation can account better for leap years
dayOfYear <- c()
year_sim <- c()
dailyCumPropPrecipitation <- c()
for (i in 1:n)
{
plateauValue = min(1, max(0, rnorm(1, 0.09, 0.05)))
inflection1 = min(maxDOY, max(1, rnorm(1, 40, 20)))
rate1 = max(0, rnorm(1, 0.08, 0.02))
inflection2 = min(maxDOY, max(1, rnorm(1, 210, 20)))
rate2 = max(0, rnorm(1, 0.05, 0.01))
for (day in 1:maxDOY)
{
year_sim <- c(year_sim, i)
dayOfYear <- c(dayOfYear, day)
dailyCumPropPrecipitation <- c(dailyCumPropPrecipitation,
doubleLogistic(
dayOfYear = day,
plateauValue = plateauValue,
inflection1 = inflection1,
rate1 = rate1,
inflection2 = inflection2,
rate2 = rate2
)
)
}
}
# algorithm to break continuous pattern
nSamplesPerYear = 200
maxSampleSize = 10
for (year in levels(factor(year_sim)))
{
for (i in 1:nSamplesPerYear)
{
# get a decreasing sample size proportionally to sample i
thisSampleSize = ceiling(maxSampleSize * i / nSamplesPerYear)
# get random day of year
rainDOY = round(runif(1, min = 1, max = maxDOY))
# set sample limits
earliestNeighbour = max(1, rainDOY - thisSampleSize)
latestNeighbour = min(366, rainDOY + thisSampleSize)
# get mean of neighbourhood
meanNeighbourhood = mean(dailyCumPropPrecipitation[year_sim == year &
dayOfYear >= earliestNeighbour &
dayOfYear <= latestNeighbour])
# assign mean to all days in neighbourhood
for (j in earliestNeighbour:latestNeighbour)
{
dailyCumPropPrecipitation[year_sim == year & dayOfYear == j] <- meanNeighbourhood
}
}
}
doubleLogCumModelOutput <- data.frame(year_sim, dayOfYear, dailyCumPropPrecipitation)
boxplot(propToAnnual$cumulativeProportionSeries ~ weather$DOY, cex = 2,
xlab = "day (in year)",
ylab = "cumulative proportion of annual precipitation (mm/mm)",
main = "EMPIRICAL / SIMULATED")
par(new=TRUE)
boxplot(doubleLogCumModelOutput$dailyCumPropPrecipitation ~ doubleLogCumModelOutput$dayOfYear,
cex = 2,
xlab = "day (in year)",
ylab = "cumulative proportion of annual precipitation (mm/mm)",
border = "blue")
dailyPropPrecipitation <- doubleLogCumModelOutput$dailyCumPropPrecipitation
for (i in 1:length(dailyPropPrecipitation))
{
if(doubleLogCumModelOutput$dayOfYear[i] > 1) # dailyPropPrecipitation = dailyCumPropPrecipitation, if day = 1
{
dailyPropPrecipitation[i] <- doubleLogCumModelOutput$dailyCumPropPrecipitation[i] - doubleLogCumModelOutput$dailyCumPropPrecipitation[i - 1]
}
}
doubleLogCumModelOutput$dailyPropPrecipitation <- dailyPropPrecipitation
summaryByDoy_doubleLogCumModel <- summary_DOY(doubleLogCumModelOutput, doyName = "dayOfYear", varName = "dailyPropPrecipitation")
summary_doubleLogCumModel <- summary(doubleLogCumModelOutput$dailyPropPrecipitation)
summary_doubleLogCumModel$sd <- sd(doubleLogCumModelOutput$dailyPropPrecipitation)
## Warning in summary_doubleLogCumModel$sd <- sd(doubleLogCumModelOutput
## $dailyPropPrecipitation): Coercing LHS to a list
annualSum_modelPar = mean(annualSum) # (mm)
summaryPrecip_empiric <- summary(weather$PRECTOT)
summaryPrecip_model <- summary(doubleLogCumModelOutput$dailyPropPrecipitation * annualSum_modelPar)
layout(matrix(c(1,2), nrow = 2))
par(mar = c(1,4,2,1))
barplot(weather$PRECTOT,
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar))))
text(x = nrow(weather) / 2, y = 0.9 * max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar)),
labels = paste(
"min = ", summaryPrecip_empiric[1],
", max = ", round(summaryPrecip_empiric[6], digits = 3),
", median = ", summaryPrecip_empiric[3],
", mean = ", round(summaryPrecip_empiric[4], digits = 3),
", sd = ", round(summaryPrecip_empiric[5], digits = 3)
))
barplot(doubleLogCumModelOutput$dailyPropPrecipitation * annualSum_modelPar,
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar))))
text(x = nrow(doubleLogCumModelOutput) / 2, y = 0.9 * max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar)),
labels = paste(
"min = ", round(summary_doubleLogCumModel$Min. * annualSum_modelPar, digits = 3),
", max = ", round(summary_doubleLogCumModel$Max. * annualSum_modelPar, digits = 3),
", median = ", round(summary_doubleLogCumModel$Median * annualSum_modelPar, digits = 3),
", mean = ", round(summary_doubleLogCumModel$Mean * annualSum_modelPar, digits = 3),
", sd = ", round(summary_doubleLogCumModel$sd, digits = 3)
))
layout(matrix(c(1, 2, 3), nrow = 3), heights = c(10, 10, 2))
par(mar = c(2,4,2,1))
plot(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$max, col = "red", type = 'l',
ylab = "precipitation (mm)", main = "empirical",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar))))
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$mean)
lines(1:nrow(summaryByDoy_empiric),
summaryByDoy_empiric$mean + summaryByDoy_empiric$sd,
col = "darkorchid4")
lines(1:nrow(summaryByDoy_empiric), summaryByDoy_empiric$min, col = "blue")
par(mar = c(4,4,2,1))
plot(1:nrow(summaryByDoy_doubleLogCumModel), summaryByDoy_doubleLogCumModel$max * annualSum_modelPar,
col = "red", type = 'l',
xlab = "day in year",
ylab = "precipitation (mm)", main = "simulated",
ylim = c(0, max(c(summaryByDoy_empiric$max, summaryByDoy_doubleLogCumModel$max * annualSum_modelPar))))
lines(1:nrow(summaryByDoy_doubleLogCumModel), summaryByDoy_doubleLogCumModel$mean * annualSum_modelPar)
lines(1:nrow(summaryByDoy_doubleLogCumModel),
(summaryByDoy_doubleLogCumModel$mean + summaryByDoy_doubleLogCumModel$sd) * annualSum_modelPar,
col = "darkorchid4")
lines(1:nrow(summaryByDoy_doubleLogCumModel), summaryByDoy_doubleLogCumModel$min * annualSum_modelPar, col = "blue")
par(mar = c(0,0,0,0))
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
legend(x = 0.25, y = 0.8,
legend = c('max', 'mean', 'mean+sd', 'min'),
col = c('red', 'black', 'darkorchid4', 'blue'), lty = rep(1, 4),
horiz = T)
Partial success: The double logistic model approaches the empirical pattern but tend to generate higher peaks concentrated around the inflection days. The great benefit of this approach is to be able to approximate the desired pattern by setting parameters (ideal for setting up simulation experiments).