library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
# Read in data vector
z <- read.csv("plotdata_macro_constrained_20210105.csv",header=TRUE,sep=",")
str(z)
## 'data.frame': 80 obs. of 9 variables:
## $ plot : chr "CAB.01" "CAB.02" "CAB.03" "CAB.04" ...
## $ worms : int 1 1 1 1 1 1 1 1 1 1 ...
## $ richness : int 18 19 19 12 14 14 17 18 19 12 ...
## $ forest : chr "CAB" "CAB" "CAB" "CAB" ...
## $ year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
## $ worm_biomass.m2: num 7.968 6.468 0.376 12.004 9.8 ...
## $ abundance.m2 : int 56 54 76 28 36 46 110 102 132 32 ...
## $ biomass.g.m2 : num 0.486 0.657 0.411 0.396 0.178 ...
## $ shannon : num 2.8 2.85 2.68 2.44 2.58 ...
summary(z)
## plot worms richness forest
## Length:80 Min. :0.0 Min. : 7.00 Length:80
## Class :character 1st Qu.:0.0 1st Qu.:15.00 Class :character
## Mode :character Median :0.5 Median :19.00 Mode :character
## Mean :0.5 Mean :19.77
## 3rd Qu.:1.0 3rd Qu.:22.00
## Max. :1.0 Max. :45.00
## year worm_biomass.m2 abundance.m2 biomass.g.m2
## Min. :2016 Min. : 0.000 Min. : 24.0 Min. :0.1185
## 1st Qu.:2016 1st Qu.: 0.690 1st Qu.: 56.0 1st Qu.:0.3587
## Median :2016 Median : 7.218 Median : 74.0 Median :0.6261
## Mean :2016 Mean : 22.971 Mean : 87.4 Mean :0.8771
## 3rd Qu.:2016 3rd Qu.: 45.491 3rd Qu.:103.0 3rd Qu.:0.9514
## Max. :2017 Max. :136.720 Max. :312.0 Max. :5.5635
## shannon
## Min. :1.294
## 1st Qu.:2.309
## Median :2.598
## Mean :2.569
## 3rd Qu.:2.838
## Max. :3.420
# Plot histogram of data
p1 <- ggplot(data=z, aes(x=abundance.m2, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
# Add empirical density curve
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
# Get maximum likelihood parameters for normal
normPars <- fitdistr(z$abundance.m2,"normal")
print(normPars)
## mean sd
## 87.400000 53.894712
## ( 6.025612) ( 4.260751)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 87.4 53.9
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 6.03 4.26
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 36.3 0 0 18.2
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 80
## $ loglik : num -432
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
## mean
## 87.4
# note structure of getting a named attribute
# Plot normal prob. density
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$abundance.m2),len=length(z$abundance.m2))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$abundance.m2), args = list(mean = meanML, sd = sdML))
p1 + stat
# Plot exponential prob. density
expoPars <- fitdistr(z$abundance.m2,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$abundance.m2), args = list(rate=rateML))
p1 + stat + stat2
# Plot uniform prob. density
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$abundance.m2), args = list(min=min(z$abundance.m2), max=max(z$abundance.m2)))
p1 + stat + stat2 + stat3
# plot gamma prob. density - this one fits best!
gammaPars <- fitdistr(z$abundance.m2,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$abundance.m2), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
# Plot beta prob density
pSpecial <- ggplot(data=z, aes(x=abundance.m2/(max(abundance.m2 + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$abundance.m2/max(z$abundance.m2 + 0.1),start=list(shape1=1,shape2=2),"beta")
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$abundance.m2), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
zsim <- rgamma(n=80, shape=3.6329359, rate = 0.0415673)
zsim <- data.frame(1:80,zsim)
names(zsim) <- list("ID","myVar")
zsim <- zsim[zsim$myVar>0,]
str(zsim)
## 'data.frame': 80 obs. of 2 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ myVar: num 50.1 79.2 32.9 50.3 63.9 ...
summary(zsim$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.19 50.23 78.10 87.82 114.22 243.63
psim <- ggplot(data=zsim, aes(x=myVar, y=after_stat(density))) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(psim)
print(p1)
The two histogram profiles are not drastically different but there are noticeable differences between the simulated data and the original data. The simulated data using the gamma distribution parameters is a little bit more uniform in spread and has less skewness compared to the original data
Insect biomass follows similar distribution as insect abundance (gamma)
Shannon diversity follows the normal distribution
Earthworm biomass seems to fit the beta distribution the best
For species richness, the normal distribution fits best