March 22, 2005

The Dataset

The Dataset

I processed them to get a table with bioclimatic values and occurrences: https://emilio-berti.github.io/datasets/leontopodium-nivale.csv.

leontopodium <- read.csv("leontopodium-nivale.csv")[, -1]
head(leontopodium, 2)
     bio_1   bio_10    bio_11 bio_12 bio_13 bio_14   bio_15 bio_16 bio_17
1 8.220790 15.60209 1.3516101    875     93     48 22.06818    274    161
2 8.299604 16.21712 0.7646251    655     74     37 21.02938    208    120
  bio_18 bio_19    bio_2    bio_3    bio_4    bio_5     bio_6    bio_7
1    231    206 6.854939 30.94288 587.1818 20.72924 -1.424282 22.15352
2    208    139 7.099833 29.86668 636.0113 21.60050 -2.171250 23.77175
      bio_8    bio_9 occurrence
1  8.948825 3.539078          1
2 16.217125 3.332750          1

What is the niche of this species? (ENM/SDM)

A naive approach

enm <- glm(
  occurrence ~ bio_1 + I(bio_1^2) + bio_2 + I(bio_2^2) + bio_3 + I(bio_3^2) + bio_4 + I(bio_4^2) + bio_5 + I(bio_5^2) + bio_6 + I(bio_6^2) + bio_7 + I(bio_7^2) + bio_8 + I(bio_8^2) + bio_9 + I(bio_9^2) + bio_10 + I(bio_10^2) + bio_11 + I(bio_11^2) + bio_12 + I(bio_12^2) + bio_13 + I(bio_13^2) + + bio_14 + I(bio_14^2) + bio_15 + I(bio_15^2) + bio_16 + I(bio_16^2) + bio_17 + I(bio_17^2) + bio_18 + I(bio_18^2) + bio_19 + I(bio_19^2),
  data = leontopodium,
  family = "binomial"
)
data.frame(
  bio = rownames(anova(aov(enm)))[anova(aov((enm)))$`Pr(>F)` <= 0.05],
  p.value = anova(aov((enm)))$`Pr(>F)`[anova(aov((enm)))$`Pr(>F)` <= 0.05]
)
          bio      p.value
1       bio_1 1.181202e-06
2       bio_4 8.402027e-05
3       bio_5 1.147592e-02
4  I(bio_6^2) 3.798042e-02
5      bio_11 2.795784e-02
6 I(bio_18^2) 1.501037e-02
7        <NA>           NA

A naive approach: issues

The issue of collinearity:

data.frame(
  bio = rownames(anova(aov(enm)))[anova(aov((enm)))$`Pr(>F)` <= 0.05],
  p.value = anova(aov((enm)))$`Pr(>F)`[anova(aov((enm)))$`Pr(>F)` <= 0.05]
)
          bio      p.value
1       bio_1 1.181202e-06
2       bio_4 8.402027e-05
3       bio_5 1.147592e-02
4  I(bio_6^2) 3.798042e-02
5      bio_11 2.795784e-02
6 I(bio_18^2) 1.501037e-02
7        <NA>           NA
sort(round(cor(leontopodium[, -20]), 2)[, "bio_1"], decreasing = TRUE)
 bio_1 bio_10  bio_5 bio_11  bio_6  bio_9  bio_2  bio_3  bio_8  bio_7  bio_4 
  1.00   0.98   0.97   0.96   0.94   0.60   0.55   0.47   0.45   0.38   0.18 
bio_15 bio_19 bio_13 bio_17 bio_12 bio_14 bio_16 bio_18 
  0.02  -0.19  -0.35  -0.35  -0.38  -0.39  -0.40  -0.49 

A PCA approach

Reduce the dimensionality of the climate using PCA:

pca <- prcomp(leontopodium[, -20], scale = TRUE, center = TRUE)
summary(pca)$importance[, 1:5]
                           PC1      PC2      PC3      PC4      PC5
Standard deviation     2.90455 2.013001 1.743501 1.121994 1.045515
Proportion of Variance 0.44402 0.213270 0.159990 0.066260 0.057530
Cumulative Proportion  0.44402 0.657290 0.817280 0.883540 0.941070

A PCA approach: biplot

par(mfrow = c(1, 2))
biplot(pca, xlabs = rep("+", nrow(leontopodium)), col = c(adjustcolor("grey20", alpha.f = 0.1), "blue"))
biplot(pca, xlabs = rep("+", nrow(leontopodium)), choices = c(1, 3), col = c(adjustcolor("grey20", alpha.f = 0.1), "blue"))

A PCA approach: loadings

par(mfrow = c(1, 2), mar = c(4, 4, 2, 2))
biplot(pca, xlabs = rep("+", nrow(leontopodium)), col = c(adjustcolor("grey20", alpha.f = 0.1), "blue"))
barplot(sort(pca$rotation[, 1]), las = 2, main = "PC1", cex.names = 1.2)

A PCA approach: loadings

par(mfrow = c(1, 3))
barplot(sort(pca$rotation[, 1]), las = 2, main = "PC1", cex.names = 1.2)
barplot(sort(pca$rotation[, 2]), las = 2, main = "PC2", cex.names = 1.2)
barplot(sort(pca$rotation[, 3]), las = 2, main = "PC3", cex.names = 1.2)

A PCA approach: loadings

par(mfrow = c(1, 3), mar = c(16, 4, 2, 2))
barplot(sort(pca$rotation[, 1]), las = 2, main = "PC1", cex.names = 1.5)
barplot(sort(pca$rotation[, 2]), las = 2, main = "PC2", cex.names = 1.5)
barplot(sort(pca$rotation[, 3]), las = 2, main = "PC3", cex.names = 1.5)

A PCA approach: use PCs as predictors

pc1 <- pca$x[, 1]
pc2 <- pca$x[, 2]
pc3 <- pca$x[, 3]
enm <- glm(
  leontopodium$occurrence ~ pc1 + pc2 + pc3 + I(pc1^2) + I(pc2^2) + I(pc3^2),
  family = "binomial"
)
data.frame(PC = rownames(anova(aov(enm))), p.value = round(anova(aov((enm)))$`Pr(>F)`, 3))
         PC p.value
1       pc1   0.000
2       pc2   0.322
3       pc3   0.007
4  I(pc1^2)   0.418
5  I(pc2^2)   0.208
6  I(pc3^2)   0.074
7 Residuals      NA

A PCA approach: coefficients

exp(coefficients(enm))[c("pc1", "pc3")]
      pc1       pc3 
1.0707521 0.9355468 
scatter.smooth(leontopodium$occurrence ~ pca$x[, 1], xlab = c("PC1"), ylab = "P(occurrence)")
scatter.smooth(leontopodium$occurrence ~ pca$x[, 3], xlab = c("PC3"), ylab = "P(occurrence)")

A PCA approach: intepretation

  • L. nivale likes PC1.
  • L. nivale dislikes PC3.
round(pca$rotation[, 1], 3)[order(abs(pca$rotation[, 1]), decreasing = TRUE)[1:4]]
 Annual range T       Annual Pr          Mean T Pr driest month 
         -0.298          -0.296          -0.290           0.272 
round(pca$rotation[, 3], 3)[order(abs(pca$rotation[, 3]), decreasing = TRUE)[1:4]]
   T driest quarter Min T coldest month     Diurnal range T      Pr seasonality 
              0.398               0.387               0.326               0.305 
  • L. nivale likes cold, stable habitats …
  • … but with a limit on \(T_{min}\) and quite variable in \(Pr\).