Introduction

This paper presents the process of analyzing the results of a survey of chestnut seedlings. In particular, the plants were inoculated with two different Phytophthora species, P. cinnamomi and P. cambivora, leaving one control thesis without pathogen inoculation. In each of the three theses inoculated or not, treatments with five different products were tested, leaving one control thesis without treatment.

The different treatments are as follows:

	Treatment name	Description
1	BV	Bacillus sp. 1
2	BV + F35	Bacillus sp. 1 + Bacillus sp. 2
3	F35	Bacillus sp. 2
4	Kalex	Potassium phosphate
5	Ridomil	Anti-peronosporic
6	Control	Control thesis without

The root proprieties are statistically analyzed to select the best product for each treatment in order to use it in field application inside the LIFE Fagesos project. The roots parameters investigated are:

Parameter name	Description
length	Total Roots Length
avgDiam	Total Roots mean diameter
rootVolume	Total Roots Volume
FRL	Fine Roots Length
CRL	Coarse Roots Length
FRS	Fine Roots Surface
CRS	Coarse Roots Surface
FVOL	Fine Roots Volume
weight	Fine Roots weight

Furthermore a leaf symptom analysis is carried out

Load the packages

# Load the necessary packages
library(readxl)
library(dplyr)
library(stringr)
library(readr)
library(ggplot2)
library(gridExtra)
library(cowplot)
library(heplots)
library(FactoMineR)
library(factoextra)
library(Factoshiny)
library(kableExtra)
library(DT)
library(vegan)
library(vegan)

Symptom analysis

# Load the symptom assessment sheets datased 
dataframe <- read_delim("./Symptom_assessment/Symptom_assessment_modified.csv", delim = ";")

# Transform dataframe from wide to long
df_long <- tidyr::pivot_longer(dataframe, 
                               cols = starts_with("16/06/2023"):starts_with("21/09/2023"),
                               names_to = "Date",
                               values_to = "Class")

# Convert Date column to date format
df_long$Date <- as.Date(df_long$Date, format = "%d/%m/%Y")

# Convert dates to number of days
df_long$Days <- as.numeric(df_long$Date - min(df_long$Date))

# Calculates the average of Class for each combination of Inoculum, Treatment and Days
df_summary <- df_long %>%
  group_by(Inoculum, Treatment, Days) %>%
  summarize(mean_class = mean(Class)) %>%
  ungroup()

# Write the summary dataframe in a CSV file
write_csv(df_summary, "./Symptom_assessment/DATAFRAME/df_summary_modified.csv")

# Load the df summary previously created
df_summary <- read_csv("./Symptom_assessment/DATAFRAME/df_summary_modified.csv")

# Extract from df_summary the values that have a Days<=82 and eilimate the records with Treatment = Ridomil
df_filtered2 <- df_summary[df_summary$Days <= 82 & df_summary$Treatment != "Ridomil", ]

# Create a list of unique inoculations and treatments
inocula <- unique(df_filtered2$Inoculum)
trattamenti <- unique(df_filtered2$Treatment)

# View the Symptom assessment Table 
datatable(df_filtered2, 
          caption = "Symptom assessment Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

# Create a list of graphs, one for each inoculum
plots <- lapply(unique(df_filtered2$Inoculum), function(inoculum) {
  # Filter the dataframe for the current inoculum
  df_inoculum <- df_filtered2[df_filtered2$Inoculum == inoculum,]
  
  # Create the graph
  p <- ggplot(df_inoculum, aes(x = Days, y = mean_class, color = Treatment)) +
    geom_step(linewidth = 1) + # if I enter geom_line() / geom_step(), the lines are not square
    labs(title = paste("Inoculum:", inoculum),
         x = "Days",
         y = "Mean Class") +
    ylim(0, 4)
  
  return(p)
})

# Create a grid layout with graphics
grid_plot <- do.call(gridExtra::grid.arrange, c(plots, ncol = 2))

# Save grid layout as PNG file
ggsave("./Symptom_assessment/GRAFICI/MODIFICHE/mean_class_graph1.png", grid_plot, width = 16, height = 12, dpi = 700)

Symptom assessment graph for each inoculum

# Ripulisci l'enviroment di R
rm(list=ls())

Roots analysis

Get the data set

In this section, the data set is loaded into R and file preparation begins.

# Get the name of the file xlxs inside the folder
file_names <- list.files(path = "./Data", pattern = "\\.xlsx$")

# Inizialize a new empty data frame
combined_df <- data.frame()

# Read each file and add it in a new dataframe
for (file in file_names) {
  temp_df <- readxl::read_excel(paste("./Data", file, sep = "/"), sheet = "Global")
  combined_df <- rbind(combined_df, temp_df)
}

# Extract the Treatment column from the "Sanmple id" column and create a new "Treatment" column
# Estrai il Treatment dalla colonna Sample id, e crea la nuova colonna treatment
combined_df$treatment <- sapply(strsplit(combined_df$`Sample Id`, "_"), '[', 3)

# Mobe th "Treatment" column to the second place
combined_df <- combined_df %>%
  select(`Sample Id`, treatment, inoculum, everything())

# Extract the "inoculum column" from the "Sample id" column and create a new "inoculum" column.
combined_df <- combined_df %>%
  mutate(inoculum = str_extract(`Sample Id`, "(?<=_)[^_]+(?=_)"))

# Move the "inoculum" column to the 3° place
combined_df <- combined_df %>%
  select(treatment, inoculum, everything())

# Store the dataframe in csv format
write.csv(combined_df, "./DATAFRAMES/combined_df.csv", append = FALSE)

# View  the dataset Table 
datatable(combined_df, 
          caption = "Dataset Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE,  scrollCollapse = TRUE),
          class = "display",
          style = "bootstrap")

# Ripulisci l'enviroment di R
rm(list=ls())

Select the columns

In this section all the column necessary to the analysis are selected from the data set.

# Load the combined data set previosly stored
combined_df <- read_csv("./DATAFRAMES/combined_df.csv")

# Delete undesired column $...1
combined_df$...1 <- NULL

# Select the necessary columns of the roots parameters
cols_fine_roots_lenght <- combined_df %>%
  select(1:4, 67:70)

cols_fine_roots_lenght <- cols_fine_roots_lenght %>%
  mutate(FRL = rowSums(select(., 5:8)))

cols_coarse_roots_lenght <- combined_df %>%
  select(1:4, 71:76)

cols_coarse_roots_lenght <- cols_coarse_roots_lenght %>%
  mutate(CRL = rowSums(select(., 5:8)))

cols_fine_roots_SA <- combined_df %>%
  select(1:4, 78:81)

cols_fine_roots_SA <- cols_fine_roots_SA %>%
  mutate( FRS = rowSums(select(., 5:8)))

cols_coarse_roots_SA <- combined_df %>%
  select(1:4, 82:87)

cols_coarse_roots_SA <- cols_coarse_roots_SA %>%
  mutate(CRS = rowSums(select(., 5:8)))

cols_fine_roots_VOL <- combined_df %>%
  select(1:4, 110:113)

cols_fine_roots_VOL <- cols_fine_roots_VOL %>%
  mutate(FVOL = rowSums(select(., 5:8)))

# Add the coulumn "FRL" to combined_df
combined_df <- left_join(combined_df, cols_fine_roots_lenght %>% select(treatment, inoculum, `Sample Id`, Plant, FRL), by = c("treatment", "inoculum", "Sample Id", "Plant"))

# Add the coulumn "CRL" to combined_df
combined_df <- left_join(combined_df, cols_coarse_roots_lenght %>% select(treatment, inoculum, `Sample Id`, Plant, CRL), by = c("treatment", "inoculum", "Sample Id", "Plant"))

# Add the coulumn "FRS" to combined_df
combined_df <- left_join(combined_df, cols_fine_roots_SA %>% select(treatment, inoculum, `Sample Id`, Plant, FRS), by = c("treatment", "inoculum", "Sample Id", "Plant"))

# Add the coulumn "CRS" to combined_df
combined_df <- left_join(combined_df, cols_coarse_roots_SA %>% select(treatment, inoculum, `Sample Id`, Plant, CRS), by = c("treatment", "inoculum", "Sample Id", "Plant"))

# Add the coulumn "FVOL" to combined_df
combined_df <- left_join(combined_df, cols_fine_roots_VOL %>% select(treatment, inoculum, `Sample Id`, Plant, FVOL), by = c("treatment", "inoculum", "Sample Id", "Plant"))

# Select only the columns number 1:4, 19, 25, 29, 33, 32, 34, 27, 126, 127, 128, 129, 130
selected_columns <- combined_df %>%
  select(1:4, 19, 25, 29, 33, 32, 34, 27, 126, 127, 128, 129, 130)

# Assing the new names to the columns
colnames(selected_columns) <- c("treatment", "inoculum", "sample_Id", "plant", "length", "avgDiam", "rootVolume", "lenPerVol", "tips", "forks", "crossings", "FRL", "CRL", "FRS", "CRS", "FVOL")

# Check if the columns have a new names 
colnames(selected_columns)

# Store the selected_columns data frame in csv format
write.csv(selected_columns, "./DATAFRAMES/selected_columns.csv", append = FALSE)

# View the dataset Table 
datatable(selected_columns, 
          caption = "selected_columns Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

# Clear the R enviroment 
rm(list=ls())

Create a new results Data Frame

In this section a new results data frame is created by grouping the data by plant, inoculum and treatment and summarize the roots parameters.

# Load the selected_columns data frame stored previously
selected_columns <- read_csv("./DATAFRAMES/selected_columns.csv")

# Delete undesired column $...1
selected_columns$...1 <- NULL

# Create the result_df dataframe with the new summary columns per sample
result_df <- selected_columns %>%
  group_by(plant, inoculum, treatment) %>%
  summarise(
    length = sum(length),
    avgDiam = mean(avgDiam),
    rootVolume = sum(rootVolume),
    tips = sum(tips),
    forks = sum(forks),
    crossings = sum(crossings),
    FRL = sum(FRL),
    CRL = sum(CRL),
    FRS = sum(FRS),
    CRS = sum(CRS),
    FVOL = sum(FVOL)
  )

# Convert values from cm/m3 to cm/cm3 for column LenPerVol
result_df <- result_df %>%
mutate(lenPerVol = (length / 0.000931) / 1000000)

# Convert the 'plant' column to characters (strings)
result_df$plant <- as.character(result_df$plant)

# Save the result_df dataframe in csv format
write.csv(result_df, "./DATAFRAMES/result_df.csv", append = FALSE)

# View the result_df Table 
datatable(result_df, 
          caption = "Results Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

# Clean the R enviroment
rm(list=ls())

Preparing weight data

In this section the weight data of the fine roots are merged with the roots parameters dataframe.

# Load the results data frame saved previously
result_df <- read_csv("./DATAFRAMES/result_df.csv")

# Delete undesired column $...1
result_df$...1 <- NULL

# Load the fine roots weight xlxs file
weight <- read_excel("./DATAFRAMES/weight.xlsX")

# Merge the two data frame
new_df <- merge(result_df, weight, by = c("plant", "inoculum", "treatment"))

# Save the new_df dataframe in csv format
write.csv(new_df, "./DATAFRAMES/result_df_weight.csv", append = FALSE)

# View the new_df Table 
datatable(new_df, 
          caption = "new_df Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

# Clean the R enviroment
rm(list=ls())

Statistical analysis

# Load the results data frame saved previously
result_df <- read_csv("./DATAFRAMES/result_df_weight.csv")

# Delete undesired column $...1
result_df$...1 <- NULL

Analysis of correlation between variables

# Calculate the correlation matrix
cor_matrix <- cor(result_df[,c("length", "avgDiam", "rootVolume", "lenPerVol", "tips", "crossings", "forks", "FRL", "CRL", "FRS", "CRS", "FVOL", "weight")])

# View the correlation matrix
datatable(cor_matrix, 
          caption = "Correlation matrix", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

The variables length, lenPerVol and crossings are strongly correlated, so only one of them should be left. I only take length into account. The variables tips, crossings, forks are to be deleted as the image analysis was performed on cut roots and their data ar not correct.

Create the box plots

# Select only the columns useful to the analysis
columns_el <- result_df %>%
  select(1:3, 4:6, 10:14, 16)

# Rearrange the factor levels.
columns_el$treatment <- factor(columns_el$treatment, levels = c("BV", "BV+F35", "F35", "Kalex", "Ridomil", "Control"))

# This is the plot from which I extract the legend.
leg <- ggplot(columns_el, aes(x = treatment, y = length, fill = inoculum)) +
  geom_boxplot() +
  labs(x = "", y = "Length (cm)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

# Extract the legend from one of the graphs.
legend <- cowplot::get_legend(leg)

# Create boxplots for each variable without the legend
plot1 <- ggplot(columns_el, aes(x = treatment, y = length, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "", y = "Total Length (cm)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

plot2 <- ggplot(columns_el, aes(x = treatment, y = rootVolume, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "", y = "Total Roots Volume(cm3)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

plot3 <- ggplot(columns_el, aes(x = treatment, y = FRL, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "", y = "Fine Roots Length (cm)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

plot4 <- ggplot(columns_el, aes(x = treatment, y = FRS, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "Treatments", y = "Fine Roots Surface (cm2)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

plot5 <- ggplot(columns_el, aes(x = treatment, y = FVOL, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "Treatments", y = "Fine Root Volume (cm3)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

plot6 <- ggplot(columns_el, aes(x = treatment, y = weight, fill = inoculum)) +
  geom_boxplot(show.legend = FALSE) +
  labs(x = "Treatments", y = "Fine Root weigth (g)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))

# Remove legend from individual graphs.
plot1 <- plot1 + theme(legend.position = "none")
plot2 <- plot2 + theme(legend.position = "none")
plot3 <- plot3 + theme(legend.position = "none")
plot4 <- plot4 + theme(legend.position = "none")
plot5 <- plot5 + theme(legend.position = "none")
plot6 <- plot6 + theme(legend.position = "none")

# Create a grid with graphs and legend at the bottom.
grid1 <- grid.arrange(plot1, plot2, plot3, plot4, plot5,plot6, ncol = 2, bottom = legend)

# Specifies the path to save
file_path <- "./GRAPHS/box_plots.png"

# Save the grid as a PNG file.
ggsave(file_path, grid1, width = 16, height = 12, dpi = 700)

Box plot of the roots parameters for each treatment and inoculum

Compute the outliers

# Create a new dataframe starting from result_df named outlier_df
outlier_df <- result_df

# Delete the unwanted columns
outlier_df$tips <- NULL
outlier_df$forks <- NULL
outlier_df$crossings <- NULL

# Define the function for identify the outlier
outlier <- function(x) {
  IQR <- IQR(x)
  limite_inferiore <- quantile(x, 0.25) - 1.5 * IQR
  limite_superiore <- quantile(x, 0.75) + 1.5 * IQR
  x < limite_inferiore | x > limite_superiore
}

# Apply the outilier function to each group
outlier_df <- outlier_df %>%
  group_by(inoculum, treatment) %>%
  mutate(across(where(is.numeric), list(outlier = outlier)))

library(openxlsx)

# Save the dataframe in xlxs format
write.xlsx(outlier_df, "./Analisi_statistica_risultati/outier.xlsx")

# View the outlier Table 
datatable(outlier_df, 
          caption = "Outlier Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

Create the bar graph

# Calculate the mean and the standard error for each treatment and inoculum
df_summary <- result_df %>%
  group_by(treatment, inoculum) %>%
  summarise(across(c(length, avgDiam, rootVolume, FRL, CRL, FRS, CRS, FVOL, lenPerVol), list(mean = mean, se = ~sd(.x)/sqrt(length(.x))), .names = "{.col}_{.fn}"))

# Riordina i livelli del fattore
df_summary$treatment <- factor(df_summary$treatment, levels = c("BV", "BV+F35", "F35", "Kalex", "Ridomil", "Control"))

# Save the summary dataframe
write.csv(df_summary, "./DATAFRAMES/df_summary.csv")

# View the mean and standard error Table 
datatable(df_summary, 
          caption = "Mean and standard error Table", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

# Create the bar graph with the standard error
p1 <- ggplot(df_summary, aes(x=treatment, y= length_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=length_mean - length_se, ymax=length_mean + length_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("Length (cm)")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1","Not inoculated"))

p2 <- ggplot(df_summary, aes(x=treatment, y= avgDiam_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=avgDiam_mean - avgDiam_se, ymax=avgDiam_mean + avgDiam_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("AvgDiam(mm)")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

p3 <- ggplot(df_summary, aes(x=treatment, y= rootVolume_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=rootVolume_mean - rootVolume_se, ymax=rootVolume_mean + rootVolume_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("Total Roots Volume(cm3)")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

p4 <- ggplot(df_summary, aes(x=treatment, y= FRL_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=FRL_mean - FRL_se, ymax=FRL_mean + FRL_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("Fine Roots Length (cm)")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

p5 <- ggplot(df_summary, aes(x=treatment, y= FRS_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=FRS_mean - FRS_se, ymax=FRS_mean + FRS_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("Fine Roots Surface (cm2)")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

p6 <- ggplot(df_summary, aes(x=treatment, y= FVOL_mean, fill=inoculum)) +
  geom_bar(stat="identity", position=position_dodge(), colour="black") +
  geom_errorbar(aes(ymin=FVOL_mean - FVOL_se, ymax=FVOL_mean + FVOL_se), 
                width=.2, position=position_dodge(.9)) +
  labs(x="Treatment", y=paste("Fine Roots Volume")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) +
  theme_minimal() +
  guides(fill = guide_legend(ncol = 3, title = "Inoculum"))+
  scale_fill_discrete(labels=c("AB21", "AB1", "Not inoculated"))

# Extract the legend from one of the graphs
legend2 <- cowplot::get_legend(p1)

# Delete the legend from the other plots
p1 <- p1 + theme(legend.position = "none")
p2 <- p2 + theme(legend.position = "none")
p3 <- p3 + theme(legend.position = "none")
p4 <- p4 + theme(legend.position = "none")
p5 <- p5 + theme(legend.position = "none")
p6 <- p6 + theme(legend.position = "none")

# Create a grid with graphs and legend at the bottom
grid2 <- grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3, bottom = legend2)

# Specifies the path for saving
file_path <- "./GRAPHS/hists_SE.png"

# Save the grid as a PNG file
ggsave(file_path, grid2, width = 16, height = 12, dpi = 700)

Bar plot and standar error

Statistical Test

# Carica il df result_df salvato precedentemente
result_df <- read_csv("./DATAFRAMES/result_df_weight.csv")

# Elimina la colonna indesiderata $...1
result_df$...1 <- NULL

# Converti la colonna "plant" in caratteri (stringhe)
result_df$plant <- as.character(result_df$plant)

# Crea un nuovo dataframe uguale al precedente
df <- result_df

# df$lenPerVol <- NULL

df$crossings <- NULL

df$tips <- NULL

df$forks <- NULL

# Trasformazione della variabile treatment e inoculum in fattore
df$treatment <- as.factor(df$treatment)
df$inoculum <- as.factor(df$inoculum)

Check ANOVA assumptions

Levene’s Test

# Load the necessary packages
library(dplyr)
library(car)

# Inizializza un dataframe vuoto per memorizzare i risultati
results <- data.frame(var = character(), p.value = numeric())

# Elenco delle variabili numeriche nel dataframe
vars <- c("length", "avgDiam", "rootVolume", "FRL", "CRL", "FRS", "CRS", "weight")

# Esegui il test di Levene per ogni variabile e salva i risultati
for (var in vars) {
  df$group <- interaction(df$inoculum, df$treatment)
  formula <- as.formula(paste(var, "~ group"))
  test <- leveneTest(formula, data = df)
  results <- rbind(results, data.frame(var = var, p.value = test[1, "Pr(>F)"]))
}

# Crea un grafico a barre dei risultati del test di Levene con linea orizzontale a y = 0.05
levene_plot <- ggplot(results, aes(x = var, y = p.value)) +
  geom_bar(stat = "identity") +
  theme_minimal(base_size = 20) +
  labs(x = "Variable", y = "P Value", title = "Levene's Test for Homogeneity of Variance") +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "red")

# Specifies the path to save
file_path <- "./GRAPHS/levene_Test_plot.png"

# Save the grid as a PNG file.
ggsave(file_path, levene_plot, width = 14, height = 10, dpi = 300)

# Write the results in a csv file
write.csv(results, file = "./DATAFRAMES/levene_test_results.csv")

Bar plot of the Levene’s Test for Homogeneity of Variance

# View the Levene's Test for Homogeneity of Variance data frame
datatable(results, 
          caption = "Levene's Test for Homogeneity of Variance", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

The results of Levene’s Test allow us to accept the null hypothesis of homogeneity of variances. there is insufficient evidence to reject the hypothesis that the variances in the different groups are equal.

Shapiro-Wilk Test for Normality

# Inizializza un dataframe vuoto per memorizzare i risultati
shapiro_results <- data.frame(var = character(), p.value = numeric())

# Elenco delle variabili numeriche nel dataframe
vars <- c("length", "avgDiam", "rootVolume", "FRL", "CRL", "FRS", "CRS", "weight")

# Esegui il test di Shapiro-Wilk per ogni variabile e salva i risultati
for (var in vars) {
  model <- lm(df[[var]] ~ 1)  # Addestra il modello con intercept
  residuals_model <- residuals(model)
  test <- shapiro.test(residuals_model)
  shapiro_results <- rbind(shapiro_results, data.frame(var = var, p.value = test$p.value))
}

# Crea un grafico a barre dei risultati del test di Levene
shapiro_plot <- ggplot(shapiro_results, aes(x = var, y = p.value)) +
  geom_bar(stat = "identity") +
  theme_minimal(base_size = 20) +
  labs(x = "Variable", y = "P Value", title = "Shapiro-wilk Test for data normality")

# Specifies the path to save
file_path <- "./GRAPHS/Shapiro_Test_plot.png"

# Save the grid as a PNG file.
ggsave(file_path, shapiro_plot, width = 14, height = 10, dpi = 300)

shapiro_plot

# Write the shapiro_results in a csv file
write.csv(shapiro_results, file = "./DATAFRAMES/levene_test_results.csv")

# View the Shapiro-Wilk Test for normality
datatable(shapiro_results, 
          caption = "Shapiro-Wilk Results Test", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

vars <- c("length", "avgDiam", "rootVolume", "FRL", "CRL", "FRS", "CRS", "weight")

# Crea ggqqplot per ogni variabile e raggruppa per inoculum
qqplots_1_list <- list()

for (var in vars) {
  # Creazione del ggqqplot per ogni variabile e raggruppato per inoculum
  ggqq <- ggplot(df, aes(sample = df[[var]])) +
    geom_qq() +
    geom_qq_line() +
    labs(title = paste("Q-Q Plot for", var)) +
    facet_wrap(~inoculum)  # Aggiungi il raggruppamento per inoculum
  
  # Aggiungi il ggqqplot alla lista dei grafici
  qqplots_1_list[[var]] <- ggqq
}

# Creazione di un grafico per ogni variabile e per ogni "inoculum" su Grid arrange
qqplots_1 <- grid.arrange(grobs = qqplots_1_list, ncol = 2)  # Puoi impostare il numero di colonne desiderato

# Specifies the path to save
file_path <- "./GRAPHS/qqpolot_1.png"

# Save the grid as a PNG file.
ggsave(file_path, qqplots_1, width = 16, height = 12, dpi = 700)

###########

# Your existing code for creating Q-Q plots
vars <- c("length", "avgDiam", "rootVolume", "FRL", "CRL", "FRS", "CRS", "weight")

qqplots_2_list <- list()

for (var in vars) {
  ggqq <- ggplot(df, aes(sample = df[[var]])) +
    geom_qq() +
    geom_qq_line(col = "red") +
    labs(title = paste("Q-Q Plot for", var))
  
  qqplots_2_list[[var]] <- ggqq
}

# Adjusted layout parameters for equal plots
qqplots_2 <- grid.arrange(grobs = qqplots_2_list, ncol = 4, nrow = 2)

# Specifies the path to save
file_path <- "./GRAPHS/qqpolot_2.png"

# Save the grid as a PNG file.
ggsave(file_path, qqplots_2, width = 16, height = 12, dpi = 700)

QQplot for eaxh variable and inoculum

Anova Tests

In this section a ANOVA test is carried out for each variable

# Carica il df result_df salvato precedentemente
result_df <- read_csv("./DATAFRAMES/result_df_weight.csv")

## New names:
## Rows: 102 Columns: 17
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): inoculum, treatment dbl (15): ...1, plant, length, avgDiam, rootVolume,
## tips, forks, crossings, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Elimina le colonne indesiderate
result_df <- subset(result_df, select = -c(...1, tips, forks, crossings, lenPerVol))

# Converti la colonna "plant" in caratteri (stringhe)
result_df$plant <- as.character(result_df$plant)

result_df$inoculum <- as.factor(result_df$inoculum)

# Aggiungi una colonna con le combinzioni
result_df$comb<- paste(result_df$inoculum, result_df$treatment, sep = "_")

# Sposta la colononna comb al4° posto
result_df <- result_df[, c("plant", "inoculum", "treatment", "comb", "length", "avgDiam", "rootVolume","FRL", "CRL", "FRS", "CRS", "FVOL", "weight")]

# Elenco delle variabili numeriche
num_vars <- c("length", "avgDiam", "rootVolume", "FRL", "CRL", "FRS", "CRS", "FVOL", "weight")

# Imposta l'opzione per visualizzare i valori senza notazione esponenziale
options(scipen = 999)

# Creazione di un dataframe vuoto per la tabella dei risultati
result_table <- data.frame(Inoculum = character(), Variabile = character(), F_value = numeric(), P_value = numeric(), stringsAsFactors = FALSE)

# Loop attraverso le variabili numeriche
for (var in num_vars) {
  # Loop attraverso i livelli di 'inoculum' (assumendo che 'inoculum' sia una colonna nel tuo dataframe)
  for (inoculum_level in unique(result_df$inoculum)) {
    # Filtra il dataframe per il livello corrente di 'inoculum'
    subset_data <- subset(result_df, inoculum == inoculum_level)
    # Stampa il nome della variabile corrente e il livello di 'inoculum'
    cat("Inoculum:", inoculum_level, "\n", "Variabile:", var)
    # Crea la formula per l'ANOVA
    formula <- as.formula(paste(var, "~ treatment"))
    # Esegui l'ANOVA
    anova_result <- aov(formula, data = subset_data)
    # Estrai i valori di F e p
    f_value <- summary(anova_result)[[1]][["F value"]][1]
    p_value <- summary(anova_result)[[1]][["Pr(>F)"]][1]
    # Aggiungi i risultati alla tabella
    result_table <- rbind(result_table, data.frame(Variabile = var, Inoculum = inoculum_level, F_value = f_value, P_value = p_value))
    # Stampa il summary dell'ANOVA
    print(summary(anova_result))
  }
}

## Inoculum: AB1 
##  Variabile: length            Df  Sum Sq Mean Sq F value   Pr(>F)    
## treatment    5 7096083 1419217     6.2 0.000598 ***
## Residuals   27 6180812  228919                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: length            Df   Sum Sq Mean Sq F value Pr(>F)
## treatment    5  3075912  615182   1.216  0.328
## Residuals   27 13654956  505739               
## Inoculum: NI 
##  Variabile: length            Df   Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5 10061533 2012307   3.895 0.00771 **
## Residuals   30 15497431  516581                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: avgDiam            Df  Sum Sq  Mean Sq F value     Pr(>F)    
## treatment    5 0.08414 0.016828   10.94 0.00000792 ***
## Residuals   27 0.04154 0.001538                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: avgDiam            Df   Sum Sq   Mean Sq F value Pr(>F)  
## treatment    5 0.009542 0.0019085   2.133 0.0919 .
## Residuals   27 0.024153 0.0008945                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: NI 
##  Variabile: avgDiam            Df  Sum Sq  Mean Sq F value  Pr(>F)   
## treatment    5 0.01634 0.003268   4.653 0.00291 **
## Residuals   30 0.02107 0.000702                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: rootVolume            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5  8.088  1.6176   3.674 0.0115 *
## Residuals   27 11.887  0.4403                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: rootVolume            Df Sum Sq Mean Sq F value Pr(>F)
## treatment    5  2.624  0.5249   0.835  0.536
## Residuals   27 16.967  0.6284               
## Inoculum: NI 
##  Variabile: rootVolume            Df Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5  14.09  2.8176   3.912 0.00754 **
## Residuals   30  21.61  0.7202                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: FRL            Df  Sum Sq Mean Sq F value   Pr(>F)    
## treatment    5 6983686 1396737   6.312 0.000531 ***
## Residuals   27 5974439  221276                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: FRL            Df   Sum Sq Mean Sq F value Pr(>F)
## treatment    5  3011803  602361   1.221  0.326
## Residuals   27 13322395  493422               
## Inoculum: NI 
##  Variabile: FRL            Df   Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5  9863776 1972755   3.905 0.00761 **
## Residuals   30 15153728  505124                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: CRL            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5   1095  219.08   3.561 0.0133 *
## Residuals   27   1661   61.52                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: CRL            Df Sum Sq Mean Sq F value Pr(>F)
## treatment    5  671.2   134.2   1.554  0.207
## Residuals   27 2332.7    86.4               
## Inoculum: NI 
##  Variabile: CRL            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5   1314   262.8   2.382 0.0621 .
## Residuals   30   3310   110.3                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: FRS            Df Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5  53385   10677   5.233 0.00174 **
## Residuals   27  55092    2040                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: FRS            Df Sum Sq Mean Sq F value Pr(>F)
## treatment    5  19112    3822   0.907  0.491
## Residuals   27 113838    4216               
## Inoculum: NI 
##  Variabile: FRS            Df Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5  75282   15056   3.747 0.00938 **
## Residuals   30 120539    4018                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: CRS            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5  769.6  153.91   3.675 0.0115 *
## Residuals   27 1130.9   41.88                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: CRS            Df Sum Sq Mean Sq F value Pr(>F)
## treatment    5  493.7   98.73   1.726  0.163
## Residuals   27 1544.6   57.21               
## Inoculum: NI 
##  Variabile: CRS            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5  890.3  178.06   2.242 0.0759 .
## Residuals   30 2383.0   79.43                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB1 
##  Variabile: FVOL            Df    Sum Sq  Mean Sq F value   Pr(>F)    
## treatment    5 183488852 36697770   6.563 0.000408 ***
## Residuals   27 150970234  5591490                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: FVOL            Df    Sum Sq  Mean Sq F value Pr(>F)
## treatment    5  51809651 10361930   0.975  0.451
## Residuals   27 287061953 10631924               
## Inoculum: NI 
##  Variabile: FVOL            Df    Sum Sq  Mean Sq F value Pr(>F)
## treatment    5 113923133 22784627   1.284  0.297
## Residuals   30 532505706 17750190               
## Inoculum: AB1 
##  Variabile: weight            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5  10.08  2.0156   3.675 0.0115 *
## Residuals   27  14.81  0.5485                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: AB21 
##  Variabile: weight            Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    5  8.066   1.613   2.231 0.0801 .
## Residuals   27 19.520   0.723                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Inoculum: NI 
##  Variabile: weight            Df Sum Sq Mean Sq F value  Pr(>F)   
## treatment    5  7.099  1.4198   5.488 0.00106 **
## Residuals   30  7.762  0.2587                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# View the ANOVA TEST Table 
datatable(result_table, 
          caption = "ANOVA TEST", 
          filter = 'top',
          options = list(scrollX = TRUE, scrollY = TRUE),
          class = "display",
          style = "bootstrap")

Permanova

Perform a PERMANOVA test for each inoculum.

PERMANOVA, (Permutational Multivariate ANOVA), is a non-parametric alternative to MANOVA, or multivariate ANOVA test. It is appropriate with multiple sets of variables that do not meet the assumptions of MANOVA, namely multivariate normality. It operates on a distance matrix constructed from any dissimilarity measure, and tests the null hypothesis that there are no differences in the relative magnitude (or presence/absence) of a set of variables among objects from different treatments or groups. P-values are calculated via permutation tests.

# Set the seed option with high values
set.seed(1234)

# Subset the data for AB21
ab21<-subset(df,result_df$inoculum=="AB21")

# Crate a distance matrix
matrix_dist_AB21<-vegdist(ab21[,c(4:11,13)],method="gower")

# Perform the permanova test
permanova_ab21 <-adonis2(sqrt(matrix_dist_AB21)~ab21$treatment,permutations = 9999)
permanova_ab21

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = sqrt(matrix_dist_AB21) ~ ab21$treatment, permutations = 9999)
##                Df SumOfSqs      R2      F Pr(>F)
## ab21$treatment  5   0.7511 0.19479 1.3063 0.1322
## Residual       27   3.1050 0.80521              
## Total          32   3.8561 1.00000

# Set the seed option with high values
set.seed(1234)

# Subset the data for AB1
ab1<-subset(df,result_df$inoculum=="AB1")

# Crate a distance matrix
matrix_dist_AB1<-vegdist(ab1[,c(4:11,13)],method="gower")

# Perform the permanova test
permanova_ab1 <-adonis2(sqrt(matrix_dist_AB1)~ab1$treatment,permutations = 9999)
permanova_ab1

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = sqrt(matrix_dist_AB1) ~ ab1$treatment, permutations = 9999)
##               Df SumOfSqs      R2      F Pr(>F)    
## ab1$treatment  5   1.4072 0.36077 3.0477 0.0001 ***
## Residual      27   2.4933 0.63923                  
## Total         32   3.9004 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Set the seed option with high values
set.seed(1234)

# Subset the data for NI
ni <-subset(df,result_df$inoculum=="NI")

# Crate a distance matrix
matrix_dist_NI<-vegdist(ni[,c(4:11,13)],method="gower")

# Perform the permanova test
permanova_ni <-adonis2(sqrt(matrix_dist_NI)~ni$treatment,permutations = 9999)
permanova_ni

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = sqrt(matrix_dist_NI) ~ ni$treatment, permutations = 9999)
##              Df SumOfSqs    R2      F Pr(>F)   
## ni$treatment  5   1.4215 0.268 2.1967 0.0029 **
## Residual     30   3.8826 0.732                 
## Total        35   5.3041 1.000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The permanova results show a significative difference between treatments in NI and AB1. There is not significative statistic difference between treatments in AB21.

Statistic analysis Castanea

Gabriele Giuseppe Antonio Satta

2023-12-04 11:40:27