# -------------------------------------
# Script: import_fadn.R
# Author: Hugo Scherer
# Purpose: Preparing FADN data for use in FarmDyn
# Version: 2.0.0
# Date: 23.01.2023
# Notes:
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------
# Source import_fadn.R
## fadn2fd ----
#' `fadn2fd()` imports the FADN data from the csv files
#' @param fadn_data FADN
#' @param mapping A vector with the names of the columns to be mapped
#' @param farmbranch Either arable or dairy
#' @param save_gdx Logical. If TRUE, it saves to gdx files
#' @return A list of dataframes with the FADN data agggregated and in p_farmData format
#' @export fadn2fd
#'
fadn2fd <- function(fadn_data, mapping, farmbranch = c("arable", "dairy"), save_gdx = FALSE) {
FADN$ID <- factor(FADN$ID)
FADN2 <- FADN
if (farmbranch != "arable") {
# Dairy conditions ----
FADN <- FADN[FADN$TF14 == 45 & FADN$SE086 >= 5, ]
# Remove observations with the lowest 10% of milk yields
# and the highest 1% of milk yields
FADN <- FADN[order(FADN$SE126), ] %>%
dplyr::slice(-c(
1:round(nrow(FADN) * 0.1),
(nrow(FADN) - round(nrow(FADN) * 0.01)):nrow(FADN)
))
} else if (farmbranch == "arable") {
# Arable conditions ----
FADN <- FADN[FADN$TF14 %in% c(15, 16, 20), ]
# Subset the data to those whose arable land is 0.98
# the sum of the area of the crops
narbl2 <- FADN[, c("ID", FADN2Dyn$`AA(ha)`), with = FALSE]
# Get the total arable area
narbl2[,
`global%nArabLand` := rowSums(.SD, na.rm = TRUE),
by = ID, .SDcols = FADN2Dyn$`AA(ha)`
]
# Select only ID and global%nArabLand
narbl2 <- narbl2[, c("ID", "global%nArabLand")]
narbl_uaa <- merge(narbl2, FADN[, c("ID", "SE025")], by = "ID")
narbl_uaa$ratio <- narbl_uaa$`global%nArabLand` / narbl_uaa$SE025
gr_cult_ids <- narbl_uaa[narbl_uaa$ratio >= 0.8, ]$ID
narbl2 <- narbl2[narbl2$ID %in% gr_cult_ids, ]
FADN <- FADN[FADN$ID %in% gr_cult_ids, ]
} else {
rlang::abort("Please select a valid option for farmbranch")
}
FADNcrops_NUTS0 <- yields_gen(FADN, FADN2, reporting = FALSE)
message("Preparing FADN data")
new_fadn <- FADN[
,
.(
FADN[,
c("ID", "COUNTRY", "TF14", "NUTS0", "NUTS1", "NUTS2", "NUTS3", "SYS02"),
with = FALSE
],
`global%Aks` = SE010,
`misc%milkPrice` = PMLKCOW_SV * 0.1 / PMLKCOW_SQ,
`global%milkYield` = SE126 / 100,
`global%farmbranchDairy` = ifelse(TF14 == 45, -1, -2),
`global%farmbranchMotherCows` = -2,
`global%farmbranchBeef` = ifelse(TF14 == 49, -1, -2),
`global%farmbranchSows` = -2,
`global%farmbranchBiogas` = -2,
`global%farmbranchFattners` = ifelse(TF14 == 51, -1, -2),
`misc%farmincome` = SE410,
`misc%net_value_added` = SE415,
`global%nCows` = SE086,
`global%nCalves` = LBOV1_AN,
`global%nFattners` = LPIGFAT_AN,
`global%nHeifs` = LHEIFBRE_AN,
`global%nSows` = LSOWBRE_AN,
`global%nGrasLand` = SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A,
`global%ShareGrassLand` = (SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A) / SE025,
`misc%UAA` = SE025,
`misc%Winterbarley` = CBRL_A,
`misc%WinterWheat` = CWHTC_A,
`misc%Potatoes` = CPOT_A,
`misc%SugarBeet` = CSUGBT_A,
`misc%WinterRape` = CRAPE_A,
`misc%SummerCere` = CCEROTH_A + COAT_A + CRYE_A,
`misc%MaizSil` = CFODMZ_A,
`misc%MaizCorn` = CMZ_A,
`misc%summerBeans` = CPEA_A,
`misc%totaloutput` = SE131,
`misc%subsidies` = SE605, # excluding investment subsidides
`misc%farm_net_income` = SE420,
`misc%own_assets` = SE436,
`misc%depreciation` = SE360,
`misc%depreciationpercow` = SE360 / SE086,
`misc%N_use` = INUSE_Q,
`misc%total_specific_costs` = SE281,
`misc%total_feed_costs` = SE310 + SE320,
`misc%contractors` = SE350,
`misc%wagespaid` = SE375,
`misc%cropprotection` = SE300,
`misc%rent` = SE375,
`misc%fertiliser` = SE295,
`misc%seedsandplants` = SE285,
`misc%othercosts` = IOVHSOTH_V,
`misc%paidinterest` = SE380,
`misc%purchasedconc` = IGRFEDCNCTRPUR_V,
`misc%machineryequipment` = SE455,
`misc%buildings` = SE450,
`misc%OrganicCode` = ORGANIC,
FADN[,
colnames(FADN)[grepl(colnames(FADN), pattern = "(^(SE).{3,4})$")],
with = FALSE
]
)
]
# Add `misc%` to Standard Results
colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")] <- paste0("misc.", colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")])
# I use % as a separator, _ is used for some variables
# (meaning that if I separate with _,
# these var names would be split in two or more),
# so I can't use that one, but % causes troubles sometimes
# and somehow R/DT does not like it, dplyr does though
colnames(new_fadn) <- gsub("\\.", "%", colnames(new_fadn))
new_fadn[is.nan(new_fadn) | new_fadn == Inf | is.na(new_fadn)] <- 0
# nuts3_yields <- FADNcrops_NUTS3 %>%
# select(-c(NUTS0, NUTS1, NUTS2))
nuts0_yields <- FADNcrops_NUTS0 %>%
select(-c(NUTS1, NUTS2, NUTS3))
third_fadn <- base::Reduce(
function(...) merge(..., by = "NUTS0", all.x = TRUE),
list(new_fadn, nuts0_yields, defaultvalsNUTS0)
)
third_fadn[is.na(third_fadn)] <- 0
# Better with dplyr, reshaping data
# (no use of sep in DT, longer and more tedious)
# alternatively: reshape
third_fadn <- tibble(third_fadn)
test_dyn <- third_fadn %>%
pivot_longer(
cols = -c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02"
),
names_sep = "%",
names_to = c("item1", "item2")
) %>%
pivot_wider(id_cols = c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02", "item1"
), names_from = "item2", values_from = "value", values_fn = sum)
test_dyn[is.na(test_dyn)] <- 0
map2gui <- test_dyn %>%
pivot_longer(
cols = 10:length(colnames(test_dyn)),
names_to = "item2",
values_to = "value"
) %>%
pivot_wider(
id_cols = 1:8,
names_from = c("item1", "item2"),
values_from = "value",
names_sep = "%"
)
# Joining all data together
map2gui[] <- lapply(map2gui, function(x) if (is.factor(x)) as.factor(x) else x)
map2gui[is.na(map2gui)] <- 0
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)] <- lapply(
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)],
as.character
)
# For each mapping, group by the mapping and summarise
# With this configuration, you could do a list of different mappings