# -------------------------------------
# Script: import_fadn.R
# Author: Hugo Scherer
# Purpose: Preparing FADN data for use in FarmDyn
# Version: 2.0.0
# Date: 23.01.2023
# Notes:
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------

# Source import_fadn.R
## fadn2fd ----
#' `fadn2fd()` imports the FADN data from the csv files
#' @param fadn_data FADN
#' @param mapping A vector with the names of the columns to be mapped
#' @param farmbranch Either arable or dairy
#' @param save_gdx Logical. If TRUE, it saves to gdx files
#' @param rm_lown Logical. If TRUE, it removes farms with less than 15 observations
#' @return A list of dataframes with the FADN data agggregated and in p_farmData format
#' @export fadn2fd
#'
fadn2fd <- function(fadn_data, mapping, farmbranch = c("arable", "dairy"), save_gdx = FALSE, rm_lown = FALSE) {
  FADN$ID <- factor(FADN$ID)

  FADN2 <- FADN

  if (farmbranch != "arable") {
    # Dairy conditions ----
    FADN <- FADN[FADN$TF14 == 45 & FADN$SE086 >= 5, ]

    # Remove observations with the lowest 10% of milk yields
    # and the highest 1% of milk yields

    FADN <- FADN[order(FADN$SE126), ] %>%
      dplyr::slice(-c(
        1:round(nrow(FADN) * 0.1),
        (nrow(FADN) - round(nrow(FADN) * 0.01)):nrow(FADN)
      ))
  } else if (farmbranch == "arable") {
    # Arable conditions ----
    FADN <- FADN[FADN$TF14 %in% c(15, 16, 20), ]

    # Subset the data to those whose arable land is 0.98
    # the sum of the area of the crops
    narbl2 <- FADN[, c("ID", FADN2Dyn$`AA(ha)`), with = FALSE]

    # Get the total arable area
    narbl2[,
      `global%nArabLand` := rowSums(.SD, na.rm = TRUE),
      by = ID, .SDcols = FADN2Dyn$`AA(ha)`
    ]

    # Select only ID and global%nArabLand
    narbl2 <- narbl2[, c("ID", "global%nArabLand")]

    narbl_uaa <- merge(narbl2, FADN[, c("ID", "SE025")], by = "ID")

    narbl_uaa$ratio <- narbl_uaa$`global%nArabLand` / narbl_uaa$SE025
    gr_cult_ids <- narbl_uaa[narbl_uaa$ratio >= 0.8, ]$ID
    narbl2 <- narbl2[narbl2$ID %in% gr_cult_ids, ]
    FADN <- FADN[FADN$ID %in% gr_cult_ids, ]
  } else {
    rlang::abort("Please select a valid option for farmbranch")
  }

Hugo Scherer's avatar
Hugo Scherer committed
  FADNcrops_NUTS0 <- yields_gen(FADN, FADN2, reporting = FALSE)

  message("Preparing FADN data")


  new_fadn <- FADN[
    ,
    .(
      FADN[,
        c("ID", "COUNTRY", "TF14", "NUTS0", "NUTS1", "NUTS2", "NUTS3", "SYS02"),
        with = FALSE
      ],
      `global%Aks` = SE010,
      `misc%milkPrice` = PMLKCOW_SV * 0.1 / PMLKCOW_SQ,
      `global%milkYield` = SE126 / 100,
      `global%farmbranchDairy` = ifelse(TF14 == 45, -1, -2),
      `global%farmbranchMotherCows` = -2,
      `global%farmbranchBeef` = ifelse(TF14 == 49, -1, -2),
      `global%farmbranchSows` = -2,
      `global%farmbranchBiogas` = -2,
      `global%farmbranchFattners` = ifelse(TF14 == 51, -1, -2),
      `misc%farmincome` = SE410,
      `misc%net_value_added` = SE415,
      `global%nCows` = SE086,
      `global%nCalves` = LBOV1_AN,
      `global%nFattners` = LPIGFAT_AN,
      `global%nHeifs` = LHEIFBRE_AN,
      `global%nSows` = LSOWBRE_AN,
      `global%nGrasLand` = SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A,
      `global%ShareGrassLand` = (SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A) / SE025,
      `misc%UAA` = SE025,
      `misc%Winterbarley` = CBRL_A,
      `misc%WinterWheat` = CWHTC_A,
      `misc%Potatoes` = CPOT_A,
      `misc%SugarBeet` = CSUGBT_A,
      `misc%WinterRape` = CRAPE_A,
      `misc%SummerCere` = CCEROTH_A + COAT_A + CRYE_A,
      `misc%MaizSil` = CFODMZ_A,
      `misc%MaizCorn` = CMZ_A,
      `misc%summerBeans` = CPEA_A,
      `misc%totaloutput` = SE131,
      `misc%subsidies` = SE605, # excluding investment subsidides
      `misc%farm_net_income` = SE420,
      `misc%own_assets` = SE436,
      `misc%depreciation` = SE360,
      `misc%depreciationpercow` = SE360 / SE086,
      `misc%N_use` = INUSE_Q,
      `misc%total_specific_costs` = SE281,
      `misc%total_feed_costs` = SE310 + SE320,
      `misc%contractors` = SE350,
      `misc%wagespaid` = SE375,
      `misc%cropprotection` = SE300,
      `misc%rent` = SE375,
      `misc%fertiliser` = SE295,
      `misc%seedsandplants` = SE285,
      `misc%othercosts` = IOVHSOTH_V,
      `misc%paidinterest` = SE380,
      `misc%purchasedconc` = IGRFEDCNCTRPUR_V,
      `misc%machineryequipment` = SE455,
      `misc%buildings` = SE450,
      `misc%OrganicCode` = ORGANIC,
      `fadn%SIZ6` = SIZ6,
      FADN[,
        colnames(FADN)[grepl(colnames(FADN), pattern = "(^(SE).{3,4})$")],
        with = FALSE
      ]
    )
  ]

  # Add `misc%` to Standard Results
  colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")] <- paste0("fadn.", colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")])

  # I use % as a separator, _ is used for some variables
  # (meaning that if I separate with _,
  # these var names would be split in two or more),
  # so I can't use that one, but % causes troubles sometimes
  # and somehow R/DT does not like it, dplyr does though
  colnames(new_fadn) <- gsub("\\.", "%", colnames(new_fadn))
  new_fadn[is.nan(new_fadn) | new_fadn == Inf | is.na(new_fadn)] <- 0

Hugo Scherer's avatar
Hugo Scherer committed
  #  nuts3_yields <- FADNcrops_NUTS3 %>%
  #    select(-c(NUTS0, NUTS1, NUTS2))

  nuts0_yields <- FADNcrops_NUTS0 %>%
    select(-c(NUTS1, NUTS2, NUTS3))

  third_fadn <- base::Reduce(
    function(...) merge(..., by = "NUTS0", all.x = TRUE),
    list(new_fadn, nuts0_yields, defaultvalsNUTS0)
  )

  third_fadn[is.na(third_fadn)] <- 0

  # Better with dplyr, reshaping data
  # (no use of sep in DT, longer and more tedious)
  # alternatively: reshape
  third_fadn <- tibble(third_fadn)
  test_dyn <- third_fadn %>%
    pivot_longer(
      cols = -c(
        "NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02"
      ),
      names_sep = "%",
      names_to = c("item1", "item2")
    ) %>%
    pivot_wider(id_cols = c(
      "NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02", "item1"
    ), names_from = "item2", values_from = "value", values_fn = sum)

  test_dyn[is.na(test_dyn)] <- 0

  map2gui <- test_dyn %>%
    pivot_longer(
      cols = 10:length(colnames(test_dyn)),
      names_to = "item2",
      values_to = "value"
    ) %>%
    pivot_wider(
      id_cols = 1:8,
      names_from = c("item1", "item2"),
      values_from = "value",
      names_sep = "%"
    )

  # Joining all data together
  map2gui[] <- lapply(map2gui, function(x) if (is.factor(x)) as.factor(x) else x)

  map2gui[is.na(map2gui)] <- 0

  map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)] <- lapply(
    map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)],
    as.character
  )