GitLab at IIASA

import_fadn.R 10.8 KiB
Newer Older
# -------------------------------------
# Script: import_fadn.R
# Author: Hugo Scherer
# Purpose: Preparing FADN data for use in FarmDyn
# Version: 2.0.0
# Date: 23.01.2023
# Notes:
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------

# Source import_fadn.R
## fadn2fd ----
#' `fadn2fd()` imports the FADN data from the csv files
#' @param fadn_data FADN
#' @param mapping A vector with the names of the columns to be mapped
#' @param farmbranch Either arable or dairy
#' @param save_gdx Logical. If TRUE, it saves to gdx files
#' @return A list of dataframes with the FADN data agggregated and in p_farmData format
#' @export fadn2fd
#'
fadn2fd <- function(fadn_data, mapping, farmbranch = c("arable", "dairy"), save_gdx = FALSE) {
  FADN$ID <- factor(FADN$ID)

  FADN2 <- FADN

  if (farmbranch != "arable") {
    # Dairy conditions ----
    FADN <- FADN[FADN$TF14 == 45 & FADN$SE086 >= 5, ]

    # Remove observations with the lowest 10% of milk yields
    # and the highest 1% of milk yields

    FADN <- FADN[order(FADN$SE126), ] %>%
      dplyr::slice(-c(
        1:round(nrow(FADN) * 0.1),
        (nrow(FADN) - round(nrow(FADN) * 0.01)):nrow(FADN)
      ))
  } else if (farmbranch == "arable") {
    # Arable conditions ----
    FADN <- FADN[FADN$TF14 %in% c(15, 16, 20), ]

    # Subset the data to those whose arable land is 0.98
    # the sum of the area of the crops
    narbl2 <- FADN[, c("ID", FADN2Dyn$`AA(ha)`), with = FALSE]

    # Get the total arable area
    narbl2[,
      `global%nArabLand` := rowSums(.SD, na.rm = TRUE),
      by = ID, .SDcols = FADN2Dyn$`AA(ha)`
    ]

    # Select only ID and global%nArabLand
    narbl2 <- narbl2[, c("ID", "global%nArabLand")]

    narbl_uaa <- merge(narbl2, FADN[, c("ID", "SE025")], by = "ID")

    narbl_uaa$ratio <- narbl_uaa$`global%nArabLand` / narbl_uaa$SE025
    gr_cult_ids <- narbl_uaa[narbl_uaa$ratio >= 0.8, ]$ID
    narbl2 <- narbl2[narbl2$ID %in% gr_cult_ids, ]
    FADN <- FADN[FADN$ID %in% gr_cult_ids, ]
  } else {
    rlang::abort("Please select a valid option for farmbranch")
  }

  yield_gen(FADN, FADN2, reporting = FALSE)

  message("Preparing FADN data")


  new_fadn <- FADN[
    ,
    .(
      FADN[,
        c("ID", "COUNTRY", "TF14", "NUTS0", "NUTS1", "NUTS2", "NUTS3", "SYS02"),
        with = FALSE
      ],
      `global%Aks` = SE010,
      `misc%milkPrice` = PMLKCOW_SV * 0.1 / PMLKCOW_SQ,
      `global%milkYield` = SE126 / 100,
      `global%farmbranchDairy` = ifelse(TF14 == 45, -1, -2),
      `global%farmbranchMotherCows` = -2,
      `global%farmbranchBeef` = ifelse(TF14 == 49, -1, -2),
      `global%farmbranchSows` = -2,
      `global%farmbranchBiogas` = -2,
      `global%farmbranchFattners` = ifelse(TF14 == 51, -1, -2),
      `misc%farmincome` = SE410,
      `misc%net_value_added` = SE415,
      `global%nCows` = SE086,
      `global%nCalves` = LBOV1_AN,
      `global%nFattners` = LPIGFAT_AN,
      `global%nHeifs` = LHEIFBRE_AN,
      `global%nSows` = LSOWBRE_AN,
      `global%nGrasLand` = SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A,
      `global%ShareGrassLand` = (SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A) / SE025,
      `misc%UAA` = SE025,
      `misc%Winterbarley` = CBRL_A,
      `misc%WinterWheat` = CWHTC_A,
      `misc%Potatoes` = CPOT_A,
      `misc%SugarBeet` = CSUGBT_A,
      `misc%WinterRape` = CRAPE_A,
      `misc%SummerCere` = CCEROTH_A + COAT_A + CRYE_A,
      `misc%MaizSil` = CFODMZ_A,
      `misc%MaizCorn` = CMZ_A,
      `misc%summerBeans` = CPEA_A,
      `misc%totaloutput` = SE131,
      `misc%subsidies` = SE605, # excluding investment subsidides
      `misc%farm_net_income` = SE420,
      `misc%own_assets` = SE436,
      `misc%depreciation` = SE360,
      `misc%depreciationpercow` = SE360 / SE086,
      `misc%N_use` = INUSE_Q,
      `misc%total_specific_costs` = SE281,
      `misc%total_feed_costs` = SE310 + SE320,
      `misc%contractors` = SE350,
      `misc%wagespaid` = SE375,
      `misc%cropprotection` = SE300,
      `misc%rent` = SE375,
      `misc%fertiliser` = SE295,
      `misc%seedsandplants` = SE285,
      `misc%othercosts` = IOVHSOTH_V,
      `misc%paidinterest` = SE380,
      `misc%purchasedconc` = IGRFEDCNCTRPUR_V,
      `misc%machineryequipment` = SE455,
      `misc%buildings` = SE450,
      `misc%OrganicCode` = ORGANIC,
      FADN[,
        colnames(FADN)[grepl(colnames(FADN), pattern = "(^(SE).{3,4})$")],
        with = FALSE
      ]
    )
  ]

  # Add `misc%` to Standard Results
  colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")] <- paste0("misc.", colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")])

  # I use % as a separator, _ is used for some variables
  # (meaning that if I separate with _,
  # these var names would be split in two or more),
  # so I can't use that one, but % causes troubles sometimes
  # and somehow R/DT does not like it, dplyr does though
  colnames(new_fadn) <- gsub("\\.", "%", colnames(new_fadn))
  new_fadn[is.nan(new_fadn) | new_fadn == Inf | is.na(new_fadn)] <- 0

  nuts3_yields <- FADNcrops_NUTS3 %>%
    select(-c(NUTS0, NUTS1, NUTS2))

  nuts0_yields <- FADNcrops_NUTS0 %>%
    select(-c(NUTS1, NUTS2, NUTS3))

  third_fadn <- base::Reduce(
    function(...) merge(..., by = "NUTS0", all.x = TRUE),
    list(new_fadn, nuts0_yields, defaultvalsNUTS0)
  )

  third_fadn[is.na(third_fadn)] <- 0

  # Better with dplyr, reshaping data
  # (no use of sep in DT, longer and more tedious)
  # alternatively: reshape
  third_fadn <- tibble(third_fadn)
  test_dyn <- third_fadn %>%
    pivot_longer(
      cols = -c(
        "NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02"
      ),
      names_sep = "%",
      names_to = c("item1", "item2")
    ) %>%
    pivot_wider(id_cols = c(
      "NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02", "item1"
    ), names_from = "item2", values_from = "value", values_fn = sum)

  test_dyn[is.na(test_dyn)] <- 0

  map2gui <- test_dyn %>%
    pivot_longer(
      cols = 10:length(colnames(test_dyn)),
      names_to = "item2",
      values_to = "value"
    ) %>%
    pivot_wider(
      id_cols = 1:8,
      names_from = c("item1", "item2"),
      values_from = "value",
      names_sep = "%"
    )

  # Joining all data together
  map2gui[] <- lapply(map2gui, function(x) if (is.factor(x)) as.factor(x) else x)

  map2gui[is.na(map2gui)] <- 0

  map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)] <- lapply(
    map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)],
    as.character
  )

  # For each mapping, group by the mapping and summarise
  # With this configuration, you could do a list of different mappings
  # Create a list to store the results
  map2gui_list <- vector("list", length = length(mapping))

  message("Aggregating to ", length(mapping), " different mappings")
  for (map in seq_along(mapping)) {
    map2gui_list[[map]] <- map2gui %>%
      dplyr::group_by(
        across(mapping[[map]])
      ) %>%
      select(-c(ID, COUNTRY, TF14, NUTS0, NUTS1, NUTS2, NUTS3)) %>%
      summarise(
        dplyr::across(
          c(everything(), -SYS02),
          ~ if (is.numeric(.)) {
            weighted.mean(., na.rm = TRUE, w = as.numeric(as.character(SYS02)))
          } else {
            Modes(.)[1]
          }
        ),
        `misc%nFarms` = n(),
        # Weights are correct, tested
        `misc%weights` = sum(as.numeric(as.character(SYS02)))
      )
  }

  # Concatenate the items in the list of list mapping
  mapping <- lapply(mapping, paste0, collapse = "_")

  # Name the list elements
  names(map2gui_list) <- paste("map2gui", mapping, sep = "_")

  # Set the column name to farmIds
  map2gui_list <- map2gui_list %>%
    purrr::map2(
      ., seq_along(mapping), ~ dplyr::rename_with(
        .x, ~"farmIds", all_of(.y)
      )
    )


  # Add conditional variables to all NUTS regions
  map2gui_list <- purrr::map(map2gui_list, ~ .x %>%
    group_by(farmIds) %>%
    mutate(
      `global%nArabLand` = sum(across(paste0("misc%", german_crops))),
      `global%farmbranchArable` = ifelse(`global%nArabLand` > 0, -1, -2)
    ))

  # For all map2guiNUTSx, remove observations with n < 15

  # for (i in 0:3) {
  #  assign(
  #    paste0("map2guiNUTS", i),
  #    assign(
  #      paste0("map2guiNUTS", i),
  #      get(paste0("map2guiNUTS", i))[get(paste0("map2guiNUTS", i))$`misc%nFarms` >= 15, ]
  #    )
  #  )
  # }

  # Condition to see if any of the rowSums of maxrot per farmIds is not equal to 1 and show farmId for which the condition is true
  # might throw an NA there sometimes but it is not a true NA

  # for (i in seq_along(map2gui_list)) {
  #    if (any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98) |
  #        any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) > 1.02)) {
  #        warning(
  #            paste(map2gui_list[[i]][
  #                rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98, ,
  #            ]$farmIds, "has a problem with maxrot (sum not equal to 1)\n")
  #        )
  #    }
  #    if (any(is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])))) {
  #        warning(
  #            paste(map2gui_list[[i]][
  #                is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])), ,
  #            ]$farmIds, "has a problem with maxrot (NaN), check if there are any crops")
  #        )
  #    }
  # }


  for (i in seq_along(map2gui_list)) {
    setDT(map2gui_list[[i]])
    map2gui_list[[i]][, paste0("maxrot%", german_crops) := lapply(.SD, function(x) x / `global%nArabLand`), .SDcols = paste0("misc%", german_crops), by = farmIds]
    # Convert nonumericals into numerics
    cols_to_convert <- colnames(map2gui_list[[i]])[colnames(map2gui_list[[i]]) %in% paste0("global%", nonnumericals)]
    map2gui_list[[i]][, (cols_to_convert) := lapply(.SD, as.numeric), .SDcols = cols_to_convert]
    cols_to_melt <- setdiff(colnames(map2gui_list[[i]]), "farmIds")
    # Pivot from wide to long and then wide again for all map2guiNUTS
    map2gui_list[[i]] <- melt(map2gui_list[[i]], id.vars = "farmIds", measure.vars = cols_to_melt, variable.name = "item", value.name = "value")
    map2gui_list[[i]][, c("item1", "item2") := tstrsplit(item, split = "%", fixed = TRUE)]
    map2gui_list[[i]] <- dcast(map2gui_list[[i]], farmIds + item1 ~ item2, value.var = "value")
    # Replace NAs with 0
    map2gui_list[[i]][is.na(map2gui_list[[i]])] <- 0
  }

  # Add to gdx p_farmdata
  if (save_gdx == TRUE) {
    for (map in seq_along(map2gui_list)) {
      gdxreshape(
        map2gui_list[[map]],
        symDim = 3, order = c(1, 2, 0),
        gdxName = file.path(
          "data",
          "farmdata", paste("farmData", farmbranch, mapping[map], sep = "_")
        ),
        symName = "p_farmData"
      )
    }
  } else {
    return(map2gui_list)
  }
}