Hugo Scherer's avatar
Hugo Scherer committed
# FarmDynR.R ----
#
# Copyright (c) 2022 Hugo Scherer - Wageningen Economic Research
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


## Modes ----
#' Retrieve mode of vector
#'
#' This function returns the mode of a vector. If the vector contains a character or factor, the most common character/factor is returned. Numbers written as characters will be compatible with non-character numbers (i.e. doubles/numeric), but the function returns a character.
#'
#' @param x vector from which to retrieve the mode from.
#' @return same class as 'x'.
#' @examples
#' Modes(x = c(1, 1, 3, 0, 2, 4, 2, 1, 5, 2, 1))
#' Modes(x = c('a','b', 'c', 'a', 'c', 'a'))
#' Modes(x = c('a', 2, 'x', 7895, 1, '2', 't', 2, 1))
#' @seealso tabulate()
Hugo Scherer's avatar
Hugo Scherer committed
#' @export Modes
Modes <- function(x) { # Function found on StackOverflow made by Ken Williams and expanded by digEmAll
  ux <- unique(x)
  tab <- tabulate(match(x, ux))
  ux[tab == max(tab)] # Takes the highest incidence value
}


## gdxbinwider ----
#' Join BIN data together, make joined dataset wider, and group by a mapping
#'
#' @description
#' This function has been conceived with the Dutch FADN in mind, please use `fadn2fd()` for EU FADN data.
#' 
Hugo Scherer's avatar
Hugo Scherer committed
#' The `gdxbinwider()` function takes in a GDX file with BIN data as parameters p_farmData_NL and p_farmData2GUI, and a mapping as a set.
#' Then the data is widened, and the output is a tibble.
#'
#' @param filename Name of the GDX file with BIN data and mappings.
#' @param BINDir Directory where the FADN data is located.
#' @param gdxmap Name of the set in the GDX file that contains the mapping (e.g. Regs2BINID)
#' @param mapping Column name of the characteristic/variable to be grouped by (e.g. "Regions" or "Regs")
#' @return A tibble `tbl_df`
#' @seealso NULL
Hugo Scherer's avatar
Hugo Scherer committed
#' @export gdxbinwider
gdxbinwider <- function(filename, BINDir, gdxmap, mapping){

  if ('BINDir' %in% ls(envir = .GlobalEnv) & missing(BINDir)) { # Checks if BINDir is in Global Environment and uses it
    BINDir <- get('BINDir', envir = .GlobalEnv)
  } else {
    BINDir
  }

fd2guicolnames <- c("all_binid", "item1", "item2", "value")

fdnlcolnames <- c( "year", "all_binid","item1", "value")
Hugo Scherer's avatar
Hugo Scherer committed

fd2gui <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE,
                                   squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
Hugo Scherer's avatar
Hugo Scherer committed

fdnl <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE,
                                 squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
Hugo Scherer's avatar
Hugo Scherer committed


gdxmapping <- (gdxrrw::rgdx.set(file.path(BINDir,filename), gdxmap, compress=FALSE, ts=FALSE,
                                     useDomInfo = TRUE, check.names = TRUE))

# Pick entries for non-numerical globals (1.6e+303 is code for numerical globals).

dummies <- fd2gui[which(fd2gui$all_binid=="dummy"),]

nonnumdummies <- dummies[dummies$value != 1.6e+303,]

# Separation of dummy from fd2gui

fd2gui <- fd2gui[which(fd2gui$all_binid!="dummy"),]

# Widening of the data for better and easier viewing and handling

fd2gui <- fd2gui %>%
  tidyr::pivot_wider(id_cols = "all_binid", names_from = c("item1", "item2"), values_from = "value", names_sep = "%")


fdnl <- fdnl %>%
  tidyr::pivot_wider(id_cols = c("all_binid", "year"), names_from = c("item1"), values_from = "value", names_sep = "%")

# Since subsetting with !duplicated() is done following the data order, reorder the data to start from youngest to oldest
# Result: all_binids are preserved for youngest years (2020, 2019...) and removed from oldest years (2013, 2014...)

fdnl <- fdnl[order(-as.numeric(fdnl$year)),]

fdnl <- fdnl[!duplicated(fdnl$all_binid),]

# Note: Some of the Weights appear in 2019 (n-1), but not 2020 (n = earliest data year), leading to NAs later on. Make sure at least the latest year has weights.

# Keep list of variables that are NOT numeric, BUT binary or other variables with other meanings.

tokeep <- c(paste("global", nonnumdummies$item2, sep = "%"), "global%soilTypeFirm", "global%derogatie")

# Finding index of global to keep only NON-numerical values based on "tokeep"

keepmatch <- match(tokeep, colnames(fd2gui))

# Making these factors
fd2gui[keepmatch] <- lapply(fd2gui[keepmatch], as.factor)


# Joining all data together

fdnl2gui <- dplyr::left_join(fd2gui, fdnl[,c('all_binid', 'Weight')], by='all_binid')

fdnl2gui[] <- lapply(fdnl2gui, function(x) if(is.factor(x)) as.factor(x) else x)

map2gui <- dplyr::right_join(fdnl2gui, gdxmapping, by='all_binid')

map2gui <- map2gui %>% dplyr::group_by(dplyr::across(dplyr::all_of({{ mapping }})))

return(map2gui)
}


## gdxreshape ----
#' Reshape from wide to long and save to GDX
#'
#' @description
#' `gdxreshape()` formats the data to be saved in GDX into long format. It is imported from the gdxrrw package with a few improvements for performance and usability, since there is a risk of it being removed from the gdxrrw package in the future.
#' We would like to thank the R GAMS team for this useful function.
#'
#' @param inDF wide dataframe.
#' @param symDim Dimensions of symbol
#' @param symName Symbol name
#' @param tName "Time"
#' @param gdxName Name of gdx file
#' @param setsToo if sets too
#' @param order order of data
#' @param setNames name of sets
Hugo Scherer's avatar
Hugo Scherer committed
#'
#' @return A tibble `tbl_df`.
#' @seealso
#' \itemize{
#' \item{\code{\link[gdxrrw]{wgdx}}}{Write R data to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.lst}}}{Write multiple symbols to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.reshape}}}{Write multiple symbols to GDX}
#' \item{\code{\link[tidyr]{pivot_longer}}}{Make dataframes longer}
Hugo Scherer's avatar
Hugo Scherer committed
#' }
#' @export gdxreshape
gdxreshape <- function (inDF, symDim, symName=NULL, tName="time",
                        gdxName=NULL, setsToo=TRUE, order=NULL,
                        setNames=NULL) {
  # Function based on gdxrrw::wgdx.reshape of the gdxrrw package, modified for performance and usability.

  nCols <- ncol(inDF)
  timeIdx <- symDim                     # default index position for time aggregate
  if (is.null(order)) {
    idCols <- 1:(symDim-1)

    inDF[idCols] <- lapply(inDF[idCols], as.factor)

    outDF <- (tidyr::pivot_longer(inDF, cols=-dplyr::all_of(idCols)))
  }
  else if ((! is.vector(order)) || (symDim != length(order))) {
    stop ("specified order must be a vector of length symDim")
  }
  else {
    timeIdx <- -1
    if (is.character(order)) {
      stop ("order must be numeric for now")
    }
    else if (! is.numeric(order)) {
      stop ("optional order vector must be numeric or character")
    }

    idCols <- 1:(symDim-1)                                   # for k in 1:symDim
    if (any(duplicated(order))) {
      stop ('duplicate entry in order vector: nonsense')
    }

    if ((symDim-1) != sum(order>0)) {
      stop ('order vector must specify symDim-1 ID columns')
    }
    if (all(order>0)) {
      stop ('order vector must have a non-positive entry to specify the "time" index')
    }

    timeIdx <- match(0, order)

    oo <- c(idCols,(1:nCols)[-idCols])
    df2 <- inDF[oo]
    idCols <- 1:(symDim-1)

    df2[idCols] <- lapply(df2[idCols], factor)