# FarmDynR.R ----
#
# Copyright (c) 2022 Hugo Scherer - Wageningen Economic Research
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
## Modes ----
#' Retrieve mode of vector
#'
#' This function returns the mode of a vector. If the vector contains a character or factor, the most common character/factor is returned. Numbers written as characters will be compatible with non-character numbers (i.e. doubles/numeric), but the function returns a character.
#'
#' @param x vector from which to retrieve the mode from.
#' @return same class as 'x'.
#' @examples
#' Modes(x = c(1, 1, 3, 0, 2, 4, 2, 1, 5, 2, 1))
#' Modes(x = c('a','b', 'c', 'a', 'c', 'a'))
#' Modes(x = c('a', 2, 'x', 7895, 1, '2', 't', 2, 1))
#' @seealso [tabulate()]
#' @export Modes
Modes <- function(x) { # Function found on StackOverflow made by Ken Williams and expanded by digEmAll
ux <- unique(x)
tab <- tabulate(match(x, ux))
ux[tab == max(tab)] # Takes the highest incidence value
}
## gdxbinwider ----
#' Join BIN data together, make joined dataset wider, and group by a mapping
#'
#' @description
#' This function has been conceived with the Dutch FADN in mind, please use `fadn2fd()` for EU FADN data.
#'
#' The `gdxbinwider()` function takes in a GDX file with BIN data as parameters p_farmData_NL and p_farmData2GUI, and a mapping as a set.
#' Then the data is widened, and the output is a tibble.
#'
#' @param filename Name of the GDX file with BIN data and mappings.
#' @param BINDir Directory where the FADN data is located.
#' @param gdxmap Name of the set in the GDX file that contains the mapping (e.g. Regs2BINID)
#' @param mapping Column name of the characteristic/variable to be grouped by (e.g. "Regions" or "Regs")
#' @return A tibble `tbl_df`.
#' @examples
#' BINDir <- "inst/extdata/GAMS"
#' datafile <- 'FarmDynRexampledata.gdx'
#' gdxbinwider(datafile, BINDir, 'map2binid', 'mapping')
#' @seealso
#' \itemize{
#' \item{\code{\link[gdxrrw]{rgdx.param}}}{Load GDX parameters}
#' \item{\code{\link[gdxrrw]{rgdx.set}}}{Load GDX sets}
#' \item{\code{\link[tidyr]{pivot_wider}}}{Make dataframes wider}
#' }
#' @export gdxbinwider
gdxbinwider <- function(filename, BINDir, gdxmap, mapping){
if ('BINDir' %in% ls(envir = .GlobalEnv) & missing(BINDir)) { # Checks if BINDir is in Global Environment and uses it
BINDir <- get('BINDir', envir = .GlobalEnv)
} else {
BINDir
}
#TODO Make functional column names with if statements and regex given the values of the columns when loading (e.g. if column value == 20.., colname = year)
fd2guicolnames <- c("all_binid", "item1", "item2", "value")
fdnlcolnames <- c( "year", "all_binid","item1", "value")
fd2gui <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE,
squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
fdnl <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE,
squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
#TODO Make gdxmapping compatible with multiple mappings, with lapply? for loop?
# first idea: gdxmapping[] <- lapply(gdxmapping, rgdx.set(...)); gdxmapping <- list(), for i in gdxmap...
gdxmapping <- (gdxrrw::rgdx.set(file.path(BINDir,filename), gdxmap, compress=FALSE, ts=FALSE,
useDomInfo = TRUE, check.names = TRUE))
# Pick entries for non-numerical globals (1.6e+303 is code for numerical globals).
dummies <- fd2gui[which(fd2gui$all_binid=="dummy"),]
nonnumdummies <- dummies[dummies$value != 1.6e+303,]
# Separation of dummy from fd2gui
fd2gui <- fd2gui[which(fd2gui$all_binid!="dummy"),]
# Widening of the data for better and easier viewing and handling
fd2gui <- fd2gui %>%
tidyr::pivot_wider(id_cols = "all_binid", names_from = c("item1", "item2"), values_from = "value", names_sep = "%")
fdnl <- fdnl %>%
tidyr::pivot_wider(id_cols = c("all_binid", "year"), names_from = c("item1"), values_from = "value", names_sep = "%")
# Since subsetting with !duplicated() is done following the data order, reorder the data to start from youngest to oldest
# Result: all_binids are preserved for youngest years (2020, 2019...) and removed from oldest years (2013, 2014...)
fdnl <- fdnl[order(-as.numeric(fdnl$year)),]
fdnl <- fdnl[!duplicated(fdnl$all_binid),]
# Note: Some of the Weights appear in 2019 (n-1), but not 2020 (n = earliest data year), leading to NAs later on. Make sure at least the latest year has weights.
# Keep list of variables that are NOT numeric, BUT binary or other variables with other meanings.
tokeep <- c(paste("global", nonnumdummies$item2, sep = "%"), "global%soilTypeFirm", "global%derogatie")
# Finding index of global to keep only NON-numerical values based on "tokeep"
keepmatch <- match(tokeep, colnames(fd2gui))
# Making these factors
fd2gui[keepmatch] <- lapply(fd2gui[keepmatch], as.factor)
# Joining all data together
fdnl2gui <- dplyr::left_join(fd2gui, fdnl[,c('all_binid', 'Weight')], by='all_binid')
fdnl2gui[] <- lapply(fdnl2gui, function(x) if(is.factor(x)) as.factor(x) else x)
map2gui <- dplyr::right_join(fdnl2gui, gdxmapping, by='all_binid')
map2gui <- map2gui %>% dplyr::group_by(dplyr::across(dplyr::all_of({{ mapping }})))
return(map2gui)
}
## gdxreshape ----
#' Reshape from wide to long and save to GDX
#'
#' @description
#' `gdxreshape()` formats the data to be saved in GDX into long format. It is imported from the gdxrrw package with a few improvements for performance and usability, since there is a risk of it being removed from the gdxrrw package in the future.
#' We would like to thank the R GAMS team for this useful function.
#'
#' @param inDF wide dataframe.
#' @param symDim wide dataframe.
#' @param symName wide dataframe.
#' @param tName wide dataframe.
#' @param gdxName wide dataframe.
#' @param setsToo wide dataframe.
#' @param order wide dataframe.
#' @param setNames wide dataframe.
#'
#' @return A tibble `tbl_df`.
#' @seealso
#' \itemize{
#' \item{\code{\link[gdxrrw]{wgdx}}}{Write R data to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.lst}}}{Write multiple symbols to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.reshape}}}{Write multiple symbols to GDX}
#' \item{\code{\link[tidyr]{pivot_longer}}}{Make dataframes longer}
#' }
#' @export gdxreshape
gdxreshape <- function (inDF, symDim, symName=NULL, tName="time",
gdxName=NULL, setsToo=TRUE, order=NULL,
setNames=NULL) {
# Function based on gdxrrw::wgdx.reshape of the gdxrrw package, modified for performance and usability.
nCols <- ncol(inDF)
timeIdx <- symDim # default index position for time aggregate
if (is.null(order)) {
idCols <- 1:(symDim-1)
inDF[idCols] <- lapply(inDF[idCols], as.factor)
outDF <- (tidyr::pivot_longer(inDF, cols=-dplyr::all_of(idCols)))
}
else if ((! is.vector(order)) || (symDim != length(order))) {
stop ("specified order must be a vector of length symDim")
}
else {
timeIdx <- -1
if (is.character(order)) {
stop ("order must be numeric for now")
}
else if (! is.numeric(order)) {
stop ("optional order vector must be numeric or character")
}
idCols <- 1:(symDim-1) # for k in 1:symDim
if (any(duplicated(order))) {
stop ('duplicate entry in order vector: nonsense')
}
if ((symDim-1) != sum(order>0)) {
stop ('order vector must specify symDim-1 ID columns')