Newer
Older
# FarmDynR.R ----
#
# Copyright (c) 2022 Hugo Scherer - Wageningen Economic Research
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
## Modes ----
#' Retrieve mode of vector
#'
#' This function returns the mode of a vector. If the vector contains a character or factor, the most common character/factor is returned. Numbers written as characters will be compatible with non-character numbers (i.e. doubles/numeric), but the function returns a character.
#'
#' @param x vector from which to retrieve the mode from.
#' @return same class as 'x'.
#' @examples
#' Modes(x = c(1, 1, 3, 0, 2, 4, 2, 1, 5, 2, 1))
#' Modes(x = c('a','b', 'c', 'a', 'c', 'a'))
#' Modes(x = c('a', 2, 'x', 7895, 1, '2', 't', 2, 1))
#' @export Modes
Modes <- function(x) { # Function found on StackOverflow made by Ken Williams and expanded by digEmAll
ux <- unique(x)
tab <- tabulate(match(x, ux))
ux[tab == max(tab)] # Takes the highest incidence value
}
## gdxbinwider ----
#' Join BIN data together, make joined dataset wider, and group by a mapping
#'
#' @description
#' This function has been conceived with the Dutch FADN in mind, please use `fadn2fd()` for EU FADN data.
#'
#' The `gdxbinwider()` function takes in a GDX file with BIN data as parameters p_farmData_NL and p_farmData2GUI, and a mapping as a set.
#' Then the data is widened, and the output is a tibble.
#'
#' @param filename Name of the GDX file with BIN data and mappings.
#' @param BINDir Directory where the FADN data is located.
#' @param gdxmap Name of the set in the GDX file that contains the mapping (e.g. Regs2BINID)
#' @param mapping Column name of the characteristic/variable to be grouped by (e.g. "Regions" or "Regs")
#' @return A tibble `tbl_df`
#' @seealso NULL
#' @export gdxbinwider
gdxbinwider <- function(filename, BINDir, gdxmap, mapping){
if ('BINDir' %in% ls(envir = .GlobalEnv) & missing(BINDir)) { # Checks if BINDir is in Global Environment and uses it
BINDir <- get('BINDir', envir = .GlobalEnv)
} else {
BINDir
}
fd2guicolnames <- c("all_binid", "item1", "item2", "value")
fdnlcolnames <- c( "year", "all_binid","item1", "value")
fd2gui <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE,
Hugo Scherer
committed
squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
fdnl <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE,
Hugo Scherer
committed
squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
gdxmapping <- (gdxrrw::rgdx.set(file.path(BINDir,filename), gdxmap, compress=FALSE, ts=FALSE,
useDomInfo = TRUE, check.names = TRUE))
# Pick entries for non-numerical globals (1.6e+303 is code for numerical globals).
dummies <- fd2gui[which(fd2gui$all_binid=="dummy"),]
nonnumdummies <- dummies[dummies$value != 1.6e+303,]
# Separation of dummy from fd2gui
fd2gui <- fd2gui[which(fd2gui$all_binid!="dummy"),]
# Widening of the data for better and easier viewing and handling
fd2gui <- fd2gui %>%
tidyr::pivot_wider(id_cols = "all_binid", names_from = c("item1", "item2"), values_from = "value", names_sep = "%")
fdnl <- fdnl %>%
tidyr::pivot_wider(id_cols = c("all_binid", "year"), names_from = c("item1"), values_from = "value", names_sep = "%")
# Since subsetting with !duplicated() is done following the data order, reorder the data to start from youngest to oldest
# Result: all_binids are preserved for youngest years (2020, 2019...) and removed from oldest years (2013, 2014...)
fdnl <- fdnl[order(-as.numeric(fdnl$year)),]
fdnl <- fdnl[!duplicated(fdnl$all_binid),]
# Note: Some of the Weights appear in 2019 (n-1), but not 2020 (n = earliest data year), leading to NAs later on. Make sure at least the latest year has weights.
# Keep list of variables that are NOT numeric, BUT binary or other variables with other meanings.
tokeep <- c(paste("global", nonnumdummies$item2, sep = "%"), "global%soilTypeFirm", "global%derogatie")
# Finding index of global to keep only NON-numerical values based on "tokeep"
keepmatch <- match(tokeep, colnames(fd2gui))
# Making these factors
fd2gui[keepmatch] <- lapply(fd2gui[keepmatch], as.factor)
# Joining all data together
fdnl2gui <- dplyr::left_join(fd2gui, fdnl[,c('all_binid', 'Weight')], by='all_binid')
fdnl2gui[] <- lapply(fdnl2gui, function(x) if(is.factor(x)) as.factor(x) else x)
map2gui <- dplyr::right_join(fdnl2gui, gdxmapping, by='all_binid')
map2gui <- map2gui %>% dplyr::group_by(dplyr::across(dplyr::all_of({{ mapping }})))
return(map2gui)
}
## gdxreshape ----
#' Reshape from wide to long and save to GDX
#'
#' @description
#' `gdxreshape()` formats the data to be saved in GDX into long format. It is imported from the gdxrrw package with a few improvements for performance and usability, since there is a risk of it being removed from the gdxrrw package in the future.
#' We would like to thank the R GAMS team for this useful function.
#'
#' @param inDF wide dataframe.
#' @param symDim Dimensions of symbol
#' @param symName Symbol name
#' @param tName "Time"
#' @param gdxName Name of gdx file
#' @param setsToo if sets too
#' @param order order of data
#' @param setNames name of sets
#'
#' @return A tibble `tbl_df`.
#' @seealso
#' \itemize{
#' \item{\code{\link[gdxrrw]{wgdx}}}{Write R data to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.lst}}}{Write multiple symbols to GDX}
#' \item{\code{\link[gdxrrw]{wgdx.reshape}}}{Write multiple symbols to GDX}
#' \item{\code{\link[tidyr]{pivot_longer}}}{Make dataframes longer}
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#' }
#' @export gdxreshape
gdxreshape <- function (inDF, symDim, symName=NULL, tName="time",
gdxName=NULL, setsToo=TRUE, order=NULL,
setNames=NULL) {
# Function based on gdxrrw::wgdx.reshape of the gdxrrw package, modified for performance and usability.
nCols <- ncol(inDF)
timeIdx <- symDim # default index position for time aggregate
if (is.null(order)) {
idCols <- 1:(symDim-1)
inDF[idCols] <- lapply(inDF[idCols], as.factor)
outDF <- (tidyr::pivot_longer(inDF, cols=-dplyr::all_of(idCols)))
}
else if ((! is.vector(order)) || (symDim != length(order))) {
stop ("specified order must be a vector of length symDim")
}
else {
timeIdx <- -1
if (is.character(order)) {
stop ("order must be numeric for now")
}
else if (! is.numeric(order)) {
stop ("optional order vector must be numeric or character")
}
idCols <- 1:(symDim-1) # for k in 1:symDim
if (any(duplicated(order))) {
stop ('duplicate entry in order vector: nonsense')
}
if ((symDim-1) != sum(order>0)) {
stop ('order vector must specify symDim-1 ID columns')
}
if (all(order>0)) {
stop ('order vector must have a non-positive entry to specify the "time" index')
}
timeIdx <- match(0, order)
oo <- c(idCols,(1:nCols)[-idCols])
df2 <- inDF[oo]
idCols <- 1:(symDim-1)
df2[idCols] <- lapply(df2[idCols], factor)
Loading full blame...