Package: FarmDynR Package: FarmDynR
Type: Package Type: Package
Title: Run FarmDyn from R and create sample farms Title: Run FarmDyn from R and create sample farms
Date: 2022-10-03 Date: 2023-12-07
Version: 0.2.0 Version: 0.5.0
Authors@R: c(person('Hugo', 'Scherer', role=c('aut', 'cre'), email="hugo.scherer@wur.nl"), Authors@R: c(person('Hugo', 'Scherer', role=c('aut', 'cre'), email="hugo.scherer@wur.nl"),
person('Marc', 'Müller', role='ctb', email="marc.muller@wur.nl")) person('Marc', 'Müller', role='ctb', email="marc.muller@wur.nl"))
Author: Hugo Scherer [aut, cre] Author: Hugo Scherer [aut, cre]
Marc Müller [ctb] Marc Müller [ctb]
Maintainer: Hugo Scherer <hugo.scherer@wur.nl> Maintainer: Hugo Scherer <hugo.scherer@wur.nl>
Description: This package allows the user to run FarmDyn from R and includes useful functions such as: Description: This package allows the user to run FarmDyn from R, create sample farms from FADN data for use in FarmDyn, and includes useful functions to work with the results from FarmDyn.
Modes() to retrieve the mode of any set of numbers or characters in a dataset or vector, and
groupstats() to generate descriptive statistics used for reporting based on a grouping.
It also writes gdx files for groupstats() and samplr(), and includes a faster version of gdxrrw:wgdx.reshape() called gdxreshape()
Depends: R (>= 4.2.1) Depends: R (>= 4.2.1)
Imports: tidyverse, gdxrrw Imports:
tidyverse,
gdxrrw,
magrittr
License: GPL (>= 3) License: GPL (>= 3)
Encoding: UTF-8 Encoding: UTF-8
LazyData: false LazyData: true
URL: https://www.wur.nl/ URL: https://www.wur.nl/
RoxygenNote: 7.2.2 RoxygenNote: 7.2.3
Suggests: Suggests:
stringr (>= 1.4.1) stringr (>= 1.4.1)
File deleted
File added
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
export("%>%")
export(Modes) export(Modes)
export(abs_diff)
export(fadn2fd)
export(fd_desc)
export(first_match_col)
export(gdxbinwider) export(gdxbinwider)
export(gdxload) export(gdxload)
export(gdxreshape) export(gdxreshape)
export(groupstats) export(groupstats)
export(is.nan.data.frame)
export(load_dump_marg)
export(load_dump_par)
export(load_dump_scalar)
export(load_dumps)
export(replace_first_match_col)
export(rgdx.var)
export(rm_lown)
export(runFarmDynfromBatch) export(runFarmDynfromBatch)
export(scen_analysis)
export(scen_diff)
export(str_firstLine_replace) export(str_firstLine_replace)
export(str_lastLine_replace) export(str_lastLine_replace)
export(str_line_replace) export(str_line_replace)
export(updateFarmData) export(updateFarmData)
export(vars_dump)
export(writeBatch) export(writeBatch)
importFrom(magrittr,"%>%")
# FarmDynR 0.2.0 # FarmDynR 0.5.0
* Additional capacities to work with FADN data
* Improved code
* Improved documentation
* Added useful functions to work with FarmDyn results
* Added a `NEWS.md` file to track changes to the package.
* Added `str_line_replace()` and `writeBatch()` functions
...@@ -73,10 +73,10 @@ fd2guicolnames <- c("all_binid", "item1", "item2", "value") ...@@ -73,10 +73,10 @@ fd2guicolnames <- c("all_binid", "item1", "item2", "value")
fdnlcolnames <- c( "year", "all_binid","item1", "value") fdnlcolnames <- c( "year", "all_binid","item1", "value")
fd2gui <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE, fd2gui <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE,
squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
fdnl <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE, fdnl <- (gdxrrw::rgdx.param(file.path(BINDir, filename), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE,
squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE))
#TODO Make gdxmapping compatible with multiple mappings, with lapply? for loop? #TODO Make gdxmapping compatible with multiple mappings, with lapply? for loop?
# first idea: gdxmapping[] <- lapply(gdxmapping, rgdx.set(...)); gdxmapping <- list(), for i in gdxmap... # first idea: gdxmapping[] <- lapply(gdxmapping, rgdx.set(...)); gdxmapping <- list(), for i in gdxmap...
...@@ -334,6 +334,7 @@ gdxreshape <- function (inDF, symDim, symName=NULL, tName="time", ...@@ -334,6 +334,7 @@ gdxreshape <- function (inDF, symDim, symName=NULL, tName="time",
#' } #' }
#' @export groupstats #' @export groupstats
#FIXME fix the example data!! Not working #FIXME fix the example data!! Not working
#TODO Include options for different types of aggregation based on different years and if only farms that are available every year should be selected.
groupstats <- function(filename, BINDir, gdxmap, mapping, cols, w, writegdx = TRUE, filtern = FALSE) { groupstats <- function(filename, BINDir, gdxmap, mapping, cols, w, writegdx = TRUE, filtern = FALSE) {
if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) { if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) {
...@@ -422,7 +423,7 @@ updateFarmData <- function(filename, BINDir, gdxmap, mapping, writegdx = TRUE, ...@@ -422,7 +423,7 @@ updateFarmData <- function(filename, BINDir, gdxmap, mapping, writegdx = TRUE,
if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) { if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) {
BINDir <- get("BINDir", envir = .GlobalEnv) BINDir <- get("BINDir", envir = .GlobalEnv)
} else { } else {
BINDIR BINDir
} }
map2gui <- gdxbinwider(filename, BINDir, gdxmap, mapping) map2gui <- gdxbinwider(filename, BINDir, gdxmap, mapping)
...@@ -430,7 +431,7 @@ updateFarmData <- function(filename, BINDir, gdxmap, mapping, writegdx = TRUE, ...@@ -430,7 +431,7 @@ updateFarmData <- function(filename, BINDir, gdxmap, mapping, writegdx = TRUE,
if (cptcoeffs == TRUE) { if (cptcoeffs == TRUE) {
# TODO: Could be done more programatically # TODO: Could be done more programatically
farmchars <- gdxload(farmchars, 'p_farmCharBIN', symbol = 'param', symName= "p_farmCharBIN", names=c('all_binid', 'year', 'char', 'value'), compress=FALSE, ts=FALSE, farmchars <- gdxload(farmchars, 'p_farmCharBIN', symbol = 'param', symName= "p_farmCharBIN", names=c('all_binid', 'year', 'char', 'value'), compress=FALSE, ts=FALSE,
squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE) squeeze=FALSE, useDomInfo = TRUE, check.names = TRUE)
cptcoeffs <- read_xlsx(cptcoeffsxl,sheet = 'dat') cptcoeffs <- read_xlsx(cptcoeffsxl,sheet = 'dat')
...@@ -707,3 +708,409 @@ gdxload <- function(filename, symbol=c('set', 'param', 'scalar'), symName, names ...@@ -707,3 +708,409 @@ gdxload <- function(filename, symbol=c('set', 'param', 'scalar'), symName, names
return(symb) return(symb)
} }
## rgdx.var ----
#' Improved function to load variables from gdx files (from Renger in GAMSWorld forum)
#'
#' Reads a GAMS variable from a gdx file and replaces the integer uel indices
#' returned by `gdxrrw::rgdx()` with the corresponding set-element labels.
#' @param varname Name of the variable to load
#' @param scen_name Name of the scenario gdx file to load (file name)
#' @param res_fold Path to the folder where the gdx files are
#' @return A data.frame with one column per variable dimension, a `field`
#'   column and a `value` column. The gdx domain names, symbol type, name
#'   and description are attached as attributes.
#' @export rgdx.var
#' @references https://forum.gamsworld.org/viewtopic.php?t=9966
rgdx.var <- function(varname, scen_name, res_fold) {
  # Read the raw symbol; $val holds integer-coded uel indices plus the value
  # column. Kept in a separate variable so the `varname` argument is not
  # shadowed (the original overwrote it).
  sym <- gdxrrw::rgdx(
    paste(res_fold, scen_name, sep = "/"),
    requestList = list(name = varname)
  )
  var.data <- data.frame(sym$val)
  var.dim <- length(sym$uels)
  domains <- sym$domains
  # Unnamed domains ("*") get synthetic names X1, X2, ... The original
  # computed these but never used them, leaving "*" column names behind.
  for (j in seq_len(var.dim)) {
    if (domains[j] == "*") {
      domains[j] <- paste0("X", j)
    }
  }
  # Replace each integer index column with the matching uel labels
  for (i in seq_len(var.dim)) {
    dim <- data.frame(sym$uels[[i]])
    dim$id <- seq_along(dim[, 1])
    index <- domains[i]
    colnames(dim) <- c(index, "id")
    var.colname <- paste0("X", i)
    var.data <- merge(dim, var.data, by.x = "id", by.y = var.colname)
    var.data <- var.data[, -which(colnames(var.data) == "id")]
  }
  # merge() prepends columns, so restore the original dimension order
  var.data <- var.data[, c(var.dim:1, var.dim + 1)]
  colnames(var.data)[var.dim + 1] <- "value"
  colnames(var.data)[var.dim] <- "field"
  attributes(var.data)$domains <- sym$domains
  attributes(var.data)$type <- "variable"
  attributes(var.data)$symName <- sym$name
  attributes(var.data)$description <- sym$description
  return(var.data)
}
## vars_dump ----
#' Load variables from dump gdx files and return a data.table
#'
#' For each scenario, finds all files named `dump_<scenario>_XXXX.gdx`
#' (XXXX = 4 alphanumeric characters encoding the farm id), loads `varname`
#' from each with `rgdx.var()`, tags every row with the farm id taken from
#' the file name, and row-binds the per-farm tables per scenario.
#' @inheritParams rgdx.var
#' @return A list (one element per scenario) of data.tables with the
#'   variable, a `farmIds` column and a `sims` column (scenario name).
#' @export vars_dump
vars_dump <- function(res_fold, varname, scen_name) {
  files_in_res <- list.files(res_fold, pattern = "gdx", full.names = FALSE)
  files <- lapply(scen_name, function(x) {
    files_in_res[grepl(paste0("dump_", x, "_", "[[:alnum:]]{4}"), files_in_res)]
  })
  # Preallocate both result lists instead of growing them in the loop
  scen_call <- vector("list", length(scen_name))
  var_call <- vector("list", length(scen_name))
  for (y in seq_along(scen_name)) {
    scen_call[[y]] <- lapply(files[[y]], function(x) {
      rgdx.var(
        varname = varname,
        scen_name = x,
        res_fold = res_fold
      ) %>%
        select(-c("t")) %>%
        # Farm id = file name minus the "dump_<scen>_" prefix and ".gdx"
        mutate(
          farmIds = gsub("^\\w+_", "", x) %>% gsub(".gdx", "", .)
        )
    })
    # Original had a dangling duplicated `var_call[[y]] <-` here, which
    # parsed as a redundant chained assignment; a single assignment suffices.
    var_call[[y]] <- scen_call[[y]] %>% rbindlist()
    var_call[[y]]$sims <- scen_name[y]
  }
  return(var_call)
}
## scen_analysis ----
#' Data analysis function for repeated tasks
#'
#' This loads `p_res` from the result gdx file of each scenario, stacks them
#' into a single data.table and keeps only the aggregated "mean" time rows.
#' @inheritParams rgdx.var
#' @param file_suffix Suffix of the result file names; the file read per
#'   scenario is `res_<scenario><file_suffix>`. Defaults to the historical
#'   "_until_2010.gdx" naming so existing callers are unaffected.
#' @return A data.table with the parameters (sims, farmIds, resItem1-3,
#'   time, value), filtered to time == "mean".
#' @export scen_analysis
scen_analysis <- function(res_fold, scen_name, file_suffix = "_until_2010.gdx") {
  # Load the sims for all scenario result files using rgdx.param (p_res),
  # then stack and filter. Preallocate the per-scenario list.
  scen_call <- vector("list", length(scen_name))
  for (i in seq_along(scen_name)) {
    scen_call[[i]] <- gdxrrw::rgdx.param(
      paste(
        res_fold, paste0("res_", scen_name[i], file_suffix),
        sep = "/"
      ), "p_res",
      names = c(
        "sims", "farmIds", "resItem1", "resItem2", "resItem3", "time", "value"
      ), compress = FALSE, ts = FALSE,
      squeeze = FALSE, useDomInfo = TRUE, check.names = TRUE
    )
    # The sims column read from the gdx is replaced with the scenario label
    scen_call[[i]]$sims <- scen_name[i]
  }
  scen_call <- rbindlist(scen_call)
  # Keep only the aggregated "mean" period
  scen_call <- scen_call[scen_call$time == "mean", ]
  return(scen_call)
}
## scen_diff ----
#' Calculates the relative difference of the reference and the scenarios in a new column
#'
#' For every variable in `vars_to_diff`, adds a `<var>_diff` column holding
#' the difference relative to the first observation of each farm (assumed to
#' be the reference scenario): (x - first(x)) / first(x).
#' @param data A dataframe
#' @param vars_to_diff A vector with the names of the variables to calculate the difference
#' @return A dataframe (grouped by farmIds) with the new columns
#' @export scen_diff
scen_diff <- function(data, vars_to_diff) {
  # Nothing to compute: return the input untouched (and ungrouped),
  # matching the original loop's behavior for an empty vector
  if (length(vars_to_diff) == 0) {
    return(data)
  }
  data %>%
    dplyr::group_by(farmIds) %>%
    dplyr::mutate(dplyr::across(
      dplyr::all_of(vars_to_diff),
      ~ (.x - dplyr::first(.x)) / dplyr::first(.x),
      .names = "{.col}_diff"
    ))
}
## abs_diff ----
#' Calculates the absolute difference of the reference and the scenarios in a new column
#'
#' For every variable in `vars_to_diff`, adds a `<var>_absolute_diff` column
#' holding the difference to the first observation of each farm (assumed to
#' be the reference scenario): x - first(x).
#' @inheritParams scen_diff
#' @return A dataframe (grouped by farmIds) with the new columns
#' @export abs_diff
abs_diff <- function(data, vars_to_diff) {
  # Nothing to compute: return the input untouched (and ungrouped),
  # matching the original loop's behavior for an empty vector
  if (length(vars_to_diff) == 0) {
    return(data)
  }
  data %>%
    dplyr::group_by(farmIds) %>%
    dplyr::mutate(dplyr::across(
      dplyr::all_of(vars_to_diff),
      ~ .x - dplyr::first(.x),
      .names = "{.col}_absolute_diff"
    ))
}
# Function in case I need to use the dumps instead of res
#' `load_dumps()` loads the p_res from the dump gdx files
#'
#' Finds every `dump_<scenario>_XXXX.gdx` file per scenario, reads the
#' `p_res` parameter from each, and row-binds them into one data.table per
#' scenario, tagged with the scenario name in `sims`.
#' @inheritParams scen_analysis
#' @return A data.table with the parameters
#' @export load_dumps
load_dumps <- function(res_fold, scen_name) {
  gdx_files <- list.files(res_fold, pattern = "gdx", full.names = FALSE)
  # Dump files per scenario: "dump_<scen>_" followed by 4 alphanumerics
  per_scen_files <- lapply(scen_name, function(scen) {
    gdx_files[grepl(paste0("dump_", scen, "_", "[[:alnum:]]{4}"), gdx_files)]
  })
  out <- vector("list", length(scen_name))
  for (i in seq_along(scen_name)) {
    per_farm <- lapply(per_scen_files[[i]], function(f) {
      rgdx.param(
        paste(res_fold, f, sep = "/"),
        "p_res",
        names = c(
          "sims", "farmIds", "resItem1", "resItem2", "resItem3", "time", "value"
        ),
        compress = FALSE, ts = FALSE,
        squeeze = FALSE, useDomInfo = TRUE, check.names = TRUE
      )
    })
    out[[i]] <- rbindlist(per_farm)
    out[[i]]$sims <- scen_name[i]
  }
  return(out)
}
# Function to load any dump parameter
## load_dump_par ----
#' `load_dump_par()` loads any parameter from the dump gdx files
#'
#' Like `load_dumps()`, but for an arbitrary parameter name; every row is
#' additionally tagged with the farm id taken from the file name.
#' @inheritParams scen_analysis
#' @param param Name of the parameter to load
#' @param names Names of the columns to load
#' @return A data.table with the parameters
#' @export load_dump_par
load_dump_par <- function(res_fold, scen_name, param, names = NULL) {
  gdx_files <- list.files(res_fold, pattern = "gdx", full.names = FALSE)
  # Dump files per scenario: "dump_<scen>_" followed by 4 alphanumerics
  per_scen_files <- lapply(scen_name, function(scen) {
    gdx_files[grepl(paste0("dump_", scen, "_", "[[:alnum:]]{4}"), gdx_files)]
  })
  out <- vector("list", length(scen_name))
  for (i in seq_along(scen_name)) {
    per_farm <- lapply(per_scen_files[[i]], function(f) {
      rgdx.param(
        paste(res_fold, f, sep = "/"),
        param,
        names = names, compress = FALSE, ts = FALSE,
        squeeze = FALSE, useDomInfo = TRUE, check.names = TRUE
      ) %>%
        # Farm id = file name minus the "dump_<scen>_" prefix and ".gdx"
        mutate(
          farmIds = gsub("^\\w+_", "", f) %>% gsub(".gdx", "", .)
        )
    })
    out[[i]] <- rbindlist(per_farm)
    out[[i]]$sims <- scen_name[i]
  }
  return(out)
}
# Function for other things that rgdx is too stubborn to take out of the dumps
## load_dump_scalar ----
#' `load_dump_scalar()` loads any scalar from the dump gdx files
#'
#' For each scenario, reads the scalar's value out of every
#' `dump_<scenario>_XXXX.gdx` file and returns one long table with a row per
#' scenario/farm combination.
#' @inheritParams scen_analysis
#' @param scalar Name of the scalar to load
#' @return A data.table with the scalar
#' @export load_dump_scalar
#' @examples
#' load_dump_scalar(res_fold = res_fold, scen_name = scen_name, scalar = "p_Nmin")
load_dump_scalar <- function(res_fold, scen_name, scalar) {
  files_in_res <- list.files(res_fold, pattern = "gdx", full.names = FALSE)
  # Dump files per scenario: "dump_<scen>_" followed by 4 alphanumerics
  # (the farm id)
  files <- lapply(scen_name, function(x) {
    files_in_res[grepl(paste0("dump_", x, "_", "[[:alnum:]]{4}"), files_in_res)]
  })
  scen_call <- list()
  for (i in seq_along(scen_name)) {
    # $val of the rgdx() request carries the raw value of the scalar
    scen_call[[i]] <- lapply(files[[i]], function(x) {
      rgdx(
        paste(
          res_fold, x,
          sep = "/"
        ), list(name = scalar)
      )$val
    })
    # scen_call[[i]] <- rbindlist(t(scen_call[[i]]))
    # Name each per-farm value by its farm id (file name minus the
    # "dump_<scen>_" prefix and ".gdx"), then append the scenario label
    names(scen_call[[i]]) <- gsub("^\\w+_", "", files[[i]]) %>% gsub(".gdx", "", .)
    scen_call[[i]]$sims <- scen_name[i]
    # scen_call[[i]]$farmIds <- gsub("^\\w+_", "", files[[i]]) %>% gsub(".gdx", "", .)
  }
  # t() turns the list of per-scenario lists into a 1-row matrix-list so
  # rbindlist() binds one row per scenario. NOTE(review): this appears to
  # rely on every scenario exposing the same farm ids -- confirm before reuse.
  scen_call <- t(scen_call)
  scen_call <- rbindlist(scen_call)
  # Wide (one column per farm) -> long: farmIds column plus a value column
  # named after the scalar itself
  scen_call <- pivot_longer(scen_call, cols = c(-sims), names_to = "farmIds", values_to = scalar)
  return(scen_call)
}
# Function for other things that rgdx is too stubborn to take out of the dumps
## load_dump_marg ----
#' `load_dump_marg()` loads any marginal from the dump gdx files
#'
#' For each scenario, reads the marginal (field "m") of the given symbol out
#' of every `dump_<scenario>_XXXX.gdx` file and returns one long table with
#' a row per scenario/farm combination.
#' @inheritParams scen_analysis
#' @param marginal Name of the marginal to load
#' @return A data.table with the marginal
#' @export load_dump_marg
load_dump_marg <- function(res_fold, scen_name, marginal) {
  files_in_res <- list.files(res_fold, pattern = "gdx", full.names = FALSE)
  # Dump files per scenario: "dump_<scen>_" followed by 4 alphanumerics
  # (the farm id)
  files <- lapply(scen_name, function(x) {
    files_in_res[grepl(paste0("dump_", x, "_", "[[:alnum:]]{4}"), files_in_res)]
  })
  scen_call <- list()
  for (i in seq_along(scen_name)) {
    # Request field "m" (marginal); $val[, 4] extracts the value column,
    # which assumes three index dimensions -- TODO(review): confirm the
    # symbol is always 3-dimensional
    scen_call[[i]] <- lapply(files[[i]], function(x) {
      rgdx(
        paste(
          res_fold, x,
          sep = "/"
        ), list(name = marginal, field = "m")
      )$val[, 4]
    })
    # scen_call[[i]] <- rbindlist(t(scen_call[[i]]))
    # Name each per-farm value by its farm id (file name minus the
    # "dump_<scen>_" prefix and ".gdx"), then append the scenario label
    names(scen_call[[i]]) <- gsub("^\\w+_", "", files[[i]]) %>% gsub(".gdx", "", .)
    scen_call[[i]]$sims <- scen_name[i]
    # scen_call[[i]]$farmIds <- gsub("^\\w+_", "", files[[i]]) %>% gsub(".gdx", "", .)
  }
  # t() turns the list of per-scenario lists into a 1-row matrix-list so
  # rbindlist() binds one row per scenario. NOTE(review): this appears to
  # rely on every scenario exposing the same farm ids -- confirm before reuse.
  scen_call <- t(scen_call)
  scen_call <- rbindlist(scen_call)
  # Wide (one column per farm) -> long: farmIds column plus a value column
  # named after the marginal itself
  scen_call <- pivot_longer(scen_call, cols = c(-sims), names_to = "farmIds", values_to = marginal)
  return(scen_call)
}
# Function for descriptive statistics WITH exclusion of farms with less than 15 farms
## fd_desc ----
#' `fd_desc()` calculates the descriptive statistics of the farm data
#'
#' Computes weighted descriptive statistics per NUTS0 region and drops
#' regions aggregating fewer than 15 farms.
#' @param farm_data A dataframe with the p_farmData
#' @param type Type of farm data to analyse ("arable" or "dairy")
#' @param csv Logical. If TRUE, it saves the results as a csv
#' @param dir Directory the csv is written to when `csv = TRUE`
#' @return A dataframe with the descriptive statistics
#' @export fd_desc
fd_desc <- function(farm_data, type = c("arable", "dairy"), csv = FALSE, dir = NULL) {
  # Validate `type` and resolve the default vector; previously an unset
  # `type` produced a length-2 condition in the comparisons below, and an
  # invalid value fell through to an obscure "descstats not found" error.
  type <- match.arg(type)
  # Derive NUTS0 from the first two characters of the farm id when missing.
  # The original did this through a scalar ifelse() whose branches were
  # assignments -- a side-effect anti-pattern; a plain if is equivalent.
  if (!"NUTS0" %in% colnames(farm_data)) {
    farm_data$NUTS0 <- substr(farm_data$farmIds, 1, 2)
  }
  if (type == "arable") {
    # NOTE(review): the arable branch sources FNVA from `misc%net cashflow`
    # while the dairy branch uses `misc%net_value_added` -- confirm intended
    descstats <- farm_data %>%
      group_by(NUTS0) %>%
      summarise(
        `Land [ha]` = weighted.mean(`global%nArabLand`, `misc%weights`, na.rm = TRUE),
        SummerCere = weighted.mean(`misc%SummerCere`, `misc%weights`, na.rm = TRUE),
        Winterbarley = weighted.mean(`misc%Winterbarley`, `misc%weights`, na.rm = TRUE),
        WinterWheat = weighted.mean(`misc%WinterWheat`, `misc%weights`, na.rm = TRUE),
        MaizCorn = weighted.mean(`misc%MaizCorn`, `misc%weights`, na.rm = TRUE),
        MaizSil = weighted.mean(`misc%MaizSil`, `misc%weights`, na.rm = TRUE),
        # `Annual Work Units` = median(`global%Aks`, `misc%weights`, na.rm = TRUE),
        `Farm Net Value Added [EUR]` = weighted.mean(`misc%net cashflow`, `misc%weights`, na.rm = TRUE),
        `Median FNVA [EUR]` = median(`misc%net cashflow`, na.rm = TRUE),
        `Annual Work Units` = weighted.mean(`global%Aks`, `misc%weights`, na.rm = TRUE),
        `FNVA per AWU` = `Farm Net Value Added [EUR]` / `Annual Work Units`,
        N_use = weighted.mean(`misc%N_use`, `misc%weights`, na.rm = TRUE),
        n = sum(`misc%nFarms`)
      )
    descstats <- descstats %>%
      mutate(
        `Share of Cereals [%]` = ((SummerCere + Winterbarley + WinterWheat) / `Land [ha]`) * 100,
        `kg N per ha` = N_use * 1000 / `Land [ha]`
      ) %>%
      select(
        NUTS0, `Land [ha]`, `Share of Cereals [%]`,
        `Annual Work Units`, `kg N per ha`, `Farm Net Value Added [EUR]`, n,
        `Median FNVA [EUR]`, `FNVA per AWU`
      ) %>%
      filter(n >= 15)
  }
  if (type == "dairy") {
    descstats <- farm_data %>%
      group_by(NUTS0) %>%
      summarise(
        nCows_mean = weighted.mean(`global%nCows`, `misc%weights`, na.rm = TRUE),
        nArabLand_mean = weighted.mean(`global%nArabLand`, `misc%weights`, na.rm = TRUE),
        nGrasLand_mean = weighted.mean(`global%nGrasLand`, `misc%weights`, na.rm = TRUE),
        milkYield_mean = weighted.mean(`global%milkYield`, `misc%weights`, na.rm = TRUE),
        `Share of Grassland [%]` = weighted.mean(`global%ShareGrassLand`, `misc%weights`, na.rm = TRUE) * 100,
        # `Annual Work Units` = median(`global%Aks`, `misc%weights`, na.rm = TRUE),
        `Farm Net Value Added [EUR]` = weighted.mean(`misc%net_value_added`, `misc%weights`, na.rm = TRUE),
        `Median FNVA [EUR]` = median(`misc%net_value_added`, na.rm = TRUE),
        `Annual Work Units` = weighted.mean(`global%Aks`, `misc%weights`, na.rm = TRUE),
        `FNVA per AWU` = `Farm Net Value Added [EUR]` / `Annual Work Units`,
        `Livestock density` = nCows_mean / (nArabLand_mean + nGrasLand_mean),
        n = sum(`misc%nFarms`)
      ) %>%
      filter(n >= 15)
  }
  if (csv == TRUE) {
    descstats %>% write.csv(file.path(dir, paste0(type, "_descstats.csv")), row.names = FALSE)
  }
  return(descstats)
}
# Is nan function
## is.nan.data.frame ----
#' `is.nan.data.frame()` finds NaNs in a dataframe (base `is.nan()` has no data.frame method)
#'
#' S3 method for the internal generic `is.nan`, so `is.nan(df)` dispatches
#' here for data.frames and can be used for logical subsetting, e.g.
#' `df[is.nan(df)] <- NA`.
#' @param x A dataframe
#' @return A logical matrix with the same dimensions as `x`, TRUE where the
#'   corresponding entry is NaN (elementwise, column-bound via cbind)
#' @export is.nan.data.frame
is.nan.data.frame <- function(x) do.call(cbind, lapply(x, is.nan))
# Remove aggregated farms with less than 15 farms for reporting
## remove_aggregated_farms ----
#' `rm_lown()` removes aggregated farms with less than 15 farms for reporting
#'
#' Keeps only rows of `data` whose `farmIds` belong to aggregated farms with
#' at least 15 underlying farms according to `farm_data`.
#' @param data A dataframe with the data to plot
#' @param farm_data A dataframe with the farm data
#' @return A dataframe with the data without the aggregated farms
#' @export rm_lown
rm_lown <- function(data, farm_data) {
  # Ids of aggregated farms whose count is not below the disclosure limit
  kept_ids <- farm_data$farmIds[!(farm_data$`misc%nFarms` < 15)]
  data[data$farmIds %in% kept_ids, ]
}
# Find the first matching column
## first_match_col ----
#' `first_match_col()` finds the first matching column in a dataframe
#' @param x A dataframe
#' @param pattern A pattern to match
#' @param how How to match the pattern: "all" requires every entry of a
#'   column to match, "any" requires at least one entry to match
#' @return The name of the first matching column
#' @export first_match_col
#' @examples
#' data <- data.frame(a = c("a", "b", "c"), b = c("a", " ", "c"), c = c("a", "b", "1"))
#' first_match_col(data, "\\D", "all")
#' first_match_col(data, "\\d", "any")
first_match_col <- function(x, pattern, how = c("all", "any")) {
  # match.arg() validates `how` and resolves the default vector to "all";
  # previously a missing `how` made `how == "all"` a length-2 condition.
  how <- match.arg(how)
  # Predicate for a single column; grepl() is called with the conventional
  # (pattern, x) order for clarity -- equivalent to the original call.
  matches_col <- switch(how,
    all = function(col) all(grepl(pattern, col)),
    any = function(col) any(grepl(pattern, col))
  )
  for (i in seq_along(x)) {
    if (matches_col(x[[i]])) {
      return(colnames(x)[i])
    }
  }
  # Reached only when no column matched. Also covers zero-column input,
  # which the original silently returned NULL for.
  rlang::abort("No matching columns found")
}
# Make a function that replaces the name of the column in first_match_col with what the user inputs
## replace_first_match_col ----
#' `replace_first_match_col()` replaces the name of the column found by `first_match_col()` with what the user inputs
#' @inheritParams first_match_col
#' @param replace_with The name to replace the column name with
#' @return dataframe with the replaced column name
#' @export replace_first_match_col
#' @examples
#' data <- data.frame(a = c("a", "b", "c"), b = c("a", " ", "c"), c = c("a", "b", "1"))
#' replace_first_match_col(data, "\\D", "all", "new")
#' replace_first_match_col(data, "\\d", "any", "new")
replace_first_match_col <- function(x, pattern, how = c("all", "any"), replace_with) {
  # Locate the column, then rename it in place
  target <- first_match_col(x, pattern, how)
  colnames(x)[colnames(x) == target] <- replace_with
  return(x)
}
R/crop_prices.r 0 → 100644
# -------------------------------------
# Script: crop_prices.R
# Author: Hugo Scherer
# Purpose: Preparing crop prices for EU FarmDyn
# Version: 0.0.0
# Date: 09.10.2023
# Notes: Not in use yet
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------
# Crop prices calculation ----
# Computes per-farm crop prices (sales value / sales quantity) from FADN
# data, trims outliers, aggregates them to NUTS 0-3 level with weighted
# means, fills missing regional prices from the NUTS0 level, and writes
# each aggregation to a gdx file via gdxreshape(). Not exported; header
# above says "Not in use yet".
#
# NOTE(review): `cropprod`, `crop_sales_quant` (read before it is assigned
# below), `FADN2Dyn` and `Modes` are resolved from the enclosing/global
# environment -- confirm they are defined before calling this function.
crop_prices <- function(FADN2) {
  # Sales-value variable names derived from production-quantity names
  # (PRQ -> SV suffix swap)
  sales_value_var <- unique(
    sub(cropprod$item1, pattern = "PRQ", replacement = "SV")
  )
  # Long format: one row per farm ID and crop with its sales quantity
  crop_sales_quant <- pivot_longer(
    cols = -"ID", crop_sales_quant,
    names_to = "crop_new",
    values_to = "sales_quant"
  )
  # Long format of the sales-value columns of FADN2
  crop_sv <- pivot_longer(FADN2[colnames(FADN2) %in% c("ID", sales_value_var)],
    cols = colnames(FADN2)[colnames(FADN2) %in% sales_value_var],
    names_to = c("salesvalue"),
    values_to = "SV"
  )
  # Map FADN sales-value variables to harmonised crop names
  sv_crop_new <- FADN2Dyn %>% select(salesvalue, crop_new)
  crop_sv <- merge(crop_sv, sv_crop_new)
  # Wide+sum then back to long: collapses multiple sales-value variables
  # that map to the same crop into one summed SV per farm and crop
  crop_sv <- crop_sv %>% pivot_wider(
    id_cols = c("ID"),
    names_from = "crop_new",
    values_from = "SV",
    values_fn = sum
  )
  crop_sv <- crop_sv %>%
    pivot_longer(
      cols = -"ID",
      names_to = "crop_new",
      values_to = "SV"
    )
  sv_prod <- left_join(crop_sv, crop_sales_quant, by = c("ID", "crop_new"))
  # Divide sales value over sales_quant to get the price
  sv_prod$price <- sv_prod$SV / sv_prod$sales_quant
  # Remove observations with NaNs and infinite values
  sv_prod <- sv_prod %>%
    filter(!is.na(price) & !is.infinite(price) & !is.nan(price))
  crop_prices <- pivot_wider(sv_prod,
    id_cols = c("ID"),
    names_from = "crop_new",
    values_from = "price"
  )
  # Remove outliers by removing the top 10% of all observations
  # for all columns and the bottom 10% of observations for all columns
  crop_prices <- crop_prices %>%
    mutate_if(is.numeric, ~ ifelse(. > quantile(., 0.9, na.rm = TRUE), NA, .)) %>%
    mutate_if(is.numeric, ~ ifelse(. < quantile(., 0.1, na.rm = TRUE), NA, .))
  # Replaces NaNs with NAs
  crop_prices[is.nan(crop_prices)] <- NA
  # Per-farm prices are written to a test gdx file
  gdxreshape(crop_prices,
    symDim = 2, order = c(1, 0),
    gdxName = file.path(
      "data", "temp", "prices", "cropprices", "cropPrices_FADNtest"
    ),
    symName = "p_cropPrices"
  )
  # Attach the NUTS hierarchy and the FADN weights (SYS02) per farm
  id_nuts <- FADN2 %>%
    select("ID",
      "NUTS3", "NUTS2", "NUTS1", "NUTS0",
      Weight = "SYS02"
    )
  crop_prices_nuts <- merge(crop_prices, id_nuts, by = "ID")
  # Aggregate to each NUTS level i, creating crop_prices_nuts0..3 via
  # assign(); numeric columns get weighted means, others the modal value
  for (i in 0:3) {
    assign(
      paste0("crop_prices_nuts", i),
      crop_prices_nuts %>%
        dplyr::group_by(dplyr::across(paste0("NUTS", i))) %>%
        dplyr::summarise(dplyr::across(
          everything(),
          ~ if (is.numeric(.)) {
            weighted.mean(.,
              na.rm = TRUE, w = Weight
            )
          } else {
            Modes(.)[1]
          }
        )) %>%
        select(-"ID", -"Weight")
    )
  }
  # Replace NAs in crop_prices_nuts0 with the mean of the column
  crop_prices_nuts0[] <- lapply(crop_prices_nuts0, function(x) {
    if (is.numeric(x)) {
      x[is.na(x)] <- mean(x, na.rm = TRUE)
    }
    x
  })
  # Replace NAs in crop_prices_nutsX with values
  # from crop_prices_nuts0 that are not NA and have the same NUTS0.
  # (cell-by-cell get()/assign() loop over the four aggregation levels)
  for (k in 0:3) {
    for (i in seq_len(nrow(get(paste0("crop_prices_nuts", k))))) {
      for (j in seq_along(get(paste0("crop_prices_nuts", k)))) {
        if (is.na(get(paste0("crop_prices_nuts", k)))[i, j]) {
          df <- get(paste0("crop_prices_nuts", k))
          df[i, j] <- crop_prices_nuts0[crop_prices_nuts0$NUTS0 == get(paste0("crop_prices_nuts", k))$NUTS0[i], j]
          assign(paste0("crop_prices_nuts", k), df)
        }
      }
    }
  }
  # Remove superfluous columns
  # (they are just the most commonly appearing NUTS code)
  crop_prices_nuts3 <- crop_prices_nuts3 %>%
    select(-"NUTS0", -"NUTS1", -"NUTS2")
  crop_prices_nuts2 <- crop_prices_nuts2 %>%
    select(-"NUTS0", -"NUTS1", -"NUTS3")
  crop_prices_nuts1 <- crop_prices_nuts1 %>%
    select(-"NUTS0", -"NUTS2", -"NUTS3")
  crop_prices_nuts0 <- crop_prices_nuts0 %>%
    select(-"NUTS2", -"NUTS1", -"NUTS3")
  # Saving crop prices to GDX
  lapply(paste0("crop_prices_nuts", 0:3), function(x) {
    gdxreshape(get(x),
      symDim = 2, order = c(1, 0),
      gdxName = file.path(
        "data", "temp", "prices", "cropprices", paste0("crop_prices_", x)
      ),
      symName = "p_crop_prices",
    )
  })
}
...@@ -4,14 +4,10 @@ ...@@ -4,14 +4,10 @@
#' #'
#' @docType data #' @docType data
#' #'
#' @usage data(samplefd2gui); data(samplefdnl); data(samplegdxmapping)
#' #'
#' @format Objects of class `tbl_df` #' @format Objects of class `tbl_df`
#' #'
#' @keywords datasets, FarmDynR #' @keywords datasets, FarmDynR
#' #'
#' @examples #' @examples
#' data(fd2gui) #'
#' data(fdnl)
#' data(gdxmapping)
#'samplefd2gui'
R/import_fadn.R 0 → 100644
# -------------------------------------
# Script: import_fadn.R
# Author: Hugo Scherer
# Purpose: Preparing FADN data for use in FarmDyn
# Version: 2.0.0
# Date: 23.01.2023
# Notes:
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------
# Source import_fadn.R
## fadn2fd ----
#' `fadn2fd()` imports the FADN data from the csv files
#' @param fadn_data FADN
#' @param mapping A vector with the names of the columns to be mapped
#' @param farmbranch Either arable or dairy
#' @param save_gdx Logical. If TRUE, it saves to gdx files
#' @return A list of dataframes with the FADN data
#' @export fadn2fd
#'
fadn2fd <- function(fadn_data, mapping, farmbranch = c("arable", "dairy"), save_gdx = FALSE) {
FADN$ID <- factor(FADN$ID)
FADN2 <- FADN
if (farmbranch != "arable") {
# Dairy conditions ----
FADN <- FADN[FADN$TF14 == 45 & FADN$SE086 >= 5, ]
# Remove observations with the lowest 10% of milk yields
# and the highest 1% of milk yields
FADN <- FADN[order(FADN$SE126), ] %>%
dplyr::slice(-c(
1:round(nrow(FADN) * 0.1),
(nrow(FADN) - round(nrow(FADN) * 0.01)):nrow(FADN)
))
} else if (farmbranch == "arable") {
# Arable conditions ----
FADN <- FADN[FADN$TF14 %in% c(15, 16, 20), ]
# Subset the data to those whose arable land is 0.98
# the sum of the area of the crops
narbl2 <- FADN[, c("ID", FADN2Dyn$`AA(ha)`), with = FALSE]
# Get the total arable area
narbl2[,
`global%nArabLand` := rowSums(.SD, na.rm = TRUE),
by = ID, .SDcols = FADN2Dyn$`AA(ha)`
]
# Select only ID and global%nArabLand
narbl2 <- narbl2[, c("ID", "global%nArabLand")]
narbl_uaa <- merge(narbl2, FADN[, c("ID", "SE025")], by = "ID")
narbl_uaa$ratio <- narbl_uaa$`global%nArabLand` / narbl_uaa$SE025
gr_cult_ids <- narbl_uaa[narbl_uaa$ratio >= 0.8, ]$ID
narbl2 <- narbl2[narbl2$ID %in% gr_cult_ids, ]
FADN <- FADN[FADN$ID %in% gr_cult_ids, ]
} else {
rlang::abort("Please select a valid option for farmbranch")
}
yield_gen(FADN, FADN2, reporting = FALSE)
message("Preparing FADN data")
new_fadn <- FADN[
,
.(
FADN[,
c("ID", "COUNTRY", "TF14", "NUTS0", "NUTS1", "NUTS2", "NUTS3", "SYS02"),
with = FALSE
],
`global%Aks` = SE010,
`misc%milkPrice` = PMLKCOW_SV * 0.1 / PMLKCOW_SQ,
`global%milkYield` = SE126 / 100,
`global%farmbranchDairy` = ifelse(TF14 == 45, -1, -2),
`global%farmbranchMotherCows` = -2,
`global%farmbranchBeef` = ifelse(TF14 == 49, -1, -2),
`global%farmbranchSows` = -2,
`global%farmbranchBiogas` = -2,
`global%farmbranchFattners` = ifelse(TF14 == 51, -1, -2),
`misc%farmincome` = SE410,
`misc%net_value_added` = SE415,
`global%nCows` = SE086,
`global%nCalves` = LBOV1_AN,
`global%nFattners` = LPIGFAT_AN,
`global%nHeifs` = LHEIFBRE_AN,
`global%nSows` = LSOWBRE_AN,
`global%nGrasLand` = SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A,
`global%ShareGrassLand` = (SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A) / SE025,
`misc%UAA` = SE025,
`misc%Winterbarley` = CBRL_A,
`misc%WinterWheat` = CWHTC_A,
`misc%Potatoes` = CPOT_A,
`misc%SugarBeet` = CSUGBT_A,
`misc%WinterRape` = CRAPE_A,
`misc%SummerCere` = CCEROTH_A + COAT_A + CRYE_A,
`misc%MaizSil` = CFODMZ_A,
`misc%MaizCorn` = CMZ_A,
`misc%summerBeans` = CPEA_A,
`misc%totaloutput` = SE131,
`misc%subsidies` = SE605, # excluding investment subsidides
`misc%farm_net_income` = SE420,
`misc%own_assets` = SE436,
`misc%depreciation` = SE360,
`misc%depreciationpercow` = SE360 / SE086,
`misc%N_use` = INUSE_Q,
`misc%total_specific_costs` = SE281,
`misc%total_feed_costs` = SE310 + SE320,
`misc%contractors` = SE350,
`misc%wagespaid` = SE375,
`misc%cropprotection` = SE300,
`misc%rent` = SE375,
`misc%fertiliser` = SE295,
`misc%seedsandplants` = SE285,
`misc%othercosts` = IOVHSOTH_V,
`misc%paidinterest` = SE380,
`misc%purchasedconc` = IGRFEDCNCTRPUR_V,
`misc%machineryequipment` = SE455,
`misc%buildings` = SE450,
`misc%OrganicCode` = ORGANIC,
FADN[,
colnames(FADN)[grepl(colnames(FADN), pattern = "(^(SE).{3,4})$")],
with = FALSE
]
)
]
# Add `misc%` to Standard Results
colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")] <- paste0("misc.", colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")])
# I use % as a separator, _ is used for some variables
# (meaning that if I separate with _,
# these var names would be split in two or more),
# so I can't use that one, but % causes troubles sometimes
# and somehow R/DT does not like it, dplyr does though
colnames(new_fadn) <- gsub("\\.", "%", colnames(new_fadn))
new_fadn[is.nan(new_fadn) | new_fadn == Inf | is.na(new_fadn)] <- 0
nuts3_yields <- FADNcrops_NUTS3 %>%
select(-c(NUTS0, NUTS1, NUTS2))
nuts0_yields <- FADNcrops_NUTS0 %>%
select(-c(NUTS1, NUTS2, NUTS3))
third_fadn <- base::Reduce(
function(...) merge(..., by = "NUTS0", all.x = TRUE),
list(new_fadn, nuts0_yields, defaultvalsNUTS0)
)
third_fadn[is.na(third_fadn)] <- 0
# Better with dplyr, reshaping data
# (no use of sep in DT, longer and more tedious)
# alternatively: reshape
third_fadn <- tibble(third_fadn)
test_dyn <- third_fadn %>%
pivot_longer(
cols = -c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02"
),
names_sep = "%",
names_to = c("item1", "item2")
) %>%
pivot_wider(id_cols = c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02", "item1"
), names_from = "item2", values_from = "value", values_fn = sum)
test_dyn[is.na(test_dyn)] <- 0
map2gui <- test_dyn %>%
pivot_longer(
cols = 10:length(colnames(test_dyn)),
names_to = "item2",
values_to = "value"
) %>%
pivot_wider(
id_cols = 1:8,
names_from = c("item1", "item2"),
values_from = "value",
names_sep = "%"
)
# Joining all data together
map2gui[] <- lapply(map2gui, function(x) if (is.factor(x)) as.factor(x) else x)
map2gui[is.na(map2gui)] <- 0
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)] <- lapply(
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)],
as.character
)
# For each mapping, group by the mapping and summarise
# With this configuration, you could do a list of different mappings
# Create a list to store the results
map2gui_list <- vector("list", length = length(mapping))
message("Aggregating to ", length(mapping), " different mappings")
for (map in seq_along(mapping)) {
map2gui_list[[map]] <- map2gui %>%
dplyr::group_by(
across(mapping[[map]])
) %>%
select(-c(ID, COUNTRY, TF14, NUTS0, NUTS1, NUTS2, NUTS3)) %>%
summarise(
dplyr::across(
c(everything(), -SYS02),
~ if (is.numeric(.)) {
weighted.mean(., na.rm = TRUE, w = as.numeric(as.character(SYS02)))
} else {
Modes(.)[1]
}
),
`misc%nFarms` = n(),
# Weights are correct, tested
`misc%weights` = sum(as.numeric(as.character(SYS02)))
)
}
# Concatenate the items in the list of list mapping
mapping <- lapply(mapping, paste0, collapse = "_")
# Name the list elements
names(map2gui_list) <- paste("map2gui", mapping, sep = "_")
# Set the column name to farmIds
map2gui_list <- map2gui_list %>%
purrr::map2(
., seq_along(mapping), ~ dplyr::rename_with(
.x, ~"farmIds", all_of(.y)
)
)
# Add conditional variables to all NUTS regions
map2gui_list <- purrr::map(map2gui_list, ~ .x %>%
group_by(farmIds) %>%
mutate(
`global%nArabLand` = sum(across(paste0("misc%", german_crops))),
`global%farmbranchArable` = ifelse(`global%nArabLand` > 0, -1, -2)
))
# For all map2guiNUTSx, remove observations with n < 15
# for (i in 0:3) {
# assign(
# paste0("map2guiNUTS", i),
# assign(
# paste0("map2guiNUTS", i),
# get(paste0("map2guiNUTS", i))[get(paste0("map2guiNUTS", i))$`misc%nFarms` >= 15, ]
# )
# )
# }
# Condition to see if any of the rowSums of maxrot per farmIds is not equal to 1 and show farmId for which the condition is true
# might throw an NA there sometimes but it is not a true NA
# for (i in seq_along(map2gui_list)) {
# if (any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98) |
# any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) > 1.02)) {
# warning(
# paste(map2gui_list[[i]][
# rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98, ,
# ]$farmIds, "has a problem with maxrot (sum not equal to 1)\n")
# )
# }
# if (any(is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])))) {
# warning(
# paste(map2gui_list[[i]][
# is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])), ,
# ]$farmIds, "has a problem with maxrot (NaN), check if there are any crops")
# )
# }
# }
for (i in seq_along(map2gui_list)) {
setDT(map2gui_list[[i]])
map2gui_list[[i]][, paste0("maxrot%", german_crops) := lapply(.SD, function(x) x / `global%nArabLand`), .SDcols = paste0("misc%", german_crops), by = farmIds]
# Convert nonumericals into numerics
cols_to_convert <- colnames(map2gui_list[[i]])[colnames(map2gui_list[[i]]) %in% paste0("global%", nonnumericals)]
map2gui_list[[i]][, (cols_to_convert) := lapply(.SD, as.numeric), .SDcols = cols_to_convert]
cols_to_melt <- setdiff(colnames(map2gui_list[[i]]), "farmIds")
# Pivot from wide to long and then wide again for all map2guiNUTS
map2gui_list[[i]] <- melt(map2gui_list[[i]], id.vars = "farmIds", measure.vars = cols_to_melt, variable.name = "item", value.name = "value")
map2gui_list[[i]][, c("item1", "item2") := tstrsplit(item, split = "%", fixed = TRUE)]
map2gui_list[[i]] <- dcast(map2gui_list[[i]], farmIds + item1 ~ item2, value.var = "value")
# Replace NAs with 0
map2gui_list[[i]][is.na(map2gui_list[[i]])] <- 0
}
# Add to gdx p_farmdata
if (save_gdx == TRUE) {
for (map in seq_along(map2gui_list)) {
gdxreshape(
map2gui_list[[map]],
symDim = 3, order = c(1, 2, 0),
gdxName = file.path(
"data",
"farmdata", paste("farmData", farmbranch, mapping[map], sep = "_")
),
symName = "p_farmData"
)
}
} else {
return(map2gui_list)
}
}
R/utils-pipe.R 0 → 100644
#' Pipe operator
#'
#' Re-export of the magrittr forward-pipe operator so that users of this
#' package can pipe (`x %>% f()`) without attaching magrittr themselves.
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
#' @param lhs A value or the magrittr placeholder.
#' @param rhs A function call using the magrittr semantics.
#' @return The result of calling `rhs(lhs)`.
NULL
R/yields.R 0 → 100644
# -------------------------------------
# Script: yields.R
# Author: Hugo Scherer
# Purpose: Yield data preparation
# Version: 0.6.0
# Date: 06.10.2023
# Notes:
# Originally in arable_farmdata.R
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------
#' Prepare crop yield data from FADN for FarmDyn
#'
#' Computes per-farm crop yields (production / area) from FADN records,
#' aggregates them to NUTS0--NUTS3 level using SYS02-weighted means, caches
#' the aggregates as CSV files under data/temp/yields (or reads them back if
#' the NUTS0 cache already exists), and optionally writes a NUTS0 reporting
#' table to temp/crops_rep.csv.
#'
#' Relies on several package-level objects being in scope: FADN2Dyn,
#' german_crops, area, dis_crops, production, sales_value, sales_quant,
#' aprod, aarea, a_salesquant, defaultvalsNUTS0 and Modes().
#'
#' @param FADN data.table with FADN records; its column names are used to
#'   subset the crop variables, and it provides the all-observations area
#'   subset (croparea2).
#' @param FADN2 data.table with the (filtered) FADN records used for the
#'   yield calculation itself.
#' @param reporting If TRUE, also build and write the NUTS0 descriptive
#'   reporting table (temp/crops_rep.csv).
#' @return invisible(NULL); called for its side effects (CSV files).
yields_gen <- function(FADN, FADN2, reporting = FALSE) {
  message("Preparing yields data")
  # Restrict the FADN-to-FarmDyn crop mapping to the German crop set
  # (local copy; the package-level FADN2Dyn is not modified)
  FADN2Dyn <- FADN2Dyn[FADN2Dyn$crop_new %in% german_crops, ]
  # Select only what's needed (FarmDyn variable and FADN variables)
  # Subset FADN variables on area in ha of crops that map to FarmDyn crops
  croparea <- tibble(FADN2)[colnames(tibble(FADN2)) %in% c(area, "ID") & !colnames(tibble(FADN2)) %in% dis_crops]
  croparea2 <- FADN[, (intersect(c("ID", area), colnames(FADN))), with = FALSE]
  # Group by ID and sum all crop area variables into one column
  setDT(croparea)
  # NOTE(review): the hard-coded index -80 presumably drops the ID column;
  # this breaks silently if the FADN layout changes -- confirm and prefer
  # dropping by name (e.g. croparea[, !"ID"]).
  croparea$croparea <- base::Reduce(`+`, croparea[, -80])
  croparea <- croparea[!duplicated(croparea)]
  croparea$ID <- factor(croparea$ID)
  # Same as above but with production of crops in t
  # NOTE(review): the column filter uses colnames(FADN) while subsetting
  # FADN2 (here and in the three subsets below) -- confirm intentional.
  cropprod <- FADN2[, (intersect(c("ID", production), colnames(FADN))), with = FALSE]
  # Same as above but with sales value of crops in EUR
  # NOTE(review): cropsalesvalue and cropsalesquant appear unused below.
  cropsalesvalue <- FADN2[, intersect(c(sales_value, "ID"), colnames(FADN)), with = FALSE]
  # Same as above but with sales quantity of crops in t
  cropsalesquant <- FADN2[, intersect(c(sales_quant, "ID"), colnames(FADN)), with = FALSE]
  crop_sales_quant <- FADN2[, intersect(c(FADN2Dyn$cropsalesquant, "ID"), colnames(FADN)), with = FALSE]
  # Pivoting to long because if the operations are done in wide format it will produce duplicates that will be corrected with '.1' at the end (or another nr depending on how many times it is duplicated)
  # with added benefits that the aprod/aarea are also in long format and that when pivoting to wide again a sum function can be added, so everything that belongs to the same FarmDyn crop category can be summed up
  colnames(aprod)[2] <- "item1"
  # Some columns are integers so it will throw a warning
  cropprod <- data.table::melt(
    cropprod,
    id.vars = "ID", variable.name = "item1", value.name = "prod"
  )
  cropprod <- merge(cropprod, aprod)
  colnames(aarea)[2] <- "item1"
  croparea <- melt(
    croparea,
    id.vars = "ID", variable.name = "item1", value.name = "area"
  )
  croparea <- merge(croparea, aarea)
  areacrop <- dcast(
    croparea, ID ~ crop_new,
    value.var = "area", fun.aggregate = sum
  )
  # operation above in data.table
  # NOTE(review): areacrop is not referenced again below -- confirm whether
  # it is still needed.
  areacrop <- areacrop[, (setdiff(colnames(areacrop), "ID")) := lapply(.SD, function(x) ifelse(x < 1, 0, x)), .SDcols = (setdiff(colnames(areacrop), "ID"))]
  # Croparea2 is used for the calculation of the area of the
  # selected farms, croparea instead is used for
  # the calculation of yields because it contains ALL obs
  croparea2 <- melt(
    croparea2,
    id.vars = "ID", variable.name = "item1", value.name = "area"
  )
  # Redundant repeat of the rename above; harmless, kept for safety
  colnames(aarea)[2] <- "item1"
  croparea2 <- merge(croparea2, aarea)
  areacrop2 <- dcast(
    croparea2, ID ~ crop_new,
    value.var = "area", fun.aggregate = sum
  )
  # Removing those crops with less than 1 ha
  setDT(areacrop2)
  areacrop2 <- areacrop2[, setdiff(colnames(areacrop2), "ID") := lapply(.SD, function(x) ifelse(x < 1, 0, x)), .SDcols = setdiff(colnames(areacrop2), "ID")]
  crop_sales_quant <- melt(
    crop_sales_quant,
    id.vars = "ID", variable.name = "item1", value.name = "sales_quant"
  )
  colnames(a_salesquant)[2] <- "item1"
  crop_sales_quant <- as.data.table(merge(a_salesquant, crop_sales_quant))
  # NOTE(review): crop_sales_quant is not referenced again after this dcast.
  crop_sales_quant <- dcast(
    crop_sales_quant, ID ~ crop_new,
    value.var = "sales_quant", fun.aggregate = sum
  )
  # Division of production over area to get yields (t) per ha
  setDT(cropprod)
  cropprod[, prod := sum(prod), by = list(ID, crop_new)]
  croparea[, area := sum(area), by = list(ID, crop_new)]
  cropprod <- merge(
    cropprod, croparea[, c("ID", "crop_new", "area")],
    by = c("ID", "crop_new")
  )
  cropprod[, yields := prod / area]
  # Conditional to check if the cached NUTS0 yields file exists
  # NOTE(review): only the NUTS0 file is checked, but NUTS1/NUTS2 are read
  # back in the else branch -- a partial cache would error there.
  if (!file.exists(file.path("data", "temp", "yields", "FADNcrops_NUTS0.csv"))) {
    # NOTE(review): on a data.table, cropprod[-c(1, 3)] drops ROWS 1 and 3,
    # not columns -- confirm this is the intended behavior.
    FADNcrops <- cropprod[-c(1, 3)] %>%
      pivot_wider(
        id_cols = "ID",
        names_from = "crop_new",
        values_from = "yields",
        values_fn = ~ sum(., na.rm = TRUE)
      )
    # Division by 0 problem,
    # logic tells us that if you have 0 production of something
    # over 0 hectares then that is going to be 0.
    FADNcrops[FADNcrops == Inf] <- NA
    names(FADNcrops) <- c("ID", paste0("yields%", names(FADNcrops[-1])))
    # Add NUTS level and SYS02 to FADNcrops from FADN2
    FADNcrops <- tibble(FADNcrops[, colSums(is.na(FADNcrops)) < nrow(FADNcrops)])
    FADNcrops <- merge(FADNcrops, FADN2[, c("ID", "NUTS0", "NUTS1", "NUTS2", "SYS02")])
    FADNcrops[FADNcrops == 0] <- NA
    # Summarize by NUTS level using weighted mean and weight SYS02
    for (i in 0:3) {
      assign(
        paste0("FADNcrops_NUTS", i),
        FADNcrops %>%
          dplyr::group_by(
            dplyr::across(
              colnames(FADNcrops)[colnames(FADNcrops) %in% c(paste0("NUTS", i))]
            )
          ) %>%
          select(-c(ID)) %>%
          dplyr::summarise(dplyr::across(
            tidyr::everything(),
            ~ if (is.numeric(.)) {
              weighted.mean(.,
                na.rm = TRUE, w = as.numeric(as.character(SYS02))
              )
            } else {
              Modes(.)[1]
            }
          ), n = dplyr::n())
      )
    }
    # Remove the n, SYS columns
    for (i in 0:3) {
      df <- get(paste0("FADNcrops_NUTS", i))
      df <- df[, -which(colnames(df) %in% c("n", "SYS02"))]
      assign(paste0("FADNcrops_NUTS", i), df)
    }
    # Impute with mean of column for NUTS0
    FADNcrops_NUTS0 <- FADNcrops_NUTS0 %>%
      dplyr::mutate(
        dplyr::across(dplyr::where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))
      )
    # Impute missing values at each NUTS level from the level above
    FADNcrops_NUTS1 <- dplyr::rows_patch(
      FADNcrops_NUTS1, FADNcrops_NUTS0,
      by = "NUTS0"
    )
    FADNcrops_NUTS2 <- dplyr::rows_patch(
      FADNcrops_NUTS2, FADNcrops_NUTS1,
      by = "NUTS1"
    )
    # Write to csv
    for (i in 0:3) {
      readr::write_csv(
        get(paste0("FADNcrops_NUTS", i)),
        file.path("data", "temp", "yields", paste0("FADNcrops_NUTS", i, ".csv"))
      )
    }
  } else {
    FADNcrops_NUTS0 <- readr::read_csv(
      file.path("data", "temp", "yields", "FADNcrops_NUTS0.csv"),
      col_names = TRUE,
      show_col_types = FALSE
    )
    FADNcrops_NUTS1 <- readr::read_csv(
      file.path("data", "temp", "yields", "FADNcrops_NUTS1.csv"),
      col_names = TRUE,
      show_col_types = FALSE
    )
    FADNcrops_NUTS2 <- readr::read_csv(
      file.path("data", "temp", "yields", "FADNcrops_NUTS2.csv"),
      col_names = TRUE,
      show_col_types = FALSE
    )
  }
  # Create descriptive statistics of yields for NUTS0
  # Remove the "yields%" from the column names
  if (reporting == TRUE) {
    crops_rep <- FADNcrops_NUTS0 %>%
      left_join(defaultvalsNUTS0, by = "NUTS0")
    colnames(crops_rep) <- gsub("yields%", "", colnames(crops_rep))
    crops_rep <- crops_rep %>%
      select(
        "NUTS0", "MaizCorn", "MaizSil", "SummerCere", "summerBeans",
        "Winterbarley", "SugarBeet", "WinterRape", "WinterWheat", "Potatoes",
        "gra1", "gra2", "gra3", "gra4"
      )
    # BUGFIX: this write previously sat outside the reporting branch, so
    # calling yields_gen() with the default reporting = FALSE errored with
    # "object 'crops_rep' not found". It now runs only when crops_rep exists.
    write.csv(crops_rep, file.path("temp", "crops_rep.csv"), row.names = FALSE)
  }
  invisible(NULL)
}
...@@ -20,12 +20,15 @@ Developed by Hugo Scherer and Marc Müller at Wageningen Economic Research. ...@@ -20,12 +20,15 @@ Developed by Hugo Scherer and Marc Müller at Wageningen Economic Research.
<!-- badges: start --> <!-- badges: start -->
<!-- badges: end --> <!-- badges: end -->
The goal of FarmDynR is to give the user the ability to aggregate FADN Data from GDX files to create representative farms for any grouping, generate descriptive statistics, and to run FarmDyn from R. The goal of FarmDynR is to give the user the ability to aggregate FADN data to create representative farms for any grouping available, generate descriptive statistics, and to run FarmDyn from R.
## Requirements ## Requirements
This package requires that you have GAMS and FarmDyn installed. Additionally, the package imports from: This package requires that you have GAMS and FarmDyn installed. Additionally, the package imports from:
- gdxrrw (remember to load GAMS with `igdx()` with the path to your version of GAMS) - gdxrrw (remember to load GAMS with `igdx()` with the path to your version of GAMS)
- tidyverse - readr
- dplyr
- tidyr
- readxl
It also suggests: It also suggests:
- stringr - stringr
...@@ -39,20 +42,36 @@ You can install the development version of FarmDynR like so: ...@@ -39,20 +42,36 @@ You can install the development version of FarmDynR like so:
``` ```
## Workflow ## Workflow
1. Create GDX files with the mappings, global settings per farm, farm data, and weights The workflow for this package is as follows:
2. Use `updateFarmData()` to create sample farms based on your selected mapping 1. Read the FADN data into R
2. Run `fadn2fd()` with the FADN data, farmbranch desired, the mapping and the option to save GDX files based on the mapping
- Yields will be calculated and the FADN data will be prepared
- Outliers are removed
3. Write batch file with `writeBatch()` and run FarmDyn with `runFarmDynfromBatch()` using the created batch file 3. Write batch file with `writeBatch()` and run FarmDyn with `runFarmDynfromBatch()` using the created batch file
- Optional: Create descriptive statistics for reporting with `groupstats()` - Optional: Create descriptive statistics for reporting with `fd_desc()` with the farmbranch
## Example ## Example
This is a basic example which shows you how to solve a common problem:
```{r example, eval=FALSE, include=TRUE} ```{r example, eval=FALSE, include=TRUE}
library(FarmDynR) library(FarmDynR)
## basic example code
groupstats(filename = 'FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = gdxmapping, mapping = mapping, cols = c(a,b,c), w = Weight, writegdx = FALSE, filtern = FALSE) # Bad, won't work
groupstats(filename = 'FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = 'gdxmapping', mapping = 'mapping', cols = c('a','b','c'), w = 'Weight', writegdx = FALSE, filtern = FALSE) # Good, there can also be more than one mapping with c('mapping1', 'mapping2'...) # Read in FADN data
fadn <- read_fadn("path/to/fadn/data")
# Create mapping
mapping <- list(c("NUTS0", "misc%OrganicCode"), "NUTS0", "NUTS2")
# Create FarmDyn data
fd_data <- fadn2fd(fadn, "Dairy", mapping, save_gdx = FALSE)
# Write batch file
writeBatch(fd_data, "path/to/batch/file")
# Run FarmDyn
runFarmDynfromBatch("path/to/batch/file")
# Create descriptive statistics
fd_desc(fd_data)
``` ```
...@@ -10,14 +10,15 @@ Research. ...@@ -10,14 +10,15 @@ Research.
<!-- badges: end --> <!-- badges: end -->
The goal of FarmDynR is to give the user the ability to aggregate FADN The goal of FarmDynR is to give the user the ability to aggregate FADN
Data from GDX files to create representative farms for any grouping, data to create representative farms for any grouping available, generate
generate descriptive statistics, and to run FarmDyn from R. descriptive statistics, and to run FarmDyn from R.
## Requirements ## Requirements
This package requires that you have GAMS and FarmDyn installed. This package requires that you have GAMS and FarmDyn installed.
Additionally, the package imports from: - gdxrrw (remember to load GAMS Additionally, the package imports from: - gdxrrw (remember to load GAMS
with `igdx()` with the path to your version of GAMS) - tidyverse with `igdx()` with the path to your version of GAMS) - readr - dplyr -
tidyr - readxl
It also suggests: - stringr It also suggests: - stringr
...@@ -31,23 +32,35 @@ You can install the development version of FarmDynR like so: ...@@ -31,23 +32,35 @@ You can install the development version of FarmDynR like so:
## Workflow ## Workflow
1. Create GDX files with the mappings, global settings per farm, farm The workflow for this package is as follows: 1. Read the FADN data into
data, and weights R 2. Run `fadn2fd()` with the FADN data, farmbranch desired, the mapping
2. Use `updateFarmData()` to create sample farms based on your selected and the option to save GDX files based on the mapping - Yields will be
mapping calculated and the FADN data will be prepared - Outliers are removed 3.
3. Write batch file with `writeBatch()` and run FarmDyn with Write batch file with `writeBatch()` and run FarmDyn with
`runFarmDynfromBatch()` using the created batch file `runFarmDynfromBatch()` using the created batch file - Optional: Create
- Optional: Create descriptive statistics for reporting with descriptive statistics for reporting with `fd_desc()` with the
`groupstats()` farmbranch
## Example ## Example
This is a basic example which shows you how to solve a common problem:
``` r ``` r
library(FarmDynR) library(FarmDynR)
## basic example code
groupstats(filename = 'FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = gdxmapping, mapping = mapping, cols = c(a,b,c), w = Weight, writegdx = FALSE, filtern = FALSE) # Bad, won't work
groupstats(filename = 'FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = 'gdxmapping', mapping = 'mapping', cols = c('a','b','c'), w = 'Weight', writegdx = FALSE, filtern = FALSE) # Good, there can also be more than one mapping with c('mapping1', 'mapping2'...) # Read in FADN data
fadn <- read_fadn("path/to/fadn/data")
# Create mapping
mapping <- list(c("NUTS0", "misc%OrganicCode"), "NUTS0", "NUTS2")
# Create FarmDyn data
fd_data <- fadn2fd(fadn, "Dairy", mapping, save_gdx = FALSE)
# Write batch file
writeBatch(fd_data, "path/to/batch/file")
# Run FarmDyn
runFarmDynfromBatch("path/to/batch/file")
# Create descriptive statistics
fd_desc(fd_data)
``` ```
data/FADN2Dyn.rda 0 → 100644
File added
File added
data/aarea.rda 0 → 100644
File added
data/aprod.rda 0 → 100644
File added
data/area.rda 0 → 100644
File added
data/avsta.rda 0 → 100644
File added
File added