### Preamble ---- ## Library loading ---- library(gdxrrw) library(data.table) library(arsenal) library(tidyverse) ## Directory specification ---- GAMSDir = "C:/GAMS/40" # Change to your GAMS Version! #ADCDir = "C:/FARMDYNDATA/ADC" BINDir = "C:/FARMDYNDATA/BIN" #LMMDir = "W:\\WECR\\PIA\\PW\\DZK\\GHG2020\\output Jakob" MyLMMDir = "C:/FARMDYNDATA/LMM" MyBINDir = "C:/FARMDYNDATA/MYBIN" MyDRAMDir = "C:/FARMDYNDATA/DRAM" FDDir = "C:/FARMDYNTRUNK/DAT" WWLDir <- "C:/FARMDYNDATA/WWL" RDir = "C:/FARMDYNDATA/R" ### Filenames ---- farmdata2abn <- "farmdata2ABN.gdx" # For testing # GAMS loading igdx(GAMSDir) # Working directory setwd(RDir) ## Functions ---- Modes <- function(x) { # Function found on StackOverflow made by Ken Williams and expanded by digEmAll ux <- unique(x) tab <- tabulate(match(x, ux)) ux[tab == max(tab)] } fd2gui <- (gdxrrw::rgdx.param("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", "p_farmData2GUI", names=c("all_binid", 'items', "varias", "value"), compress=FALSE, ts=FALSE, squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) fdnl <- (gdxrrw::rgdx.param("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", "p_farmData_NL", names=c("all_binid", 'years', "items2", "value"), compress=FALSE, ts=FALSE, squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) gdxmapping <- (rgdx.set("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", 'map2binid',names = c('all_binid', 'mapping'), compress=FALSE, ts=FALSE, useDomInfo = TRUE, check.names = TRUE)) gdxbinwider('FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = 'map2binid', mapping = 'mapping') gdxbinwider <- function(filename, BINDir, gdxmap, mapping){ if ('BINDir' %in% ls(envir = .GlobalEnv) & missing(BINDir)) { BINDir <- get('BINDir', envir = .GlobalEnv) } else { BINDir } fd2guicolnames <- c("all_binid", "item1", "item2", "value") fdnlcolnames <- c("year", "all_binid", "item1", "value") fd2gui <- (rgdx.param(paste(BINDir,filename, sep="/"), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE, squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) fdnl <- (rgdx.param(paste(BINDir,filename, sep="/"), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE, squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE)) gdxmapping <- (rgdx.set(paste(BINDir,filename, sep="/"), gdxmap, compress=FALSE, ts=FALSE, useDomInfo = TRUE, check.names = TRUE)) # Original values of "dummy" are turned into number codes when loading into R. This changes the values to the original ones. # # dummies <- fd2gui[which(fd2gui$all_binid=="dummy"),] # dummies$value[which(dummies$value==1.6e+303)] <- "num" # dummies$value[which(dummies$value==1.59e+303)] <- "TRUE" # dummies$value[which(dummies$value==1.56e+303)] <- "on" # Restrict to only those that have value num (i.e. numeric). Numerical values may change. # Pick entries for numerical non-numerical globals with the highest incidence. nonnumdummies <- dummies[dummies$value != 1.6e+303,] # Separation of dummy and misc from fd2gui fd2gui <- fd2gui[which(fd2gui$all_binid!="dummy"),] # Widening of the data for better and easier viewing and handling fd2gui <- fd2gui %>% pivot_wider(id_cols = "all_binid", names_from = c("item1", "item2"), values_from = "value", names_sep = "%") fdnl <- fdnl %>% pivot_wider(id_cols = c("all_binid", "year"), names_from = c("item1"), values_from = "value", names_sep = "%") # Since subsetting with !duplicated() is done following the data order, reorder the data to start from youngest to oldest # Result: all_binids are preserved for youngest years (2020, 2019...) and removed from oldest years (2013, 2014...) fdnl <- fdnl[order(-as.numeric(fdnl$year)),] fdnl <- fdnl[!duplicated(fdnl$all_binid),] # Keep list of variables that are NOT numeric, BUT binary or other variables with other meanings. tokeep <- c(paste("global", nonnumdummies$item2, sep = "%"), "global%soilTypeFirm", "global%derogatie") # Finding index of global to keep only NON-numerical values based on "tokeep" keepmatch <- match(tokeep, colnames(fd2gui)) # Making these factors fd2gui[keepmatch] <- lapply(fd2gui[keepmatch], as.factor) # Joining all data together fdnl2gui <- right_join(fd2gui, fdnl[,c('all_binid', 'Weight')], by='all_binid') fdnl2gui[] <- lapply(fdnl2gui, function(x) if(is.factor(x)) as.factor(x) else x) map2gui <- right_join(fdnl2gui, gdxmapping, by='all_binid') map2gui <- map2gui %>% dplyr::group_by(across(all_of({{ mapping }}))) return(map2gui) } gdxreshape <- function (inDF, symDim, symName=NULL, tName="time", gdxName=NULL, setsToo=TRUE, order=NULL, setNames=NULL) { # Function based on wgdx.reshape of the gdxrrw package, modified for performance and usability by Hugo Scherer. # 23-09-2022 hugo.scherer@wur.nl nCols <- ncol(inDF) timeIdx <- symDim # default index position for time aggregate if (is.null(order)) { idCols <- 1:(symDim-1) inDF[idCols] <- lapply(inDF[idCols], as.factor) outDF <- (pivot_longer(inDF, cols=-all_of(idCols))) } else if ((! is.vector(order)) || (symDim != length(order))) { stop ("specified order must be a vector of length symDim") } else { timeIdx <- -1 if (is.character(order)) { stop ("order must be numeric for now") } else if (! is.numeric(order)) { stop ("optional order vector must be numeric or character") } idCols <- 1:(symDim-1) # for k in 1:symDim if (any(duplicated(order))) { stop ('duplicate entry in order vector: nonsense') } if ((symDim-1) != sum(order>0)) { stop ('order vector must specify symDim-1 ID columns') } if (all(order>0)) { stop ('order vector must have a non-positive entry to specify the "time" index') } timeIdx <- match(0, order) oo <- c(idCols,(1:nCols)[-idCols]) df2 <- inDF[oo] idCols <- 1:(symDim-1) df2[idCols] <- lapply(df2[idCols], factor) if (symDim == timeIdx) { # no need to re-order after reshaping outDF <- pivot_longer(df2, cols=-all_of(idCols)) } else { df3 <- pivot_longer(df2, cols=-all_of(idCols)) oo <- vector(mode="integer",length=symDim+1) oo[1:(timeIdx-1)] = 1:(timeIdx-1) oo[timeIdx] = symDim oo[(timeIdx+1):symDim] = (timeIdx+1):symDim-1 oo[symDim+1] = symDim+1 outDF <- (df3[oo]) } } outDF$name <- as.factor(outDF$name) if (is.null(symName)) { symName <- attr(inDF, "symName", exact=TRUE) } if (! is.character(symName)) { stop ("symName must be a string") } attr(outDF,"symName") <- symName symText <- attr(inDF, "ts", exact=TRUE) if (is.character(symText)) { attr(outDF,"ts") <- symText } if (is.character(tName)) { names(outDF)[timeIdx] <- tName } else { names(outDF)[timeIdx] <- 'time' } names(outDF)[symDim+1] <- "value" # str(outDF) if (setsToo) { ## write index sets first, then symName outLst <- list() length(outLst) <- symDim + 1 setNamesLen <- 0 if (! is.null(setNames)) { if (! is.character(setNames)) { stop ("setNames must be a string or string vector") } else if (! is.vector(setNames)) { stop ("setNames must be a string or string vector") } else { ## guard against zero-length vector setNamesLen <- length(setNames) } } kk <- 0 for (i in 1:symDim) { lvls <- levels(as.factor(outDF[[i]])) outLst[[i]] <- list(name=names(outDF)[[i]], type='set', uels=c(list(lvls))) if (setNamesLen > 0) { # tack on the next set text kk <- kk + 1 outLst[[i]]$ts <- setNames[[kk]] if (kk >= setNamesLen) kk <- 0 } } outLst[[symDim+1]] <- (outDF) if (is.character(gdxName)) { wgdx.lst(gdxName,outLst) } else { return(outLst) } } else { if (is.character(gdxName)) { wgdx.lst(gdxName,outDF) } else { return(list(outDF)) } } } # gdxreshape groupstats <- function(filename, BINDir, gdxmap, mapping, cols, w, writegdx = TRUE, filtern = FALSE) { if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) { BINDir <- get("BINDir", envir = .GlobalEnv) } else { BINDir } # In this function, you need to have dplyr and tidyr, require() will show a warning if the tidyverse package is not found # However, it will still (try to) run the function require(tidyverse) data <- gdxbinwider(filename, BINDir, gdxmap, mapping) names(data) <- gsub(x=names(data), pattern = "\\w*%", '') groupmap <- data %>% select(all_of(mapping), all_of(cols), all_of(w)) %>% dplyr::summarise(weightedmean = across(all_of(cols),~ weighted.mean(as.numeric(as.character(.x)), w=.data[[w]],na.rm=TRUE)), # Make a new column named weightedmean where the values are the weighted means of only the numeric columns (otherwise error) min = across(all_of(cols),~ min(as.numeric(as.character(.x))), na.rm =TRUE), # Same as weightedmean but with min max = across(all_of(cols),~ max(as.numeric(as.character(.x))), na.rm =TRUE), # Idem median = across(all_of(cols),~ median(as.numeric(as.character(.x))), na.rm =TRUE), # Idem mode = across(all_of(cols),~ Modes(as.numeric(as.character(.x)))), n = across(all_of(cols),~ sum(!is.na(.x))), # Make a column with n of each variable in the group .groups = 'keep' # .keep is to keep the grouped groups as in the original mapping (otherwise it will group with only one group, but the results are the same) ) groupmap <- groupmap %>% tidyr::unpack(cols = c(weightedmean, min, max, median, mode, n), names_sep = "%") %>% # Data cleaning activities and making a simple table tidyr::pivot_longer(cols = -all_of(mapping), names_to = c('item1', 'variable'), names_sep = "%") %>% tidyr::pivot_wider(id_cols = c(all_of(mapping), variable), names_from = item1, values_from = value) groupmap[] <- lapply(groupmap, function(x) if(is.numeric(x)) round(x, digits = 2) else x) # Rounding the results, comment if not needed groupmap[] <- lapply(groupmap, function(x) if(is.factor(x)) as.factor(x) else x) # For some reason, factors include ALL factors (BIN_IDs, soil type, random things), # this results in gdx file with 1000s of unwanted data that serves no use, factor(x) eliminates that. if (filtern == TRUE) { groupmap <- dplyr::filter(groupmap, n >= 15) # Keep variables that have more than 15 observations (as seen in column 'n', created earlier) } # Should GROUPS be filtered or VARIABLES? if (writegdx == TRUE) { allmaps <- paste0(mapping[1:length(mapping)], '.gdx') gdxfilename <- paste('farmStats', allmaps, sep = '_') gdxreshape(gdxName = gdxfilename, as.data.frame(groupmap), sum(length(mapping), 2), symName = 'p_farmStats', tName = 'colsFarmStats', order = c(1:sum(length(mapping),1),0) ) return(groupmap) } else return(groupmap) } samplr <- function(filename, BINDir, gdxmap, mapping, writegdx = TRUE) { if ("BINDir" %in% ls(envir = .GlobalEnv) & missing(BINDir)) { BINDir <- get("BINDir", envir = .GlobalEnv) } else { x } require(tidyverse) map2gui <- gdxbinwider(filename, BINDir, gdxmap, mapping) map2gui <- map2gui %>% select(-all_binid) %>% summarise(across(everything(),~ if(is.numeric(.)) weighted.mean(., w=.data[['Weight']], na.rm = TRUE) else Modes(.))) # In RStudio, ignore the (X) error unmatched bracket, everything is fine and all works. map2gui$Weight <- NULL map2gui[!colnames(map2gui) %in% mapping] <- lapply(map2gui[!colnames(map2gui) %in% mapping], function(x) if(is.factor(x)) (as.numeric(as.character(x))) else x) map2gui <- map2gui %>% pivot_longer(cols = where(is.numeric), names_to = c('item1', 'item2'), names_sep = "%") %>% pivot_wider(id_cols = c(all_of(mapping), 'item1'), names_from = 'item2', values_from = 'value') map2gui[] <- lapply(map2gui, function(x) if(is.numeric(x)) round(x, digits = 2) else x) # Rounding the results, comment if not needed map2gui[is.na(map2gui)] <- 0 # By bringing these values from long to wide, there are undefined values and, therefore, NAs. BUT these are not real NAs, plus it's for modelling purposes on GAMS, which will ignore the 0s, so we can safely assign a 0 to the NAs. if (writegdx == TRUE) { allmaps <- paste0(mapping[1:length(mapping)], '.gdx') gdxfilename <- paste('farmData', allmaps, sep = '_') gdxreshape(map2gui, symDim = 3, order = c(1,2,0), gdxName = gdxfilename, symName = 'p_farmData') return(map2gui) } else return(map2gui) } a <- samplr("farmdata2ABN", gdxmap = "ABNRegs2BINIds", mapping = 'ABNRegs') c <- groupstats("farmdata2ABN", gdxmap = "ABNRegs2BINIds", mapping = 'ABNRegs', cols = c('AllowManureExport', 'soilTypeFirm', 'seedPotatoes'), w='Weight', writegdx = TRUE) b <- gdxbinwider(filename = farmdata2abn, gdxmap = 'ABNRegs2BINIds', mapping = 'ABNRegs') str_firstLine_replace <- function(str, pattern, replacement) { str[grepl(pattern=pattern, x=str)][1] <- replacement return(str) } str_lastLine_replace <- function(str, pattern, replacement) { str[grepl(pattern=pattern, x=str)][length(str[grepl(pattern=pattern, x=str)])] <- replacement return(str) } readLines( file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms') )[((which(readLines(file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms'))=='* Setting for executing the task in batch file mode'))-1):which(readLines(file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms'))=='* end batch execution file')] %>% str_replace(pattern = 'Scenario description = \\w*', replacement = paste0('Scenario description = ', 'ABNRegs')) %>% str_firstLine_replace(pattern='Farm sample file = \\w*', replacement = paste0(' Farm sample file = ', 'farmData_', 'ABNRegs')) %>% str_lastLine_replace(pattern='Farm sample file = \\w*', replacement = paste0(' Farm sample file = ', 'farmData_', 'ABNRegs', '\n macro = ', paste(a, collapse = '\\'))) %>% str_replace('farmIds = \\w*', replacement = paste0('farmIds = ', paste0('ABNRegs', '\\'))) %>% str_replace('execute=Gamsrun', replacement = ' startparallel FOR farmidinloop = %allfarms% farmIds = %farmidinloop% execute = Gamsrun ENDFOR collectparallel * execute=Gamsrun ') %>% writeLines(con = 'test.txt') readLines( file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms') )[((which(readLines(file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms'))=='* Setting for executing the task in batch file mode'))-1):which(readLines(file.path('/FARMDYNTRUNK', 'gams', 'incgen', 'runInc.gms'))=='* end batch execution file')] %>% tibble() microbenchmark::microbenchmark(file.path={file.path('FARMDYNTRUNK', 'GUI')}, paste={paste('FARMDYNTRUNK', 'GUI', sep = '/')})