### Preamble ----

## Library loading ----
library(gdxrrw)
library(data.table)
library(arsenal)
library(tidyverse)


## Directory specification ----
GAMSDir     = "C:/GAMS/40" # Change to your GAMS Version!
#ADCDir     = "C:/FARMDYNDATA/ADC"
BINDir     = "C:/FARMDYNDATA/BIN"
#LMMDir     = "W:\\WECR\\PIA\\PW\\DZK\\GHG2020\\output Jakob"
MyLMMDir     = "C:/FARMDYNDATA/LMM"
MyBINDir     = "C:/FARMDYNDATA/MYBIN"
MyDRAMDir     = "C:/FARMDYNDATA/DRAM"
FDDir     = "C:/FARMDYNTRUNK/DAT"
WWLDir     <- "C:/FARMDYNDATA/WWL"
RDir     = "C:/FARMDYNDATA/R"

### Filenames ----
farmdata2abn <- "farmdata2ABN.gdx" # For testing

# GAMS loading
igdx(GAMSDir)

# Working directory
setwd(RDir)

## Functions ----
Modes <- function(x) { # Function found on StackOverflow made by Ken Williams and expanded by digEmAll
  ux <- unique(x)
  tab <- tabulate(match(x, ux))
  ux[tab == max(tab)]
}

fd2gui <- (gdxrrw::rgdx.param("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", "p_farmData2GUI", names=c("all_binid", 'items', "varias", "value"), compress=FALSE, ts=FALSE,
                              squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE))

fdnl <- (gdxrrw::rgdx.param("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", "p_farmData_NL", names=c("all_binid", 'years', "items2", "value"), compress=FALSE, ts=FALSE,
                            squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE))

gdxmapping <- (rgdx.set("C:/FARMDYNDATA/R/FarmDynR/data/GAMS/FarmDynRexampledata.gdx", 'map2binid',names = c('all_binid', 'mapping'), compress=FALSE, ts=FALSE,
                        useDomInfo = TRUE, check.names = TRUE))

gdxbinwider('FarmDynRexampledata.gdx', BINDir = 'inst/extdata/GAMS/', gdxmap = 'map2binid', mapping = 'mapping')

gdxbinwider <- function(filename, BINDir, gdxmap, mapping){

  if ('BINDir' %in% ls(envir = .GlobalEnv) & missing(BINDir)) {
    BINDir <- get('BINDir', envir = .GlobalEnv)
  } else {
    BINDir
  }


fd2guicolnames <- c("all_binid", "item1", "item2", "value")

fdnlcolnames <- c("year", "all_binid", "item1", "value")

fd2gui <- (rgdx.param(paste(BINDir,filename, sep="/"), "p_farmData2GUI", names=fd2guicolnames, compress=FALSE, ts=FALSE,
                                   squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE))

fdnl <- (rgdx.param(paste(BINDir,filename, sep="/"), "p_farmData_NL", names=fdnlcolnames, compress=FALSE, ts=FALSE,
                                 squeeze=TRUE, useDomInfo = TRUE, check.names = TRUE))

gdxmapping <- (rgdx.set(paste(BINDir,filename, sep="/"), gdxmap, compress=FALSE, ts=FALSE,
                                     useDomInfo = TRUE, check.names = TRUE))

# Original values of "dummy" are turned into number codes when loading into R. This changes the values to the original ones.
#
# dummies <- fd2gui[which(fd2gui$all_binid=="dummy"),]
# dummies$value[which(dummies$value==1.6e+303)] <- "num"
# dummies$value[which(dummies$value==1.59e+303)] <- "TRUE"
# dummies$value[which(dummies$value==1.56e+303)] <- "on"

# Restrict to only those that have value num (i.e. numeric). Numerical values may change.
# Pick entries for numerical non-numerical globals with the highest incidence.

nonnumdummies <- dummies[dummies$value != 1.6e+303,]

# Separation of dummy and misc from fd2gui

fd2gui <- fd2gui[which(fd2gui$all_binid!="dummy"),]

# Widening of the data for better and easier viewing and handling

fd2gui <- fd2gui %>%
  pivot_wider(id_cols = "all_binid", names_from = c("item1", "item2"), values_from = "value", names_sep = "%")


fdnl <- fdnl %>%
  pivot_wider(id_cols = c("all_binid", "year"), names_from = c("item1"), values_from = "value", names_sep = "%")

# Since subsetting with !duplicated() is done following the data order, reorder the data to start from youngest to oldest
# Result: all_binids are preserved for youngest years (2020, 2019...) and removed from oldest years (2013, 2014...)

fdnl <- fdnl[order(-as.numeric(fdnl$year)),]

fdnl <- fdnl[!duplicated(fdnl$all_binid),]

# Keep list of variables that are NOT numeric, BUT binary or other variables with other meanings.

tokeep <- c(paste("global", nonnumdummies$item2, sep = "%"), "global%soilTypeFirm", "global%derogatie")

# Finding index of global to keep only NON-numerical values based on "tokeep"

keepmatch <- match(tokeep, colnames(fd2gui))

# Making these factors
fd2gui[keepmatch] <- lapply(fd2gui[keepmatch], as.factor)

# Joining all data together

fdnl2gui <- right_join(fd2gui, fdnl[,c('all_binid', 'Weight')], by='all_binid')

fdnl2gui[] <- lapply(fdnl2gui, function(x) if(is.factor(x)) as.factor(x) else x)

map2gui <- right_join(fdnl2gui, gdxmapping, by='all_binid')

map2gui <- map2gui %>% dplyr::group_by(across(all_of({{ mapping }})))

return(map2gui)
}



gdxreshape <- function (inDF, symDim, symName=NULL, tName="time",
                        gdxName=NULL, setsToo=TRUE, order=NULL,
                        setNames=NULL) {
  # Function based on wgdx.reshape of the gdxrrw package, modified for performance and usability by Hugo Scherer.
  # 23-09-2022 hugo.scherer@wur.nl

  nCols <- ncol(inDF)
  timeIdx <- symDim                     # default index position for time aggregate
  if (is.null(order)) {
    idCols <- 1:(symDim-1)

    inDF[idCols] <- lapply(inDF[idCols], as.factor)

    outDF <- (pivot_longer(inDF, cols=-all_of(idCols)))
  }
  else if ((! is.vector(order)) || (symDim != length(order))) {
    stop ("specified order must be a vector of length symDim")
  }
  else {
    timeIdx <- -1
    if (is.character(order)) {
      stop ("order must be numeric for now")
    }
    else if (! is.numeric(order)) {
      stop ("optional order vector must be numeric or character")
    }

    idCols <- 1:(symDim-1)                                   # for k in 1:symDim
    if (any(duplicated(order))) {
      stop ('duplicate entry in order vector: nonsense')
    }

    if ((symDim-1) != sum(order>0)) {
      stop ('order vector must specify symDim-1 ID columns')
    }
    if (all(order>0)) {
      stop ('order vector must have a non-positive entry to specify the "time" index')
    }

    timeIdx <- match(0, order)

    oo <- c(idCols,(1:nCols)[-idCols])
    df2 <- inDF[oo]
    idCols <- 1:(symDim-1)

    df2[idCols] <- lapply(df2[idCols], factor)

    if (symDim == timeIdx) {     # no need to re-order after reshaping
      outDF <- pivot_longer(df2, cols=-all_of(idCols))
    }
    else {
      df3 <- pivot_longer(df2, cols=-all_of(idCols))
      oo <- vector(mode="integer",length=symDim+1)

      oo[1:(timeIdx-1)] = 1:(timeIdx-1)

      oo[timeIdx] = symDim

      oo[(timeIdx+1):symDim] = (timeIdx+1):symDim-1

      oo[symDim+1] = symDim+1
      outDF <- (df3[oo])
    }
  }
  outDF$name <- as.factor(outDF$name)
  if (is.null(symName)) {
    symName <- attr(inDF, "symName", exact=TRUE)
  }
  if (! is.character(symName)) {
    stop ("symName must be a string")
  }
  attr(outDF,"symName") <- symName