# Script preamble: read the settings file given on the command line and
# switch to the working directory it defines.

# Start from an empty global environment (kept from the original script).
rm(list = ls())
# Send messages/warnings to stdout so they land in the same log as the output.
sink(stdout(), type = "message")
writeLines("output to the log file")
args <- commandArgs(TRUE)

# Fail early with a readable message instead of an obscure source(NA) error.
if (length(args) < 1L) {
  stop("Missing argument: path of the R settings file to source.",
       call. = FALSE)
}

print(args[1])
# NOTE(review): EU_list and curdir are not defined in this script; presumably
# the sourced settings file provides them (as well as restartdir and gamsPath
# used further down) - confirm against the calling tool.
source(args[1])

EU_list <- unlist(EU_list)
#print(EU_list);

setwd(paste0(curdir))

# Setup -------------------------------------------------------------------

getwd()
# Machine-specific R library location; prepended to the library search path.
.libPaths("D:/R/R-3.6.3/library")
.libPaths()

## load libraries
## Install fadnutils from thuenen gitlab

library(devtools)
#install.packages('jsonlite', dependencies=TRUE, repos='http://cran.rstudio.com/')
#devtools::install_git("https://git-dmz.thuenen.de/mindstep/fadnutilspackages",credentials = git2r::cred_user_pass("yang", "******"))
#install.packages("jsonlite", dependencies=TRUE)
# Install pacman on first use, then let it load every package the script needs.
if (!require("pacman")) install.packages("pacman")
pacman::p_load('fadnUtils','data.table', 'ggplot2','gdxdt', 'tidyverse', 'xlsx', 'gdxrrw','Hmisc')
#lsf.str("package:fadnUtils")
## set gams path
# NOTE(review): gamsPath is not defined in this script; presumably it comes
# from the settings file sourced at the top - confirm.
igdx(gamsPath)

## Check your current working directory
## Determine whether the Git directory is the current working dir.
getwd()
#setwd("../R/fadntocapri")
## Set FADN project directory
# NOTE(review): restartdir is not defined in this script; presumably it comes
# from the settings file sourced at the top - confirm.
CurrentProjectDirectory <- paste0(
  str_trim(restartdir),
  "//data_preparation//convert_FADN_to_CAPRI"
)

print(getwd())
print(CurrentProjectDirectory)

## Create a data.dir
create.data.dir(folder.path = CurrentProjectDirectory)

## Once the data.dir is created, we must declare that we are working with it
set.data.dir(CurrentProjectDirectory)

# Sub-folder of the active data.dir that holds the rds files.
rds.dir <- paste0(get.data.dir(), "/rds/")

print(rds.dir)

# set fadn data dir
# Must end with "/" because convert.raw() builds file paths with paste0().

fadn.data.dir <- "D:/data/fadn/lieferung_20210414/csv/"

# set gdx out
gdx.results_out <- "../output/results/data_preparation/convert_FADN_to_CAPRI"

## Load functions
#source(" ..\R\fadntocapri\myfun_fadn.R ")
#---------------------------------
#          ---   functions
#---------------------------------

# group by different NUTS with YEAR, CROP/ANIM, VARIABLE,ORGANIC

#' Sum `value2` per region, year, activity, variable and organic flag.
#'
#' @param data Data frame holding the columns YEAR, ORGANIC, VARIABLE and
#'   value2, the column named by `type`, and either COUNTRY (when
#'   `group.by == "EU"`) or the column named by `group.by`.
#' @param group.by Single string. "EU" pools all rows whose COUNTRY is in the
#'   global `EU_list`; any other value is the name of the column that defines
#'   the regions.
#' @param type Single string naming the activity column to group by (the
#'   section comment above mentions CROP/ANIM).
#' @return A data.table with the grouping columns, `sum_Value`, `REGION`
#'   (the literal "EU" or the values of the `group.by` column) and `REG_TYPE`
#'   (the value of `group.by`).
fadn.filter <- function(data, group.by, type) {
  if (group.by == "EU") {
    # One pooled region: keep the EU countries only and drop the country level.
    filtered <- data %>%
      filter(COUNTRY %in% EU_list) %>%
      group_by(YEAR, .data[[type]], ORGANIC, VARIABLE) %>%
      summarise(sum_Value = sum(value2), .groups = "drop") %>%
      as.data.table() %>%
      mutate(REGION = group.by,
             REG_TYPE = group.by)
  } else {
    filtered <- data %>%
      group_by(.data[[group.by]],
               YEAR,
               .data[[type]],
               VARIABLE,
               ORGANIC) %>%
      summarise(sum_Value = sum(value2), .groups = "drop") %>%
      as.data.table() %>%
      # all_of() is the supported way to select a column by a string held in
      # a variable; `.data[[...]]` inside rename() is deprecated in tidyselect.
      rename(REGION = all_of(group.by)) %>%
      mutate(REG_TYPE = group.by)
  }
  filtered
}

#' Convert FADN csv files into fadn.raw.rds files.
#'
#' Lists the csv files in the global `fadn.data.dir` and hands every selected
#' file to `convert.to.fadn.raw.rds()`. Country and year are read from the
#' file name: characters 1-3 are the country code, characters 4-7 the year.
#'
#' @param countries Character vector of country codes (matched against the
#'   first three characters of each file name), or "all" for every csv file.
#' @return `NULL`, invisibly; the function is called for its side effects.
convert.raw <- function(countries) {
  # The pattern is a regular expression, not a glob: match a literal ".csv"
  # at the end of the file name.
  csv_file_names <- list.files(path = fadn.data.dir, pattern = "\\.csv$")

  # `%in%` keeps the condition scalar even when several countries are passed
  # (`countries == "all"` is an error for length > 1 in current R versions).
  if ("all" %in% countries) {
    csv_selected <- csv_file_names
  } else {
    # Select by the same 3-character prefix that is used as country code
    # below, so a code can never match somewhere else in the file name and an
    # empty `countries` selects nothing.
    csv_selected <- csv_file_names[substr(csv_file_names, 1, 3) %in% countries]
  }

  cat("Create fadn.raw.rds files for: ", countries, "\n")
  for (csv_file in csv_selected) {
    convert.to.fadn.raw.rds(
      file.path = paste0(fadn.data.dir, csv_file),
      sepS = ",",
      fadn.country = substr(csv_file, 1, 3),
      fadn.year = substr(csv_file, 4, 7)
      #keep.csv = T # copy csv file in csv.dir
    )
  }
  invisible(NULL)
}
#' Load FADN raw data, converting the csv files first when needed.
#'
#' Looks up the raw rds files reported by `get.available.fadn.raw.rds()`,
#' runs `convert.raw()` for what is missing and then loads all years with
#' `load.fadn.raw.rds()`.
#'
#' @param countries Character vector of country codes, or "all".
#' @return Whatever `load.fadn.raw.rds(countries = countries, years = "all")`
#'   returns.
load.raw <- function(countries) {
  raw_avail <- get.available.fadn.raw.rds()
  # NOTE(review): a NULL result is treated as "nothing available"; confirm
  # what fadnUtils returns for an empty data.dir.
  countries_avail <- if (is.null(raw_avail)) {
    character(0)
  } else {
    unique(as.character(raw_avail[["COUNTRY"]]))
  }

  if ("all" %in% countries) {
    # "all" is not a country code, so it must not be matched against COUNTRY
    # (that made the check always fail and reconverted everything on every
    # call). Convert only when no raw data exists at all.
    to_convert <- if (length(countries_avail) == 0L) "all" else character(0)
  } else {
    # Convert exactly the requested countries that have no raw rds yet.
    to_convert <- setdiff(countries, countries_avail)
  }

  if (length(to_convert) > 0L) {
    cat("Raw data does not exist, converting raw data ...\n")
    convert.raw(countries = to_convert)
  }

  load.fadn.raw.rds(countries = countries, years = "all")
}
#' Convert raw rds data into structured ("str") rds data named "forcapri".
#'
#' Years up to and including 2013 are converted with the
#' `corrected.2013_before.json` mapping, years from 2014 on with
#' `corrected.2014_after.json`.
#'
#' @param countries Character vector of country codes, or a vector containing
#'   "all". With "all" the conversion is run once per period using the year
#'   labels "before2013" and "after2014"; otherwise it is run per country for
#'   each year 2004-2013 and 2014-2018.
#' @return For "all", the value of the last `convert.to.fadn.str.rds()` call;
#'   otherwise `NULL`, invisibly. Called for its side effects.
convert.str <- function(countries) {
  # Convert FADN data, save the str data in path: ../output/restart/fadn/
  before2013.json <-
    "../r/fadntocapri/corrected.json.full/corrected.2013_before.json"
  after2014.json <-
    "../r/fadntocapri/corrected.json.full/corrected.2014_after.json"

  # Single conversion call; only country, year(s) and mapping file vary.
  convert_one <- function(country, years, map_file) {
    convert.to.fadn.str.rds(country,
                            years,
                            raw_str_map.file = map_file,
                            str.name = "forcapri",
                            force_external_raw_str_map = TRUE)
  }

  if ("all" %in% countries) {
    # all countries and years 719.24s
    # before 2013 and 2013 (413.25s for all countries)
    convert_one(countries, "before2013", before2013.json)
    # after 2014 and 2014 (305.99s for all countries)
    convert_one(countries, "after2014", after2014.json)
  } else {
    # only DEU 84s
    # BEL and DEU 107.26s
    for (country in countries) {
      # 2004-2013 must use the "2013_before" mapping; the earlier code passed
      # the "2014_after" mapping here by mistake.
      for (year in 2004:2013) {
        convert_one(country, year, before2013.json)
      }
      for (year in 2014:2018) {
        convert_one(country, year, after2014.json)
      }
    }
  }
}
# load str crops data
load.str <- function(countries ) {

    # check if str data exist
  fadn.str.rds.avail <- get.available.fadn.str.rds(extract_dir = "forcapri")[COUNTRY %in% countries]
  if(nrow(fadn.str.rds.avail)==0){
    cat("Str data does not exist, converting str data ...\n")
    convert.str(countries = countries)
  }
  # load crops str data
  fadn.str.data <- load.fadn.str.rds("forcapri",countries,"all")
  # fadn.str.cro <- fadn.str.data[[filter]]