# Xinxin Yang committed
# (residue of a repository web view; kept as a comment so the file parses as R)




#' Imports a DG-AGRI csv into fadnUtils
#'
#' It first calls convert.to.fadn.raw.rds and, if that succeeds,
#' convert.to.fadn.str.rds.
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param raw.f the raw_str_map file to use. It must reside inside the 'raw_str_maps' folder of the data.dir
#' @param sepS the separator of the csv file (by default ",")
#' @param fadn.year the year the csv file refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv file refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv file; else do not copy
#'
#' @return the value returned by convert.to.fadn.str.rds when both conversion
#'   steps are run; invisibly FALSE when the csv file does not exist, the data
#'   dir is not set, no raw_str_map is given or the raw conversion fails
#' @export
#'
#' @examples
#' \dontrun{
#' # the data dir must have been set beforehand (see set.data.dir)
#' import.fadn.csv("/path/to/ELL2001.csv",
#'                 raw.f = "raw_str_map.json",
#'                 fadn.year = 2001,
#'                 fadn.country = "ELL")
#' }
import.fadn.csv <- function(file.path,
                            raw.f = NULL,
                            sepS = ",",
                            fadn.year = NA,
                            fadn.country = NA,
                            keep.csv = FALSE) {

  # The source csv must exist before anything else is attempted.
  if (!file.exists(file.path)) {
    cat(paste0("File ", file.path, " does not exist. Exiting ...\n"))
    return(invisible(FALSE))
  }

  # Both conversion steps write below the data dir, so it has to be set.
  if (is.null(get.data.dir())) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(invisible(FALSE))
  }

  if (is.null(raw.f)) {
    cat("You have to give a raw_str_map. Exiting ....\n")
    return(invisible(FALSE))
  }

  # Step 1: csv -> fadn.raw.rds. isTRUE() also treats NULL/NA results as a
  # failure instead of letting `if` raise an error on them.
  raw.ok <- convert.to.fadn.raw.rds(file.path = file.path,
                                    sepS = sepS,
                                    fadn.year = fadn.year,
                                    fadn.country = fadn.country,
                                    keep.csv = keep.csv)
  if (!isTRUE(raw.ok)) {
    cat("Failed to import. Exiting ...\n")
    # FALSE (not NULL) so that every failure path yields the same value and
    # callers can safely write `if (import.fadn.csv(...))`.
    return(invisible(FALSE))
  }

  # Step 2: fadn.raw.rds -> fadn.str.rds; its result is handed back as is.
  convert.to.fadn.str.rds(fadn.country, fadn.year, raw.f)
}



#' Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds
#'
#' The csv is read into a data.table, tagged with the attributes
#' "original.file.path", "fadn.year" and "fadn.country", and saved as
#' <data.dir>/rds/fadn.raw.<fadn.year>.<fadn.country>.rds. The saved table is
#' in wide format, i.e. in tabular format that is identical to the csv data.
#'
#' A second, compressed output (a list that contains $data.char and $data.num
#' data.tables in long format, with 0 values removed and col.id as the index)
#' is currently disabled; its code is kept, commented out, at the end of the
#' function body.
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param sepS the field separator of the csv file (by default ",")
#' @param fadn.year the year the csv file refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv file refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv file to <data.dir>/csv, replacing an existing copy; else do not copy
#' @param col.id the name of the id column; only used by the disabled compressed output
#'
#' @return Saves the fadn.raw.rds file and returns TRUE (invisibly) if
#'   everything goes well; returns FALSE if the csv file does not exist or the
#'   data dir is not set
#' @import data.table
#'
#' @export
#' @examples
#' \dontrun{
#' # the data dir must have been set beforehand (see set.data.dir)
#' convert.to.fadn.raw.rds("/path/to/ELL2001.csv",
#'                         sepS = ";",
#'                         fadn.year = 2001,
#'                         fadn.country = "ELL")
#' }
convert.to.fadn.raw.rds <- function(file.path = "",
                                    sepS = ",",
                                    fadn.year = NA,
                                    fadn.country = NA,
                                    keep.csv = FALSE,
                                    col.id = "ID") {

  # Fail fast on a missing input file, before any dependency is loaded.
  if (!file.exists(file.path)) {
    cat(paste0("File ", file.path, " does not exist. Exiting ...\n"))
    return(FALSE)
  }

  # All output is written below the fadnUtils data dir.
  data.dir <- get.data.dir()
  if (is.null(data.dir)) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(FALSE)
  }

  library(data.table)

  # Optionally keep a copy of the csv in <data.dir>/csv.
  if (keep.csv) {
    # base:: prefix because the `file.path` argument has the same name as the
    # base function.
    csv.copy <- base::file.path(data.dir, "csv", basename(file.path))
    copy.exists <- file.exists(csv.copy)

    # Copying a file onto itself with overwrite = TRUE would truncate it, so
    # the copy is skipped when source and destination are the same file.
    same.file <- copy.exists &&
      identical(normalizePath(file.path), normalizePath(csv.copy))

    if (!same.file) {
      if (copy.exists) {
        cat("File exists. Overwriting ...\n")
      }
      print("      copying file")
      # overwrite = TRUE: without it file.copy() leaves an existing copy
      # untouched, contrary to the message above.
      if (!file.copy(file.path, csv.copy, overwrite = TRUE)) {
        warning("Could not copy ", file.path, " to ", csv.copy, call. = FALSE)
      }
    }
  }

  # Convert to uncompressed rds and save. `sepS` is handed to the reader so
  # that csv files with a separator other than "," are split into columns.
  print("      creating fadn.raw.rds")
  data.raw <- data.table(utils::read.csv(file.path,
                                         header = TRUE,
                                         sep = sepS,
                                         as.is = TRUE))

  # Record where the data came from on the table itself.
  attr(data.raw, "original.file.path") <- file.path
  attr(data.raw, "fadn.year") <- fadn.year
  attr(data.raw, "fadn.country") <- fadn.country

  data.name <- paste0("fadn.raw.", fadn.year, ".", fadn.country, ".rds")
  saveRDS(data.raw, base::file.path(data.dir, "rds", data.name))

  # Disabled: compressed variant of the raw data.
  # #convert to compressed rds and save
  # data.raw.classes = data.table(col.name=names(data.raw),col.class=sapply(data.raw,class))
  # data.raw.compr = list()
  #
  # char.cols = c(col.id, data.raw.classes[col.class=="character",col.name])
  # data.raw.compr$data.char = data.raw[,..char.cols]
  #
  # num.cols = c(col.id, data.raw.classes[!col.class=="character",col.name])
  # data.raw.compr$data.num = melt(data.raw[,..num.cols],id.vars = col.id)[!value==0]
  #
  # attr(data.raw.compr,"original.file.path") <-file.path
  # attr(data.raw.compr,"fadn.year")<-fadn.year
  # attr(data.raw.compr,"fadn.country")<-fadn.country
  # attr(data.raw.compr,"col.names")<-names(data.raw)
  # attr(data.raw.compr,"col.id")<-col.id
  #
  # data.name  = paste0("fadn.raw.",fadn.year,".",fadn.country,".compressed.rds")
  # saveRDS(data.raw.compr,paste0(data.dir,"/rds/",data.name))

  return(invisible(TRUE))
}



#' Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file
#'
#' The raw_str_map.json specification is as follows:
#'
#' {
#'    "id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
#'    "info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
#'    "livestock": {},
#'    "crops": {
#'               "CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....}   },
#'               "CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....}   },
#'               ....
#'    }
#' }
#'
#'
#' The structure of the str.dir:
#'  - A data.dir can hold more than one extractions.
#'  - Each extraction has a short name (20 or less characters, whitespace is not allowed)
#'  - Each extraction is stored in the data.dir/rds/<extraction_name>
#'  - That folder contains the following files:
#'       + raw_str_map.json: the raw_str_map
#'       + fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
#'
#' Notes:
#' 1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
#'    All are data.tables. For all of them, the first columns are those that are contained in the "id" object
#'    "info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
#'        related raw_str_map.json file.
#'    "crops" and "livestock-animals" are in long data format (https://tidyr.tidyverse.org/), where one farm lies across many rows, and each
#'        row is a farm-crop-variableName-value combination
#'
#' 2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
#'     i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
#'     ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
#'
#' 3) We attach certain attributes that are useful for identifying information:
#'     i) In $info and $costs, the attribute "column description" provides the formula and the description of each column
#'     ii) In $crops and $livestock-animals, the attributes "$crops.descriptions" and "$livestock.descriptions" provide the description of each CROP contained there
#'     iii) In $crops and $livestock-animals, the attribute "$column.formulas" provides the formulas used in order to derive the VALUE
#'
#'
#'
#'
#' @param fadn.country string with the country to extract the str data
#' @param fadn.year the year to extract the structured data
#' @param raw_str_map.file the full path to the raw_str_map file.
#' @param str.short_name the short name of the str data. No spaces and text up to 20 characters
#' @param DEBUG if TRUE, prints more details on the conversion process
#'
#' @return Saves the rds.str.fadn and returns TRUE if everything goes well
#'