# Committed by Xinxin Yang
#....................................................................
#
# Functions to check that the variables used for calculating the aggregate
# variables in a json file are also present in the csv or rds file.
# A txt file is created that contains the R output messages and the list of
# unmatched variables.
#....................................................................


#' Check the variables/column names used for calculating the aggregate variables
#'
#' @description
#' Checks whether the variables referenced in a json file exist in the raw
#' FADN data file (a fadn.raw.rds file or a csv file from FADN-AGRI) and
#' returns the variables that are not in the raw data file. Optionally, a new
#' json file without the unmatched variables is saved in `extraction_dir`.
#' A txt file (my_logfile.txt) with the outcome of the check is written to the
#' "spool" folder of the fadnUtils data directory; the file is overwritten on
#' every call.
#'
#' @details
#' The fadnUtils data directory has to be set (see the message printed when it
#' is not: `set.data.dir`) before calling this function.
#' Every top-level object/category of the json file is passed to
#' `nested_var()` together with the raw data. The json file is expected to
#' have 6 objects/categories:
#' "id", "info", "costs", "crops", "subsides", "livstock".
#'
#' @param importfilepath A fadn.raw.rds or fadn.raw.csv file address.
#' @param jsonfile A json file address.
#' @param rewrite_json Logical. If TRUE (default), a new json file without the
#'   unmatched variables is saved in `extraction_dir`. Its name is the original
#'   file name prefixed with "rewrite_", e.g. A.json is saved as
#'   rewrite_A.json. If FALSE, no json file is written and a warning is raised
#'   when unmatched variables exist.
#' @param extraction_dir Folder in which the rewritten json file is saved.
#'
#' @return A list with one element per json object/category that has unmatched
#'   variables (an empty list if everything matches), or `FALSE` if the
#'   fadnUtils data directory has not been set.
#' @export
#'
#' @author Xinxin Yang <xinxin.yang@thuenen.de>
#'
#' @examples
#' \dontrun{
#' check.column("./fadn.raw.2009.BEL.rds", "./2014_after.json", TRUE, "./OV")
#' check.column("BEL2009.csv", "2013_before.json", TRUE, "./OV")
#' }
check.column <- function(importfilepath,
                         jsonfile,
                         rewrite_json = TRUE,
                         extraction_dir) {

  # The log file is written below the fadnUtils data dir, so it must be set.
  data.dir <- get.data.dir()
  if (is.null(data.dir)) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(FALSE)
  }
  spool.dir <- paste0(data.dir, "/spool/")

  print(extraction_dir)

  # check the file type and read the file
  fadn.raw <- check_file_type(importfilepath)

  # read json
  raw_str_map <- fromJSON(paste(readLines(jsonfile), collapse = "\n"))

  cat("Loading a json file...\n")

  # res_final collects the unmatched variables per category,
  # json_list collects the (possibly reduced) json content per category.
  res_final <- list()
  json_list <- list()

  for (category in names(raw_str_map)) {
    message("Doing the category: ", category)

    res_key <- nested_var(raw_str_map[[category]], fadn.raw)

    # Assigning NULL to a missing list element is a no-op, so a category only
    # shows up in res_final when nested_var() reported something for it.
    res_final[[category]] <- res_key[[1]]
    json_list[[category]] <- res_key[[2]]
  }

  has_unmatched <- length(res_final) != 0

  if (rewrite_json) {
    new_json_name <- paste("rewrite", basename(jsonfile), sep = "_")
    jsonc <- toJSON(json_list, auto_unbox = TRUE, pretty = TRUE)

    write(jsonc, paste0(extraction_dir, "/", new_json_name))

    cat("The new json file has been successfully written, please check the file in the current dir:", extraction_dir, ".\n")

    if (!has_unmatched) {
      cat("The new json file has been no changed!\n")
    }
  } else if (has_unmatched) {
    # Do not claim that all variables exist when some are unmatched.
    cat("No json file is rewritten.\n")
    warning("Unmatched variables exist, please rewrite a new json file!!!")
  } else {
    cat("No json file is rewritten, all variables exist in the import file.\n")
  }

  # Build the log as a vector of lines and write it in one go. writeLines()
  # on a file path opens and closes the file itself, so no connection is left
  # open, and every entry ends with a newline.
  separator <- "=================================================================="
  log_lines <- c(separator, as.character(Sys.time()), separator)

  if (rewrite_json) {
    log_lines <- c(
      log_lines,
      "A new jsonfile has been successfully written in the current dir:",
      extraction_dir
    )
    if (!has_unmatched) {
      log_lines <- c(log_lines, "The new json file has been no changed!")
    }
  } else {
    log_lines <- c(log_lines, "No json file is rewritten.")
    if (has_unmatched) {
      log_lines <- c(
        log_lines,
        "Unmatched variables exist, please rewrite a new json file!!!"
      )
    }
  }

  log_lines <- c(
    log_lines,
    "",
    "Unmatched variables: ",
    capture.output(res_final)
  )

  # The log file is overwritten (not appended to) on every call.
  writeLines(log_lines, paste0(spool.dir, "my_logfile.txt"))

  return(res_final)
}


#' Check an object in the json file
#' @description
#'
#' This function checks the nodes of the chosen object/category of the json file and finds the variables
#' which are in the json file but not in the fadn.raw data file.
#' It returns two lists: the unmatched variables/column names and the modified json.
#' If an unmatched variable exists, it is deleted from the json list.
#'
#' @param var An object or category of the raw json.
#' @param rds The raw FADN data (e.g. read from the fadn.raw.rds file); its names are used as the available variables/column names.
#'
#' @details A json file has 6 parent objects/categories: "id", "info", "costs", "crops", "subsides", "livstock". This function checks all objects inside the parent object.
#'
#'
#'
#' @author Xinxin Yang
#'
#' @return A list of multiple objects. This list combines the unmatched variables and the modified json for the chosen object/category.


nested_var <- function(var, rds)
{
  res= NULL
  newjson = NULL
  col_names = names(var)

  cat("Number of the totoal objects: ", length(col_names), "\n")

  for (var.key in col_names ){


    var.key.map = var[[var.key]]

    # id, info, costs, subsidies
    if (!is.null(var.key.map[["source"]]))
    {
     #print("no nested, doing*****************************")

      cmd = parse(text = (var.key.map[["source"]]))
      extracted_element = all.vars(cmd)


      for (i in extracted_element){
        d = i %in% names(rds)


        if (!isTRUE(d)){

          cat(i, " is not in rds\n")