Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
#....................................................................
#
# Functions that check whether the variables used in a json file to calculate the aggregate variables
# also exist in the csv or rds file.
# They also create a txt-file containing the R output messages and the list of unmatched variables.
#....................................................................
#' Check the variables/column names for calculating the aggregate variables
#'
#' @description
#' The check.column function compares the variables referenced in a json-file
#' with the column names of a raw data file (fadn.raw.rds or fadn.raw.csv, the
#' csv-file from FADN-AGRI) and returns the variables which are referenced in
#' the json-file but are not in the raw data file.
#' Optionally a new json file is saved in the extraction_dir; in it every
#' expression that references an unmatched variable is replaced by the
#' string "NA".
#' A txt-file (my_logfile.txt) holding the output messages and the unmatched
#' variables is written to the "spool" folder of the fadnUtils data dir. The
#' file is opened in mode "wt", i.e. it is overwritten on every call.
#'
#' @details
#' Every top-level object/category of the json-file is checked separately
#' with \code{nested_var}.
#' Json file has 6 objects/categories: "id", "info", "costs", "crops", "subsides", "livstock".
#'
#' @param importfilepath A fadn.raw.rds or fadn.raw.csv file address.
#' @param jsonfile A json file address.
#' @param rewrite_json Logical, if TRUE (default), a new json file will be
#'   saved in extraction_dir. The string "rewrite" will be added in front of
#'   the original file name, and they are separated through "_". For example,
#'   the name of original json file is A.json, the new json file will be
#'   saved as rewrite_A.json. Otherwise, do not rewrite json file.
#' @param extraction_dir Extraction_dir is the folder for extracting the data,
#'   i.e. the folder the rewritten json file is saved in.
#'
#' @return FALSE if the fadnUtils data dir has not been set. Otherwise a named
#'   list with one element per json category that has unmatched variables;
#'   each element holds the names of the unmatched variables. The list is
#'   empty if every variable was found.
#' @export
#'
#' @author Xinxin Yang <xinxin.yang@thuenen.de>
#'
#' @examples
#' \dontrun{
#' check.column("./fadn.raw.2009.BEL.rds", "./2014_after.json", TRUE, "./OV")
#' check.column("BEL2009.csv", "2013_before.json", TRUE, "./OV")
#' }
#'
check.column <- function(importfilepath,
                         jsonfile,
                         rewrite_json = TRUE,
                         extraction_dir)
{
  # The log file is written below the fadnUtils data dir, so that dir has to
  # be configured before anything else is done.
  data.dir <- get.data.dir()
  if (is.null(data.dir)) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(FALSE)
  }
  spool.dir <- paste0(data.dir, "/spool/")

  print(extraction_dir)

  # check the file type and read the file (stops for anything but rds/csv)
  fadn.raw <- check_file_type(importfilepath)

  # read json
  raw_str_map <- fromJSON(paste(readLines(jsonfile), collapse = "\n"))
  cat("Loading a json file...\n")
  json.name <- basename(jsonfile)

  # res_final: unmatched variables per category
  # json_list: the (possibly modified) json content per category
  res_final <- list()
  json_list <- list()
  for (category in names(raw_str_map)) {
    message("Doing the category: ", category)
    checked <- nested_var(raw_str_map[[category]], fadn.raw)
    # Assigning NULL adds no element, so categories without unmatched
    # variables do not show up in res_final and length(res_final) == 0
    # means "everything matched".
    res_final[[category]] <- checked[[1]]
    json_list[[category]] <- checked[[2]]
  }
  has_unmatched <- length(res_final) != 0

  # console feedback
  if (rewrite_json) {
    jsonc <- toJSON(json_list, auto_unbox = TRUE, pretty = TRUE)
    json.name <- paste("rewrite", json.name, sep = "_")
    write(jsonc, paste0(extraction_dir, "/", json.name))
    cat("The new json file has been successfully written, please check the file in the current dir:", extraction_dir, ".\n")
    if (!has_unmatched) {
      cat("The new json file has been no changed!")
    }
  } else if (has_unmatched) {
    # Do not claim that all variables exist when some are missing.
    cat("No json file is rewritten.\n")
    warning("Unmatched variables exist, please rewrite a new json file!!!")
  } else {
    cat("No json file is rewritten, all variables exist in the import file.\n")
  }

  # save the list of unmatched variables in a txt file
  # "wt" = write in text mode: an existing log file is replaced, not appended.
  my_logfile <- file(paste0(spool.dir, "my_logfile.txt"), open = "wt")
  # Always release the connection, also when one of the writes below fails.
  on.exit(close(my_logfile), add = TRUE)

  # system time output
  cat("==================================================================\n",
      as.character(Sys.time()),
      "\n==================================================================\n",
      file = my_logfile)

  if (rewrite_json) {
    cat("A new jsonfile has been successfully written in the current dir:", extraction_dir, file = my_logfile, sep = "\n")
    if (!has_unmatched) {
      cat("The new json file has been no changed!", file = my_logfile, sep = "\n")
    }
  } else {
    cat("No json file is rewritten.", file = my_logfile, sep = "\n")
    if (has_unmatched) {
      cat("Unmatched variables exist, please rewrite a new json file!!!", file = my_logfile, sep = "\n")
    }
  }

  cat("\nUnmatched variables: \n", file = my_logfile, sep = "\n")
  cat(capture.output(res_final), file = my_logfile, sep = "\n")

  res_final
}
#' Check an object in the json file
#'
#' @description
#' This function checks the nodes of the chosen object/category of the json
#' file and finds the variables which are referenced in the json file but are
#' not columns of the fadn.raw data.
#' Returning two lists: unmatched variables/column names and modified json.
#' If an expression references an unmatched variable, that expression is
#' replaced by the string "NA" in the returned json list.
#'
#' @param var An object or category of the raw json, as a named list. Each
#'   entry either has a "source" expression or a "columns" list of
#'   expressions.
#' @param rds The fadn.raw data; its names (\code{names(rds)}) are the
#'   variables/column names the expressions are checked against.
#'
#' @details A json file has 6 parent objects/categories: "id", "info", "costs", "crops", "subsides", "livstock". This function checks all objects inside the parent object.
#' Only the variables that are really missing are reported; variables of the
#' same expression which do exist in the data are not.
#'
#' @author Xinxin Yang
#'
#' @return A list with two elements: "variables", the unique names of the
#'   unmatched variables (NULL if there are none), and "json", the modified
#'   json list for the chosen object/category.
nested_var <- function(var, rds)
{
  available <- names(rds)
  unmatched <- NULL
  entry_names <- names(var)
  cat("Number of the totoal objects: ", length(entry_names), "\n")

  # Names used by an expression that are not columns of the data.
  missing_from_data <- function(expression_text) {
    referenced <- all.vars(parse(text = expression_text))
    referenced[!referenced %in% available]
  }

  for (var.key in entry_names) {
    entry <- var[[var.key]]

    if (!is.null(entry[["source"]])) {
      # id, info, costs, subsidies: one expression per entry
      absent <- missing_from_data(entry[["source"]])
      # Guard on length: appending character(0) would turn the NULL
      # "nothing unmatched" result into an empty vector.
      if (length(absent) > 0) {
        for (name in absent) {
          cat(name, " is not in rds\n")
        }
        unmatched <- c(absent, unmatched)
        # delete the unmatched vars
        var[[var.key]][["source"]] <- "NA"
      }
    } else {
      # livestock, crops: one expression per element of "columns"
      for (element in names(entry[["columns"]])) {
        absent <- missing_from_data(entry[["columns"]][[element]])
        if (length(absent) > 0) {
          unmatched <- c(absent, unmatched)
          # delete the unmatched var
          var[[var.key]][["columns"]][[element]] <- "NA"
        }
      }
    }
  }

  # remove the duplicated variables
  list("variables" = unique(unmatched), "json" = var)
}
#' Check the type of load file
#'
#' @description
#' This function checks the type of the load file and reads this file.
#' The type is taken from the file extension, ignoring upper/lower case
#' (".rds", ".RDS", ".csv", ".CSV", ...).
#' If the file is not a csv or rds file,
#' the execution of the currently running R code will be stopped.
#'
#' @param filepath A rds or csv file address (a single character string).
#'
#' @return The object read from the file: the result of \code{readRDS} for a
#'   rds file, or the data frame returned by \code{read.csv} for a csv file.
#'
check_file_type <- function(filepath)
{
  # grepl() on NA or on a vector would make the if() conditions below fail
  # with an unrelated error message, so validate the argument first.
  if (!is.character(filepath) || length(filepath) != 1L || is.na(filepath)) {
    stop("ERROR: filepath must be a single character string!")
  }

  # Anchor on the dot so that only a real extension matches (a path that
  # merely ends in the letters "rds"/"csv" does not).
  if (grepl("\\.rds$", filepath, ignore.case = TRUE)) {
    cat("Loading a rds file...\n")
    fadn.raw <- readRDS(filepath)
  } else if (grepl("\\.csv$", filepath, ignore.case = TRUE)) {
    cat("Loading a csv file...\n")
    fadn.raw <- read.csv(filepath)
  } else {
    stop("ERROR: Please check the file format. It must be a file of type: rds or csv!")
  }

  fadn.raw
}