library(fadnUtils)
library(writexl)
library(jsonlite)
library(xlsx)

# Setup part of the workflow: list the delivered FADN CSV files, convert each
# one to a fadnUtils raw rds file, and prepare the extraction directory and
# JSON variable list used by the later steps.

# FADN data directory ----
dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# The conversion loop below passes bare file names, so they are resolved
# relative to this directory.
setwd(dir)

# List all CSV files in the FADN data directory.
# `pattern` is a regular expression (not a glob): the dot is escaped and the
# match is anchored at the end of the file name.
csv_files <- list.files(path = dir, pattern = "\\.csv$")
if (length(csv_files) == 0L) {
  stop("No CSV files found in ", dir, call. = FALSE)
}
csv_list <- data.frame(csv_files)
colnames(csv_list) <- "names"

# Split every file name at "." into one column per part (X1, X2, ...).
df <- data.frame(
  do.call(
    "rbind",
    strsplit(as.character(csv_list$names), ".", fixed = TRUE)
  )
)
df

# Split the first part of the file name after its third character:
# characters 1-3 become `country`, the remainder becomes `Year`.
# Base R is used so the script does not rely on a package it never attaches.
file_stem <- as.character(df$X1)
df$country <- substr(file_stem, 1, 3)
df$Year <- substr(file_stem, 4, nchar(file_stem))

table(df$country)
table(df$Year)
countries <- unique(df$country)
years <- unique(df$Year)
# Author's note: 28 countries.

# Names of all CSV files whose name starts with "DEU".
DEU_csv <- grep("^DEU", csv_list$names, value = TRUE)
length(DEU_csv)

# Project data directory ----
set.data.dir("D:/public/yang/MIND_STEP/new_sample")
get.data.dir()

# Convert every CSV file (all countries, not only DEU) into raw rds data.
for (file in csv_files) {
  print(file)
  # Characters 1-3 of the file name are taken as the country code and
  # characters 4-7 as the year.
  country <- substr(file, 1, 3)
  year <- substr(file, 4, 7)
  convert.to.fadn.raw.rds(
    file.path = file,
    sepS = ",",
    fadn.country = country,
    fadn.year = year,
    col.id = "ID"
  )
}

show.data.dir.contents()

# Convert fadn.raw.rds to fadn.str.rds ----
rds.dir <- paste0(get.data.dir(), "/rds/")

# Name under which the structured data is stored inside rds.dir.
new.str.name <- "DEU"

# Extraction directory: build the path once and create it only when it is
# missing, so re-running the script does not raise an "already exists" warning.
new.extraction.dir <- paste0(rds.dir, new.str.name)
if (!dir.exists(new.extraction.dir)) {
  dir.create(new.extraction.dir)
}

# Check the variables of the JSON file against a raw data file and save the
# modified JSON file in the extraction directory.
# NOTE(review): the reference file is the BEL 2018 raw file although the
# extraction is named "DEU" - confirm this is intended.
list_vars <- check.column(
  importfilepath = paste0(rds.dir, "fadn.raw.2018.BEL.rds"), # rds or csv file
  jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json", # a json file
  rewrite_json = TRUE, # write a new json file without unmatched variables
  extraction_dir = new.extraction.dir # save the new json in extraction_dir
)

#********************************
year_list <- c(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)

# find all adjacent combinations in a list
# Return every run of adjacent elements of `Data` whose length is between 2
# and length(Data) - 1, ordered by run length and then by start position.
# The full-length run is not included.
#
# Data: a vector (here: the list of years).
# Returns an unnamed list of sub-vectors; an empty list when `Data` has fewer
# than three elements, because no run of the requested lengths exists then.
myFun <- function(Data) {
  n <- length(Data)
  if (n < 3L) {
    return(list())
  }
  runs_by_width <- lapply(2:(n - 1L), function(width) {
    lapply(seq_len(n - width + 1L), function(start) {
      Data[start:(start + width - 1L)]
    })
  })
  unlist(runs_by_width, recursive = FALSE, use.names = FALSE)
}

# All adjacent year combinations plus the complete year range as last entry.
# Appending (instead of writing to a fixed index) keeps this correct for any
# length of year_list.
adjacent_list <- c(myFun(year_list), list(year_list))

# Load the DEU raw data once per year combination, keyed by the
# comma-separated list of years.
my.data <- list()
for (year_items in adjacent_list) {
  name <- toString(year_items)
  print(name)
  my.data[[name]] <- load.fadn.raw.rds(countries = "DEU", years = year_items)
}

# For every year combination, count the rows returned by collect.common.id().
Big.Num.Common.id <- list()
for (name in names(my.data)) {
  print("******************************")
  panel <- my.data[[name]]
  # collect.common.id() is given the identifier column as "id", not "ID".
  id_col <- which(names(panel) == "ID")
  colnames(panel)[id_col] <- "id"
  common.id <- collect.common.id(panel)
  Big.Num.Common.id[[name]] <- nrow(common.id)
}

# One row per year combination. Both column names are set explicitly so the
# "Years" column keeps its name in the exported workbook.
DF <- data.frame(
  count = as.integer(unlist(Big.Num.Common.id, use.names = FALSE)),
  Years = as.character(names(Big.Num.Common.id)),
  check.names = FALSE
)
colnames(DF) <- c("the number of common id", "Years")
rownames(DF) <- NULL

write_xlsx(DF, "D:/public/yang/MIND_STEP/new_sample/DEUData_common_id.xlsx")