# Xinxin Yang's avatar
# Xinxin Yang committed
library(fadnUtils)
library(writexl)
library(jsonlite)
library(xlsx)
# FADN data directory (CSV delivery).
dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# NOTE: the working directory is switched because the conversion step further
# down passes bare file names (relative paths) to convert.to.fadn.raw.rds().
setwd(dir)

# Get all CSV files in the FADN data dir.
# `pattern` is a regular expression, not a glob: escape the dot and anchor the
# extension so that only names ending in ".csv" are returned.
csv_files <- list.files(path = dir, pattern = "\\.csv$")
csv_list <- data.frame(names = csv_files, stringsAsFactors = FALSE)


# Split every file name into its stem (X1) and extension (X2), then cut the
# stem after the third character: the first three characters are the country
# code, the remainder is the year. Done with base R / tools so that no package
# beyond those loaded at the top of the file is needed.
file_stems <- tools::file_path_sans_ext(csv_files)
df <- data.frame(
  X1 = file_stems,
  country = substr(file_stems, 1L, 3L),
  Year = substr(file_stems, 4L, nchar(file_stems)),
  X2 = tools::file_ext(csv_files),
  stringsAsFactors = FALSE
)
df
table(df$country)
table(df$Year)
countries <- unique(df$country)
# Legacy (misspelled) name kept so that code relying on it keeps working.
countires <- countries
years <- unique(df$Year)
# 28 countries
#

# Get all csv files for country "DEU"
DEU_csv <- grep("^DEU", csv_list$names, value = TRUE)

length(DEU_csv)

#++++++++++++++++++++++++++++

# Register the project directory used by fadnUtils and show it
set.data.dir("D:/public/yang/MIND_STEP/new_sample")

get.data.dir()

# Convert every CSV file found above into raw rds data.
# Note: the loop runs over all entries of `csv_files`, not only the DEU files.
# Country code = characters 1-3 of the file name, year = characters 4-7.
csv_countries <- substr(csv_files, 1, 3)
csv_years <- substr(csv_files, 4, 7)

for (i in seq_along(csv_files)) {
  # progress output: name of the file being converted
  print(csv_files[i])

  convert.to.fadn.raw.rds(
    file.path = csv_files[i],
    sepS = ",",
    fadn.country = csv_countries[i],
    fadn.year = csv_years[i],
    col.id = "ID"
  )
}

show.data.dir.contents()

# ---- Convert fadn.raw.rds to fadn.str.rds ----

# Folder with the rds files below the project data dir
rds.dir <- paste0(get.data.dir(), "/rds/")

# Name under which the str r-data is saved inside rds.dir
new.str.name <- "DEU"

# Extraction dir = <rds.dir><str name>; create it on disk
new.extraction.dir <- paste0(rds.dir, new.str.name)
dir.create(new.extraction.dir)


# Check the columns of an imported file against the json definition and save
# the modified json file into the extraction dir
list_vars <- check.column(
  # an rds file or a csv file
  importfilepath = paste0(rds.dir, "fadn.raw.2018.BEL.rds"),
  # a json file
  jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json",
  # write a new json file without unmatched variables
  rewrite_json = TRUE,
  # save the new json in extraction_dir
  extraction_dir = new.extraction.dir
)

#********************************

year_list <- c(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)

#' Find all runs of adjacent elements of a vector.
#'
#' Returns every contiguous window of `Data` whose length is between 2 and
#' `length(Data) - 1`. Windows are ordered by increasing length and, within one
#' length, by increasing start position. Single elements and the complete
#' vector itself are NOT part of the result.
#'
#' @param Data A vector (e.g. of years).
#' @return An unnamed list of sub-vectors of `Data`; an empty list when `Data`
#'   has fewer than 3 elements (no window of the required length exists).
myFun <- function(Data) {
  n <- length(Data)
  # Guard: for n < 3 the range 2:(n - 1) would count downwards and produce
  # meaningless windows.
  if (n < 3L) {
    return(list())
  }
  windows_by_width <- lapply(2:(n - 1L), function(width) {
    # all start positions for which a window of this width still fits
    lapply(seq_len(n - width + 1L), function(start) {
      Data[start:(start + width - 1L)]
    })
  })
  # flatten one level: list of lists of windows -> list of windows
  unlist(windows_by_width, recursive = FALSE, use.names = FALSE)
}
# All windows of adjacent years returned by myFun(), plus the complete year
# range as the final entry. The full range is appended at the end instead of
# being written to a fixed index, so this stays correct when `year_list`
# changes length.
adjacent_list <- myFun(year_list)
adjacent_list[[length(adjacent_list) + 1L]] <- year_list

# Raw DEU data per year window, keyed by the comma-separated list of years
# (e.g. "2009, 2010").
my.data <- list()

for (year_items in adjacent_list) {
  name <- toString(year_items)
  message("Loading raw data for DEU, years: ", name)
  data <- load.fadn.raw.rds(countries = "DEU", years = year_items)
  my.data[[name]] <- data
}


# Number of common ids per year window, keyed like `my.data`.
Big.Num.Common.id <- list()
# Iterate over the element names rather than 1:length(my.data), which would
# run with indices 1 and 0 on an empty list.
for (name in names(my.data)) {
  data <- my.data[[name]]

  print("******************************")

  # collect.common.id() is called on a column named "id", so rename "ID"
  colnames(data)[which(names(data) == "ID")] <- "id"

  common.id <- collect.common.id(data)

  # one row per common id -> the row count is the number of common ids
  Big.Num.Common.id[[name]] <- nrow(common.id)
}
# Stack the per-window counts into a one-column table; the row names are the
# year-window labels.
DF <- data.frame(do.call(rbind, Big.Num.Common.id))

# Move the labels into a proper column before dropping the row names.
DF$Years <- rownames(DF)
# Name BOTH columns: assigning a single name to a two-column data frame would
# leave the header of the second column (Years) as NA.
colnames(DF) <- c("the number of common id", "Years")
rownames(DF) <- NULL

write_xlsx(DF, "D:/public/yang/MIND_STEP/new_sample/DEUData_common_id.xlsx")