# Xinxin Yang's avatar
# Xinxin Yang committed
# Find all adjacent (contiguous) combinations in a vector or list.
#
# Every run of neighbouring elements of `Data` whose length lies between 2 and
# length(Data) - 1 is returned, ordered by run length (shortest first) and,
# within one run length, by start position. The full-length run is NOT part of
# the result.
#
# Args:
#   Data: a vector or list.
#
# Returns:
#   A list of subsets of `Data`. The list is empty when `Data` has fewer than
#   three elements, because no run of length 2 .. length(Data) - 1 exists then.
myFun <- function(Data) {
  n <- length(Data)

  # `2:(n - 1L)` would count DOWN for n < 3 and produce bogus or failing
  # window sizes, so short inputs are answered explicitly.
  if (n < 3L) {
    return(list())
  }

  runs_by_width <- lapply(2:(n - 1L), function(width) {
    # zero-based start offsets of every window of this width
    offsets <- seq_len(n - width + 1L) - 1L
    lapply(offsets, function(offset) Data[seq_len(width) + offset])
  })

  # flatten one level: list of per-width lists -> flat list of runs
  unlist(runs_by_width, recursive = FALSE, use.names = FALSE)
}
# Load FADN raw data and count the common ids for adjacent year combinations.
#
# For every country in `countires_list` the function
#   1. makes sure the directory <data dir>/rds/<country> exists,
#   2. takes the years of that country from the data frame `df`,
#   3. builds every adjacent combination of those years with myFun() and
#      appends the complete year vector,
#   4. loads the raw data of each combination with load.fadn.raw.rds(),
#   5. passes each loaded data set to collect.common.id() and stores the number
#      of rows of the result.
#
# Args:
#   countires_list: vector of country codes (e.g. "BEL"), processed one by one.
#
# Returns (invisibly):
#   A list named by country. Each element is a data frame with the columns
#   `number_of_common_id` and `Years` (the year combination as a
#   comma-separated string; also used as row names).
#
# NOTE: `df` is not an argument; it is read from the enclosing environment and
# must provide the columns `country` and `Year`.
output_common_id <- function(countires_list){

  # Without a user-defined `df`, R resolves the name to the function stats::df
  # and subsetting fails with an obscure "closure is not subsettable" error.
  if (!is.data.frame(df)) {
    stop("output_common_id() needs a data frame `df` with the columns ",
         "`country` and `Year` in the enclosing environment.",
         call. = FALSE)
  }

  rds.dir <- paste0(get.data.dir(), "/rds/")

  # one result data frame per country
  results <- list()

  for (country in countires_list) {

    cat("Country:", country, '\n')

    # Create the per-country directory only when it is missing, so that
    # re-running the function does not raise an "already exists" warning.
    country.dir <- paste0(rds.dir, country)
    if (!dir.exists(country.dir)) {
      dir.create(country.dir)
    }

    # years available for this country
    years <- df[df$country == country, ]$Year

    # every adjacent combination of years, plus the complete range
    adjacent_list <- myFun(years)
    adjacent_list[[length(adjacent_list) + 1]] <- years

    # raw data per combination, keyed by e.g. "2014, 2015"
    my.data <- list()
    for (year_items in adjacent_list) {
      name <- toString(year_items)
      my.data[[name]] <- load.fadn.raw.rds(countries = country, years = year_items)
    }

    # number of rows returned by collect.common.id() per combination
    Big.Num.Common.id <- list()
    for (name in names(my.data)) {

      print("******************************")

      #colnames(data[[name]])[which( names(data[[name]]) == "ID")] <- "id"

      common.id <- collect.common.id(my.data[[name]])
      Big.Num.Common.id[[name]] <- nrow(common.id)
    }

    # one row per combination; the combination string becomes the row name
    DF <- data.frame(do.call(rbind, Big.Num.Common.id))
    colnames(DF) <- "number_of_common_id"
    DF$Years <- rownames(DF)

    results[[country]] <- DF

    # # write xlsx
    # write.xlsx(DF,
    #            file="D:/public/yang/MIND_STEP/new_sample/DEUData_common_id.xlsx",
    #            sheetName = country,
    #            col.names= TRUE,
    #            row.names = TRUE,
    #            append = T)
  }

  invisible(results)
}




# Example run: count the common ids for the country code "BEL".
output_common_id("BEL")

# Convert FADN raw data into str data.
#
# For every country in `countires_list` the function lists the files in
# <data dir>/rds whose names end in "<country>.rds", deletes the directory
# <data dir>/rds/<country>, and then calls convert.to.fadn.str.rds() once per
# listed file. Warnings raised by a conversion are reported and the conversion
# continues; errors are reported and the loop moves on to the next file.
#
# Args:
#   Current_raw_str_map.file: passed on as `raw_str_map.file`
#     (default NULL).
#   overwrite_external_json: passed on as `force_external_raw_str_map`
#     (default FALSE).
#   countires_list: vector of country codes to convert.
#
# Returns:
#   NULL, invisibly; the function is called for its side effects.
raw2str <- function(Current_raw_str_map.file = NULL, overwrite_external_json = FALSE, countires_list){

  for (country in countires_list){
    print("**********************************")

    rds.dir <- paste0(get.data.dir(), "/rds")

    # "\\." matches a literal dot; a bare "." would match ANY character in
    # front of "rds".
    raw_file_names <- dir(rds.dir, pattern = paste0(country, "\\.rds$"))

    # Remove the country's directory before converting.
    # NOTE(review): presumably this is where the str data of a previous run
    # lives (str.name = country) - confirm.
    unlink(paste0(rds.dir, "/", country), recursive = TRUE)

    for (file in raw_file_names){

      # Country code = characters 15 to 17 of the file name.
      # NOTE(review): assumes a fixed 14-character prefix in front of the
      # code - confirm against the naming of the raw rds files.
      # Kept in its own variable so the outer loop variable is not overwritten.
      file.country <- substr(file, 15, 17)

      # Year = all digits contained in the file name.
      year <- as.numeric(gsub("\\D+", "", file))

      cat("converting the str data for country: ", file.country, " and year: ", year, "\n")
      tryCatch(
        expr = {
          # A `warning=` handler inside tryCatch() would unwind the stack and
          # abandon the conversion at the first warning. A calling handler
          # reports the warning and lets the conversion carry on.
          withCallingHandlers(
            convert.to.fadn.str.rds(fadn.country = file.country,
                                    fadn.year = year,
                                    raw_str_map.file = Current_raw_str_map.file,
                                    force_external_raw_str_map = overwrite_external_json,
                                    str.name = file.country),
            warning = function(w){
              message('Caught an warning!')
              print(w)
              invokeRestart("muffleWarning")
            }
          )
        },
        error = function(e) {
          message("Caught an error! Please check the objects in json file using check.column() (see more in USE_CASE_4.R).")
          #cat("Wrong, can't convert the str r-data!",sep = "\n")
          print(e)
        }
      )
    }
  }

  invisible(NULL)
}
# Run the raw -> str conversion for the countries in `countires`, using the
# given raw_str_map json file.
# NOTE(review): `countires` is not defined in the visible part of this file and
# the json path is machine-specific - confirm both before running.
raw2str(Current_raw_str_map.file ="D:/public/yang/MIND_STEP/new_sample/raw_str_maps/rewrite_2014_after.json",
        countires_list = countires)