# Exploratory script: inventory the CSV files in the FADN delivery directory,
# derive country / year from the file names (e.g. "BEL2009.csv"), inspect one
# example file, and print the dimensions of the first few files.

library(data.table)
library(stringr)
library(tidyr)
library(hablar)

# Data directory ----
# NOTE: `dir` must end with "/" because file paths below are built with
# paste0(dir, <file name>).
# dir <- "S:/Oekonomie/Transfer/gocht/FADN_DATA_2021/csv/"
dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# Kept for backward compatibility: code further down the file may rely on the
# working directory. Nothing in this section depends on it any more.
setwd(dir)

# File inventory ----
# `pattern` is a regular expression, not a glob: match a literal ".csv" suffix.
csv_files <- list.files(path = dir, pattern = "\\.csv$")
csv_list <- data.frame(names = csv_files, stringsAsFactors = FALSE)

# Split each file name into stem (X1) and extension (X2). Using the `tools`
# helpers instead of splitting on every "." keeps the result rectangular even
# if a file name contains additional dots, and works for an empty listing.
df <- data.frame(
  X1 = tools::file_path_sans_ext(csv_list$names),
  X2 = tools::file_ext(csv_list$names),
  stringsAsFactors = FALSE
)

# Split the stem after its third character: the first three characters become
# `country`, the remainder becomes `Year`; the original X1 column is kept.
df <- separate(df, X1, into = c("country", "Year"), sep = 3, remove = FALSE)

# Number of files per country
print(table(df$country))

# Number of files per year
print(table(df$Year))

countries <- unique(df$country)
years <- unique(df$Year)

# Spot checks for individual countries.
# NOTE(review): the original layout was lost, so it is unclear whether the HRV
# check was commented out; it is left disabled here -- confirm.
# print(df[df$country == "HRV", ])
print(df[df$country == "POL", ])

# Example file ----
filepath <- paste0(dir, "BEL2009.csv")

# for country: BEL
BEL2009.bsp <- read.csv(filepath, header = TRUE)
print(ncol(BEL2009.bsp))
print(nrow(BEL2009.bsp))
print(colnames(BEL2009.bsp))

# Names of all columns whose name contains "ID"
id_BEL2009 <- grep("ID", colnames(BEL2009.bsp), value = TRUE)
# Requires a column named exactly "ID"; errors otherwise.
print(BEL2009.bsp["ID"])

print(csv_files)
# Not filled in this section; kept because later code may use it.
file_list <- list()

# Dimensions of the first files ----
# head() caps at the number of available files; csv_files[1:10] would yield NA
# entries (and a failing read.csv) when fewer than 10 files exist.
for (i in head(csv_files, 10)) {
  print(i)
  x <- read.csv(paste0(dir, i))
  cat(ncol(x), nrow(x), "\n")
}