GitLab at IIASA

9FF2C80-contents 1.16 KiB
Newer Older
Xinxin Yang's avatar
Xinxin Yang committed
library(data.table)
library(stringr)
library(tidyr)
library(hablar)
# Inventory of the delivered FADN CSV files: list the directory, derive the
# country code and year from each file name, and tabulate both.
# Alternative data location (kept for reference):
# dir <- "S:/Oekonomie/Transfer/gocht/FADN_DATA_2021/csv/"
dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# NOTE: the working directory is still switched here because later parts of
# the script may read files by bare name relative to `dir`.
setwd(dir)

# `pattern` is a regular expression, not a glob: escape the dot and anchor at
# the end so only names that really end in ".csv" are matched.
csv_files <- list.files(path = dir, pattern = "\\.csv$")
if (length(csv_files) == 0) {
  stop("No .csv files found in ", dir, call. = FALSE)
}
csv_list <- data.frame(names = csv_files)

# Split each file name into stem (X1) and extension (X2). Splitting on the
# last dot only keeps the table rectangular even if a stem contains dots.
df <- data.frame(
  X1 = tools::file_path_sans_ext(csv_files),
  X2 = tools::file_ext(csv_files)
)

# Split the stem after its third character: the first three characters go to
# `country`, the remainder to `Year`; X1 itself is kept (remove = FALSE).
df <- separate(df, X1, into = c("country", "Year"), sep = 3, remove = FALSE)

# Number of files per country
table(df$country)
# Number of files per year
table(df$Year)

countries <- unique(df$country)
years <- unique(df$Year)

# Show the files available for two countries of interest
df[df$country == "HRV", ]
df[df$country == "POL", ]


# Inspect one sample file (BEL, 2009) to get a feel for the table layout.
filepath <- "D:/public/data/fadn/lieferung_20210414/csv/BEL2009.csv"
BEL2009.bsp <- read.csv(file = filepath, header = TRUE)
# Table size: number of columns first, then number of rows.
ncol(BEL2009.bsp)
nrow(BEL2009.bsp)
# All column names of the sample file.
colnames(BEL2009.bsp)
# Names of every column whose name contains "ID".
id_BEL2009 <- colnames(BEL2009.bsp)[grepl("ID", colnames(BEL2009.bsp))]
# Print the "ID" column, kept as a one-column data frame.
BEL2009.bsp[, "ID", drop = FALSE]
# Print the full list of CSV file names found earlier.
csv_files
file_list <- list()
# Read (at most) the first ten files and report each one's dimensions.
# head() never pads with NA, so directories holding fewer than ten files
# (or none at all) are handled without a read error.
for (i in head(csv_files, 10)) {
  print(i)
  # Build the path from `dir` so the read does not depend on the current
  # working directory.
  x <- read.csv(file.path(dir, i))

  # Columns first, then rows, on one line.
  cat(ncol(x), nrow(x), "\n")
}