# This use case has been written to convert from NUTS 2003 to latest NUTS version (2016)
# in both directions, because FADN data is from 2004 to 2018.
requiredPackages = c('fadnUtils','data.table', 'devtools','jsonlite', 'ggplot2', 'tidyverse')
for(p in requiredPackages){
if(!require(p,character.only = TRUE)) install.packages(p)
library(p,character.only = TRUE)
}
CurrentProjectDirectory = "D:/data/fadn/lieferung_20210414/yang/fadn_work_space"
set.data.dir(CurrentProjectDirectory)
str.dir <- "str_dir"
str_data <- readRDS(paste0(get.data.dir(),"/rds/",str.dir,"/fadn.str.all.rds"))
# maps for time series of NUTS1, NUTS2, NUTS3 and FADN region ----
print("# maps for time series of NUTS1, NUTS2, NUTS3 and FADN region")
fadnUtils::nuts.heatmap.group(str_data$info, "NUTS1", countries = "DEU", onepage = FALSE)
fadnUtils::nuts.heatmap.group(str_data$info, "NUTS2", countries = "DEU", onepage = FALSE)
fadnUtils::nuts.heatmap.group(str_data$info, "NUTS3")
fadnUtils::nuts.heatmap.group(str_data$info, "REGION", onepage = TRUE)
cat("
#
# Conversion of fadn data to latest NUTS Version
# Depends on the MIND STEP project and provided data
# - 2004 (NED)-2007-2018
# - regional allocation does not necessarily match to official EU regulation
#
")
# test data ----
test_data <- str_data$info %>%
select(ID,WEIGHT,COUNTRY,REGION,YEAR,NUTS1,NUTS2,NUTS3,UAA,SIZEUR)
# FADN region (from 'fadnUtils' package) ----
final_fadn_region <- test_data %>% left_join(region.trans, by = c( "REGION", "COUNTRY")) %>%
mutate(REGION_final = if_else(is.na(REGION_new), REGION, REGION_new)) %>% select(-c(REGION_new, change_region))
cat("
# nuts2 ----
# nuts2.trans includes the complete list of changes at NUTS2 level from NUTS 2003 to latest version 2016
#
#1: Most of the countries need only be recoded. This means NUTS2 regions are merged. There are also
some boundary shifts, but these are not considered as we do not have the true location of the farms.
#2: Special rules for some countries: Some individual countries have their own rules. (split and boundary shift)
#3: Compare finally transformed and original data.
# Special rules are:
# 1: Conversion based on NUTS3 level rules. A visual inspection was made according to NUTS shapefiles.
# SVN: 2003-2006: NUTS3 <- NUTS2 <- NUTS1
# 2010-2013: shapefile check: SI01 = SI03, SI02=SI04
# HUN: recode HU10 to HU12 as we found FADN data only for 'HU12'.
# POL: PL91 and PL92: putting both together as PL9192
# SUO, LTU, IRE : NUTS3 <- NUTS2 <- NUTS1
# UKI: 2010-2013 shapefile for NUTS2: UKIx fadn no data
# 2006-2010 only code change
# 2013-2016: NUTS3 <- NUTS2 <-NUTS1
# UKN, UKN0, UKN1 -> recoded UKN0
# *ignore the number of regional surface except POL
")
NUTS2_convert <- function (test_data){
keep_columns <- c(colnames(test_data), "NUTS1_final", "NUTS2_final")
# print(keep_columns)
# 1: new NUTS2 will be added to fadn original data: only for recoded -----
special_countries <- c("SVN","HUN","POL","SUO","LTU","IRE","UKI")
cat("Sepcial COUNTRIES: ", special_countries,"\n")
cat("Now update NUTS2 (NUTS1) regulation for non-special countries.... \n")
## rules for only non-special countries
nuts2.tran.recoded <- nuts2.trans %>%
filter(!COUNTRY %in% special_countries)
## as we have at maximum two changes of NUTS regulation, we left join two times
test_data_nuts2 <- test_data %>% filter(!COUNTRY %in% special_countries) %>%
left_join(nuts2.tran.recoded, by = c( "NUTS2", "COUNTRY"))
test_data_nuts2 <- test_data_nuts2 %>%
left_join(nuts2.tran.recoded, by = c("NUTS2_new"="NUTS2", "COUNTRY"))
## Now we generate the final NUTS2 regulation based on the left joins
recoded_final <- test_data_nuts2 %>%
mutate(NUTS2_final=case_when(
is.na(NUTS2_new) & is.na(NUTS2_new.y) ~ NUTS2,
!is.na(NUTS2_new) & is.na(NUTS2_new.y) ~ NUTS2_new,
TRUE ~ NUTS2_new.y
)) %>% select(-contains(".x"),-contains(".y"), -NUTS2_new)
## Now we add new NUTS1 - NUTS1_final
recoded_final <- recoded_final %>%
mutate(NUTS1_final=str_sub(NUTS2_final,1,3)) %>% select(keep_columns)
cat("Non-special countries finished.\n")
cat("Special countrie.....\n")
# 2: special rules ----
# hun: fadn data only for HU12 ----
# rename
country_HUN <- test_data %>% filter(COUNTRY=="HUN") %>%
mutate( NUTS2_final = NUTS2,
NUTS2_final = if_else(NUTS2_final=="HU10", "HU12", NUTS2_final),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>% select(keep_columns)
# pol: calcalate boundary shift ----
pol_nuts2_trans <- nuts2.trans %>% filter(COUNTRY == "POL") %>%
filter(NUTS2!="PL12")
## POL code change for some NUTS2
country_POL <- test_data %>% filter(COUNTRY=="POL") %>%
left_join(pol_nuts2_trans, by = c( "NUTS2", "COUNTRY")) %>%
mutate(NUTS2_final=case_when(
is.na(NUTS2_new) ~ NUTS2,
TRUE ~ NUTS2_new
))
## Special case for PL12 in PL91/PL92
country_POL <- country_POL %>%
mutate(NUTS2_final=if_else(NUTS2_final %in% c("PL12","PL91","PL92"),"PL9192",NUTS2_final),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>% select(keep_columns)
## convert based on nuts3 ----
## SUO ----
country_SUO <- test_data %>% filter(COUNTRY=="SUO") %>%
left_join(nuts3.trans, by = c( "NUTS3", "COUNTRY")) %>%
mutate(NUTS3_final=case_when(
is.na(NUTS3_new) ~ NUTS3,
TRUE ~ NUTS3_new
)) %>%
mutate( NUTS2_final = str_sub(NUTS3_final,1,4)) %>%
mutate(NUTS2_final = if_else(NUTS2_final == "","FI19",NUTS2_final),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>%
select(keep_columns)
## LTU ----
country_LTU <- test_data %>% filter(COUNTRY=="LTU") %>%
left_join(nuts3.trans, by = c( "NUTS3", "COUNTRY")) %>%
mutate(NUTS3_final=case_when(
is.na(NUTS3_new) ~ NUTS3,
TRUE ~ NUTS3_new
)) %>%
mutate( NUTS2_final = str_sub(NUTS3_final,1,4),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>%
select(keep_columns)
## IRE ----
## IE02 and IE06 must be an error in the FADN data - because it has only 4 digits (must have 5 because of NUTS3)
## adaption: as IE061/IE021 is Dublin and small and IE062/IE022 is missing, we make:
## - IE02 to IE022 and IE06 to IE062
country_IRE <- test_data %>% filter(COUNTRY=="IRE") %>%
mutate(NUTS3=if_else(NUTS3=="IE02","IE022",NUTS3)) %>%
left_join(nuts3.trans, by = c( "NUTS3", "COUNTRY")) %>%
mutate(NUTS3_final=case_when(
is.na(NUTS3_new) ~ NUTS3,
TRUE ~ NUTS3_new
)) %>%
mutate( NUTS2_final = str_sub(NUTS3_final,1,4),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>%
select(keep_columns)
## SVN ----
country_SVN <- test_data %>% filter(COUNTRY=="SVN") %>%
left_join(nuts3.trans, by = c( "NUTS3", "COUNTRY")) %>%
left_join(nuts3.trans, by = c( "NUTS3_new"="NUTS3", "COUNTRY")) %>%
mutate(NUTS3_final=case_when(
is.na(NUTS3_new) & is.na(NUTS3_new.y) ~ NUTS3,
!is.na(NUTS3_new) & is.na(NUTS3_new.y) ~ NUTS3_new,
TRUE ~ NUTS3_new.y
)) %>%
mutate( NUTS2_final = str_sub(NUTS3_final,1,4),
NUTS1_final = str_sub(NUTS2_final,1,3)) %>%
select(keep_columns)
## UKI: ----
## UKI: 2010-2013 shapefile for NUTS2: UKIx fadn no data
## 2006-2010 only code change
## 2013-2016: NUTS3 <- NUTS2 <-NUTS1(UKN: NUTS2 has no change, NUTS3: UKM recoded)
## UKN, UKN0, UKN1 -> recoded UKN0
## United Kingdom has many inaccurate and misleading information for NUTS3
## - there are 4-digit codes as NUTS3 codes - but must be 5-digit
## - there are missing newer NUTS3 codes, but former NUTS3 codes are in the data
## - for instance:
## test_data %>% filter(grepl("UKM",NUTS1)) %>% group_by(YEAR,NUTS3) %>%
## summarise(n=sum(WEIGHT)) %>% pivot_wider(names_from = YEAR,values_from=n) %>%
## print(n=31) %>% view()
## --> there is no UKM21 and UKM22, but newer UKM71 and UKM72
## --> there is UKM35 and UKM36, but no newer UKM83 and UKM84
## solution: NUTS3 UKM2 -> UKM7 and UKM3 -> UKM9
country_UKI_UKM <- test_data %>% filter(grepl("UKM",NUTS2)) %>%
left_join(nuts3.trans, by = c("NUTS3", "COUNTRY")) %>%
mutate(NUTS3_final=case_when(
is.na(NUTS3_new) ~ NUTS3,
TRUE ~ NUTS3_new
),
NUTS3_final=case_when(
NUTS3=="UKM2" ~ "UKM7",