GitLab at IIASA

check_rules.R 48.9 KiB
Newer Older
Xinxin Yang's avatar
Xinxin Yang committed
####################################
# nuts rules


# fadn nuts2 heatmap############################
# fadn nuts2 heatmap

multiplot <- function(..., plotlist=NULL, cols) {
  require(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # Make the panel
  plotCols = cols                          # Number of columns of plots
  plotRows = ceiling(numPlots/plotCols) # Number of rows needed, calculated from # of cols

  # Set up the page
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
  vplayout <- function(x, y)
    viewport(layout.pos.row = x, layout.pos.col = y)

  # Make each plot, in the correct location
  for (i in 1:numPlots) {
    curRow = ceiling(i/plotCols)
    curCol = (i-1) %% plotCols + 1
    print(plots[[i]], vp = vplayout(curRow, curCol ))
  }



}
countries <- unique(str_data$info$COUNTRY)

# heatmap function
heatmap.group <- function(group.by) {
  for (country in countries){
  heatmap_data <- str_data$info %>%
    filter(COUNTRY == country) %>%
    count({{group.by}},YEAR) %>%
    arrange(YEAR) %>%
    # pivot_wider(names_from = YEAR,values_from=n) %>%
    mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1)))
  # pivot_longer(c(`2004`:`2018` ), names_to = "YEAR", values_to = "n")

  heatmap_data <- data.frame(lapply(heatmap_data,as.character))

  if (NROW(heatmap_data$NUTS3 %>% unique()) >100 )  {
    text.size = 3
  } else{text.size = 11}


  p <- heatmap_data %>% ggplot(aes(YEAR, {{group.by}}, fill= n)) + geom_tile() +
    theme(legend.position="none") +
    ggtitle(country)+ theme(axis.text.y = element_text(size = text.size))
  #
  # p_name <- country
  #
  # assign(p_name, heatmap_data %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  #          theme(legend.position="none") +
  #          ggtitle(country))
  # ggsave(plot = p ,
  #        filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_rules/",country,".png"),
  #        width = 18, height = 8)


  ggsave(plot = p ,
         filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_plots/",country,".png"),
         width = 18, height = 8)



  }
}

heatmap.group(REGION)
heatmap.group(NUTS1)
heatmap.group(NUTS2)
heatmap.group(NUTS3)

# multiple plots

plots <- list(NED, BEL, BGR, CYP, CZE,DAN,DEU,ELL,ESP,EST,FRA,HUN,IRE,ITA,LTU,LUX,LVA,MLT,OST,POL,POR,ROU,SUO,SVE,SVK,SVN,UKI,HRV)
mulp <- multiplot(NED, BEL, BGR, CYP, CZE,DAN,DEU,ELL,ESP,EST,FRA,HUN,IRE,ITA,LTU,LUX,LVA,MLT,OST,POL,POR,ROU,SUO,SVE,SVK,SVN,UKI,HRV, cols=5)
ggsave(plot = mulp ,
       filename = paste0(CurrentProjectDirectory,"/plots/animal/num_farms/nuts2/nuts2_rules/","all.png"),
       width = 18, height = 8)

#check nuts function###########################################

check.nuts <- function (data, country, year, group.by, export.EXCEL = FALSE){

  nuts2Excel <-  paste0(CurrentProjectDirectory,"/plots/","fadn_",deparse(substitute(group.by)),"_rules/excel/")

  export.name = paste0(country,".",deparse(substitute(group.by)),".",year[1])

  assign(export.name, data %>%
           filter(YEAR %in% year & COUNTRY == country) %>%
           select({{group.by}}, COUNTRY) %>% distinct() %>%  mutate(value = 1) )
  # envir = parent.frame()
  # )
  # print(get(export.name))
  if (deparse(substitute(group.by)) == 'REGION') export.EXCEL= FALSE
Xinxin Yang's avatar
Xinxin Yang committed
  library(xlsx)
Xinxin Yang's avatar
Xinxin Yang committed
  if(export.EXCEL){
    write.xlsx(as.data.frame(get(export.name)),
               paste0(nuts2Excel, deparse(substitute(group.by)),"_", country,year[1],".xlsx"),
               row.names = FALSE)}
Xinxin Yang's avatar
Xinxin Yang committed

  print(paste0(nuts2Excel, deparse(substitute(group.by)),"_", country,year[1],".xlsx"))
Xinxin Yang's avatar
Xinxin Yang committed
  return (get(export.name))

}

# nuts2 ##################################################
# DEU #######################################################################

deu.version07 <- str_data$info %>%
  filter(YEAR == 2007 & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version07"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version07), paste0(nuts2File,"deu.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)

deu.version08_09 <- str_data$info %>%
  filter(YEAR %in% c(2008:2009) & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version08_09"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version08_09), paste0(nuts2File,"deu.version08_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)


# version 2010-2018
deu.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version10_18"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version10_18), paste0(nuts2File,"deu.version10_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version: 2007
#..................................
# DEE1, DEE2, DEE3 merged into DEEO

# version: 2008-2009
#..........................
# DEE2 merged into DEE0

# version 2008-2009 -> 2010-2018
#............................
# DE41, DE41 merged into DE40
#............................
# Leipzig reccalculation by NSI
# DED5 new region
# DED3 boundary shift
#........................
# chemnitz reccalculation by NSI
# DED1 boundary shift
# DED4 new region
#..........................



# summary NUT2 level change .......................
str_data$info %>%
  filter(COUNTRY == "DEU") %>%
  mutate(NUTS2=case_when(
  NUTS2 %in% c("DEE1","DEE2","DEE3") ~ "DEE0",
  NUTS2 %in% c("DE41","DE42") ~ "DE40",
  NUTS2 %in% c("DED1") ~ "DED4",
  NUTS2 %in% c("DED3") ~ "DED5",
  TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")
# CZE ##################################################
# no chnange............................

cze.version07_13 <- str_data$info %>%
  filter(YEAR %in% c(2007:2013) & COUNTRY == "CZE") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version07_13"), .cols = NUTS2)

write.xlsx(as.data.frame(cze.version07_13), paste0(nuts2File,"cze.version07_13.xlsx"), sheetName = "NUTS2", row.names = FALSE)

cze.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "CZE") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version14_18"), .cols = NUTS2)
write.xlsx(as.data.frame(cze.version14_18), paste0(nuts2File,"cze.version14_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# ELL ##################################################

ell.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "ELL") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ell.version07_09), paste0(nuts2File,"ell.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

ell.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "ELL") %>%
  select(NUTS2, COUNTRY) %>% distinct()

#.......................
# version  2003
# new region:  GRZZ
# fadn has no GRZZ!!!!!
#.......................


#  version 07-09 -> version 2010
#........................
# since version 2010
# code change
# GR11 changed into EL11
# GR12 changed into EL12
# GR13 changed into	EL13
# GR14 changed into	EL14
# GR21 changed into EL21
# GR22 changed into	EL22
# GR23 changed into	EL23
# GR24 changed into EL24
# GR25 changed into	EL25
# GR30 changed into	EL30
# GR41 changed into	EL41
# GR42 changed into	EL42
# GR43 changed into	EL43
# GRZZ changed into	ELZZ
#.................
# version 2010 - 2013
# new    old
# EL51 = EL11
# EL52 = EL12
# EL53 = EL13
# EL54 = EL21
# EL61 = EL14
# EL62 = EL22
# EL63 = EL23
# EL64 = EL24
# EL65 = EL25

#
# summary
str_data$info %>%
  filter(COUNTRY == "ELL") %>%
  mutate(NUTS2=case_when(
    NUTS2 %in% c( "GR11","EL11") ~ "EL51" ,
    NUTS2 %in% c("GR12", "EL12") ~ "EL52",
    NUTS2 %in% c("GR13", "EL13") ~ "EL53",
    NUTS2 %in% c("GR14", "EL14") ~ "EL54",
    NUTS2 %in% c("GR21", "EL21") ~ "EL61",
    NUTS2 %in% c("GR22", "EL22") ~"EL62",
    NUTS2 %in% c("GR23", "EL23") ~ "EL63",
    NUTS2 %in% c("GR24", "EL24") ~"EL64",
    NUTS2 %in% c("GR25", "EL25") ~ "EL65",
    NUTS2 == "GR30" ~ "EL30" ,
    NUTS2 == "GR41" ~ "EL41" ,
    NUTS2 == "GR42" ~ "EL42",
    NUTS2 == "GR43" ~ "EL43" ,
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# FRA ###################################################

fra.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "FRA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(fra.version07_16), paste0(nuts2File,"fra.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


fra.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "FRA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(fra.version17_18), paste0(nuts2File,"fra.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version 2013 -> version 2016
Xinxin Yang's avatar
Xinxin Yang committed
# recoded 26 regions and FR10 no changed
Xinxin Yang's avatar
Xinxin Yang committed
# FR24	FRB0
# FR26	FRC1
# FR43	FRC2
# FR25	FRD1
# FR23	FRD2
# FR30	FRE1
# FR22	FRE2
# FR42	FRF1
# FR21	FRF2
# FR41	FRF3
# FR51	FRG0
# FR52	FRH0
# FR61	FRI1
# FR63	FRI2
# FR53	FRI3
# FR81	FRJ1
# FR62	FRJ2
# FR72	FRK1
# FR71	FRK2
# FR82	FRL0
# FR83	FRM0
# FRA1	FRY1
# FRA2	FRY2
# FRA3	FRY3
# FRA4	FRY4
# FRA5	FRY5

# summary
str_data$info %>%
  filter(COUNTRY == "FRA") %>%
  mutate(NUTS2=case_when(
    NUTS2 =="FR24"~	"FRB0",
    NUTS2 =="FR26"~"FRC1",
    NUTS2 =="FR43"~	"FRC2",
    NUTS2 =="FR25"~	"FRD1",
    NUTS2 =="FR23"~	"FRD2",
    NUTS2 =="FR30"~	"FRE1",
    NUTS2 =="FR22"~	"FRE2",
    NUTS2 =="FR42"~	"FRF1",
    NUTS2 =="FR21"~	"FRF2",
    NUTS2 =="FR41"~	"FRF3",
    NUTS2 =="FR51"~	"FRG0",
    NUTS2 =="FR52"~	"FRH0",
    NUTS2 =="FR61"~	"FRI1",
    NUTS2 =="FR63"~	"FRI2",
    NUTS2 =="FR53"~	"FRI3",
    NUTS2 =="FR81"~	"FRJ1",
    NUTS2 =="FR62"~	"FRJ2",
    NUTS2 =="FR72"~	"FRK1",
    NUTS2 =="FR71"~	"FRK2",
    NUTS2 =="FR82"~	"FRL0",
    NUTS2 =="FR83"~	"FRM0",
    NUTS2 =="FRA1"~	"FRY1",
    NUTS2 =="FRA2"~	"FRY2",
    NUTS2 =="FRA3"~	"FRY3",
    NUTS2 =="FRA4"~	"FRY4",
    NUTS2 =="FRA5"~	"FRY5",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# HUN #################################################
#
# since 2016
# HU10 discontinued; and split into new HU11 and HU12
# HU11 = HU101 (NUTS3)
# HU12 = HU102 (NUTS3)

# ????????????????
# fadn data: missing HU11?????
# nuts3 has no HU101 and HU102, but HU10

hun.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "HUN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(hun.version07_16), paste0(nuts2File,"hun.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


hun.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "HUN") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(hun.version17_18), paste0(nuts2File,"hun.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# summary ???????????????????????????????????
str_data$info %>%
  filter(COUNTRY == "HUN") %>%
  mutate(NUTS2=case_when(
    NUTS2 =="HU10"~	"HU12",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# IRE #################################################

ire.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "IRE") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ire.version07_16), paste0(nuts2File,"ire.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


ire.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "IRE") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ire.version17_18), paste0(nuts2File,"ire.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# since 2016 version
#......................
# IE01		discontinued
# IE02		discontinued
# IE04		new region ?????????????????
# IE05	  new region, made from ex-IE023, IE024 and IE025	IE05=IE023+IE024+IE025 (from NUTS3)
# IE06		new region ?????????????????

# summary ???????????????
# NUTS2	IE04	IRE	0.78
# NUTS2	IE05	IRE	0.81
# NUTS2	IE06	IRE	0.42
Xinxin Yang's avatar
Xinxin Yang committed



# ITA ################################
ita.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "ITA") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ita.version07_09), paste0(nuts2File,"ita.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# vrsion 2006 into version 2010
#.....................................
#  recoded
# ITD1	ITH1	label change	ITH1 = ITD1
# ITD2	ITH2	label change	ITH2 = ITD2
# ITD3	ITH3	 recoded	ITH3 = ITD3
# ITD4	ITH4	 recoded	ITH4 = ITD4
# ITE4	ITI4	 recoded	ITI4 = ITE4
# ITE1	ITI1	 recoded	ITI1 = ITE1
# ITE2	ITI2	 recoded	ITI2 = ITE2
#....................................
# Boundary shift
# ITD5	Emilia-Romagna	Boundary shift
# ITH5	Emilia-Romagna	new region	recalculation by NSI
# ITE3	Marche	Boundary shift
# ITI3	Marche	new region	recalculation by NSI
str_data$info %>%
  filter(COUNTRY == "ITA") %>%
  mutate(NUTS2=case_when(
    NUTS2 ==	"ITD1"~ "ITH1",
    NUTS2 == "ITD2"~ "ITH2" ,
    NUTS2 ==  "ITD3"~ "ITH3"	,
    NUTS2 ==	"ITD4"~ "ITH4",
    NUTS2 ==	"ITE4" ~"ITI4",
    NUTS2 ==	"ITI1"~ "ITE1",
    NUTS2 ==	"ITI2" ~"ITE2",
    NUTS2 == "ITD5" ~ "ITH5",
    NUTS2 =="ITE3" ~ "ITI3",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# LTU ########################################

ltu.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "LTU") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ltu.version07_16), paste0(nuts2File,"ltu.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)

ltu.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "LTU") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ltu.version17_18), paste0(nuts2File,"ltu.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2013 -> 2016
#..............................
# LT00 split into new LT01 LT02
# LT01=LT00A
# LT02=LT00-LT00A ??? how

# summary

str_data$info %>%
  filter(COUNTRY == "LTU") %>%
  mutate(NUTS2=case_when(
  NUTS3 == "LT00A"  ~ "LT01",
  NUTS2 == "LT00"  ~ "LT02",
  TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# LVA #######################################


lva.version07_08 <- str_data$info %>%
  filter(YEAR %in% c(2007:2008) & COUNTRY == "LVA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(lva.version07_08), paste0(nuts2File,"lva.version07_08.xlsx"), sheetName = "NUTS2", row.names = FALSE)


lva.version09_18 <- str_data$info %>%
  filter(YEAR %in% c(2009:2018) & COUNTRY == "LVA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(lva.version09_18), paste0(nuts2File,"lva.version09_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# ????????????????
# empty nuts2

# OST ########################################

ost.version07 <- str_data$info %>%
  filter(YEAR %in% c(2007) & COUNTRY == "OST") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ost.version07), paste0(nuts2File,"ost.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)
ost.version08_18 <- str_data$info %>%
  filter(YEAR %in% c(2008:2018) & COUNTRY == "OST") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ost.version08_18), paste0(nuts2File,"ost.version08_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# in 2007: "" item maybe AT13???

# POL ##########################################

pol.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "POL") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value =1)
write.xlsx(as.data.frame(pol.version07_16), paste0(nuts2File,"pol.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)

pol.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "POL") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(pol.version17_18), paste0(nuts2File,"pol.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2013 -> 2016
#.....................
# recoded
# PL11	PL71
# PL33	PL72
# PL31	PL81
# PL32	PL82
# PL34	PL84
#....................
# split
# PL12 split into new PL91, PL92
# PL91=PL127+PL129+PL12A-newPL926
# PL92=PL128+PL12B+PL12C+PL12D+PL12E+new PL926
# summary
# ??? before 2014 can't convert !!!!
str_data$info %>%
  filter(COUNTRY == "POL") %>%
  mutate(NUTS2=case_when(
   NUTS2 ==  "PL11" ~	"PL71",
   NUTS2 ==  "PL33" ~	"PL72",
   NUTS2 ==  "PL31" ~	"PL81",
   NUTS2 ==  "PL32" ~	"PL82",
   NUTS2 ==  "PL34"	~ "PL84",
   (NUTS2 == "PL12" & NUTS3 %in% c("PL127","PL129","PL12A", "PL926")) ~ "PL91",
   (NUTS2 == "PL12" & NUTS3 %in% c("PL128","PL12B","PL12C","PL12D","PL12E", "PL926")) ~ "PL92",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")



# SUO ############################################

suo.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "SUO") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(suo.version07_09), paste0(nuts2File,"suo.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

suo.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "SUO") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(suo.version10_18), paste0(nuts2File,"suo.version10_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
Xinxin Yang's avatar
Xinxin Yang committed
# veriosn 2006 to 2010.......................
Xinxin Yang's avatar
Xinxin Yang committed
# new region FI1D
# FI1D = FI13 + FI1A
#...........................................
# FI18 split into new region FI1B and FI1C
# FI1B + FI1C = FI18
# FI1B + FI1C = FI18

str_data$info %>%
  filter(COUNTRY == "SUO") %>%
  mutate(NUTS2=case_when(
    NUTS2 %in%  c("FI13", "FI1A") ~	"FI1D",
    # NUTS2 ==  "FI18" ~	"FI1B",
    # NUTS2 ==  "FI18" ~	"FI1C",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# SVE ########################################
sve.version07 <- str_data$info %>%
  filter(YEAR %in% c(2007) & COUNTRY == "SVE") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate (value =1)
write.xlsx(as.data.frame(sve.version07), paste0(nuts2File,"sve.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)

sve.version08_18 <- str_data$info %>%
  filter(YEAR %in% c(2008:2018) & COUNTRY == "SVE") %>%
  select(NUTS2, COUNTRY) %>% distinct()  %>% mutate (value =1)
write.xlsx(as.data.frame(sve.version08_18), paste0(nuts2File,"sve.version08_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version 2003 -> 2006
# code change...........
# old   new
# SE01	SE11
# SE02	SE12
# SE09	SE21
# SE04	SE22
# SE0A	SE23
# SE06	SE31
# SE07	SE32
# SE08	SE33

str_data$info %>%
  filter(COUNTRY == "SVE") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "SE01" ~ "SE11",
    NUTS2 == "SE02" ~	"SE12",
    NUTS2 == "SE09" ~	"SE21",
    NUTS2 == "SE04" ~	"SE22",
    NUTS2 == "SE0A" ~	"SE23",
    NUTS2 == "SE06"	~ "SE31",
    NUTS2 == "SE07"	~ "SE32",
    NUTS2 == "SE08" ~	"SE33",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")
# SVN ######################################

svn.version07_08 <- str_data$info %>%
  filter(YEAR %in% c(2007:2008) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version07_08), paste0(nuts2File,"svn.version07_08.xlsx"), sheetName = "NUTS2", row.names = FALSE)

svn.version09_13 <- str_data$info %>%
  filter(YEAR %in% c(2009:2013) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version09_13), paste0(nuts2File,"svn.version09_13.xlsx"), sheetName = "NUTS2", row.names = FALSE)
svn.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version14_18), paste0(nuts2File,"svn.version14_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2003 -> 2006
#............................
# SI00 split into SI01 and SI02
# version 2010 -> 2013
#.............................
# Boundary shift
# SI01 -> SI03 recalculation by NSI
# SI02 -> SI04 recalculation by NSI
#..................................

str_data$info %>%
  filter(COUNTRY == "SVN") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "SI01" ~ "SI03",
    NUTS2 == "SI02" ~	"SI04",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# UKI ##########################################
uki.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(uki.version07_09), paste0(nuts2File,"uki.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

uki.version10_13 <- str_data$info %>%
  filter(YEAR %in% c(2010:2013) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>% mutate(value = 1)

uki.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>% mutate(value = 1)

str_data$info %>%
  filter(YEAR %in% c(2010:2013) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>%  filter (NUTS2 %in% c("UKI1","UKI3","UKI4","UKI2" ))
#......................
# version 2006 -> 2010
# Boundary shift
#.
# UKD2 -> UKD6
# UKD5 -> UKD7

#............................
# version 2010 -> 2013
# UKI1 split into UKI3 + UKI4
# UKI3 + UKI4 = UKI1
#...................................
# UKI2 split into UKI5 + UKI6 + UKI7
# UKI5 + UKI6 + UKI7 = UKI2

#..................................
# version 2013 -> 2016
# UKM7 boundary shift: lost exUKM24
# UKM7=UKM2-UKM24
#...............................................
# UKM3 discontinued; split into new UKM8 and UKM9
#.............................................
# new region
# UKM8=UKM31+UKM34+UKM35+UKM36
# UKM9=UKM24+UKM32+UKM33+UKM37+UKM38


str_data$info %>%
  filter(COUNTRY == "UKI") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "UKD2" ~ "UKD6",
    NUTS2 == "UKD5" ~	"UKD7",
    NUTS3 %in% c("UKM31","UKM34","UKM35","UKM36") ~ "UKM8",
    NUTS3 %in% c("UKM24","UKM32","UKM33","UKM37", "UKM38") ~ "UKM9",
    TRUE ~ NUTS2)) %>%
  count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")







# nuts1 ##################################################
# ell ----
check.nuts(str_data$info, "ELL", c(2007:2009), NUTS1, FALSE)
check.nuts(str_data$info, "ELL", c(2010:2013), NUTS1, FALSE)
check.nuts(str_data$info, "ELL", c(2014:2018), NUTS1, FALSE)
# version 2006 -2010
#...................
# old new recoded
# GR1	EL1
# GR2	EL2
# GR3	EL3
# GR4	EL4
# version 2010-2013
#...........................
# boundary shift| new region
# EL1	          | EL5
# EL2	          | EL6

# fra ----
Xinxin Yang's avatar
Xinxin Yang committed
check.nuts(str_data$info, "FRA", c(2007:2011), NUTS1, export.EXCEL = TRUE)
check.nuts(str_data$info, "FRA", c(2012:2013), NUTS1, export.EXCEL = TRUE)
check.nuts(str_data$info, "FRA", c(2014:2016), NUTS1, export.EXCEL = TRUE)
check.nuts(str_data$info, "FRA", c(2017:2018), NUTS1, export.EXCEL = TRUE)
Xinxin Yang's avatar
Xinxin Yang committed
# version 2010 - 2013
#...................
# boundary shift
#..................
#      new
# FR9  FRA

Xinxin Yang's avatar
Xinxin Yang committed
# version 2013 -> 2016
Xinxin Yang's avatar
Xinxin Yang committed
# discontinued
# FR2
# FR3
# FR4
# FR5
# FR6
# FR8
#...............
# recoded
#...............
# old new
# FRA FRY
# FR7 FRK
#...............
# new region
#...............
# FRB=FR24
# FRC=FR26+FR43
# FRD=FR23+FR25
# FRE=FR22+FR30
# FRF=FR21+FR41+FR42
# FRG=FR51
# FRH=FR52
# FRI=FR53+FR61+FR63
# FRJ=FR62+FR81
# FRL=FR82
# FRM=FR83

# ita ---------
check.nuts(str_data$info, "ITA", c(2007:2009), NUTS1)
check.nuts(str_data$info, "ITA", c(2010:2018), NUTS1)

# version 2006 - 2010
#...............
# boundary shift
#................
# old new
# ITD ITH
# IDE ITI

# pol ---------
check.nuts(str_data$info, "POL", c(2007:2016), NUTS1)
check.nuts(str_data$info, "POL", c(2017:2018), NUTS1)
Xinxin Yang's avatar
Xinxin Yang committed
# 2013- 2016
Xinxin Yang's avatar
Xinxin Yang committed
# PL1, PL3 discontinued
# PL7=PL11+PL33
# PL8=PL3-PL33
# PL9=PL12

# sve -----------

check.nuts(str_data$info, "SVE", c(2007), NUTS1)
check.nuts(str_data$info, "SVE", c(2008:2018), NUTS1)

# version 2003 - 2006
# SE0 split into SE1 SE2 SE3
# NUTSConverter tool fehlt version 2003



# Nuts3 ####################################################

# BEL ####
check.nuts(str_data$info, "BEL", c(2007:2013), NUTS3, TRUE)
check.nuts(str_data$info, "BEL", c(2014:2018), NUTS3, TRUE)
# version 2003 - 2006
# split
# BE333 split into BE335 BE336

# CZE ####
check.nuts(str_data$info, "CZE", c(2007:2013), NUTS3, TRUE)
check.nuts(str_data$info, "CZE", c(2014:2018), NUTS3, TRUE)

# version 2003 - 2006
# CZ061 boundary shift  Vysocina
# CZ063 new region Vysocina
#.................................
# CZ062 boundary shift  Jihomoravsk? kraj
# CZ064 new region Jihomoravsk? kraj

# DEU ????####

deu.check <- check.nuts(str_data$info, "DEU", c(2007), NUTS3, TRUE)
deu.check.2008 <- check.nuts(str_data$info, "DEU", c(2008:2009), NUTS3, TRUE)
deu.check.2010 <- check.nuts(str_data$info, "DEU", c(2010:2013), NUTS3, TRUE)
deu.check.2014 <- check.nuts(str_data$info, "DEU", c(2014:2016), NUTS3, TRUE)
deu.check.2017 <- check.nuts(str_data$info, "DEU", c(2017:2018), NUTS3, TRUE)
  # version 1999-2003 ----
  #  old  new
  # merge .....
  # DE301	DE300 (part)
  # DE302	DE300 (part)
  # ..................
  # recoded
  # DE403	DE411
  # DE405	DE412
  # DE409	DE413
  # DE40A	DE414
  # DE40C	DE415
  # DE40D	DE416
  # DE40F	DE417
  # DE40I	DE418
  # DE401	DE421
  # DE402	DE422
  # DE404	DE423
  # DE406	DE424
  # DE407	DE425
  # DE408	DE426
  # DE40B	DE427
  # DE40E	DE428
  # DE40G	DE429
  # DE40H	DE42A
  # DE6	DE600
  # version 2003-2006 ----
  # old  new change
  # DEE11		Terminated
  # DEE12		Terminated
  # DEE13		Terminated
  # DEE14		Terminated
  # DEE15		Terminated
  # DEE16		Terminated
  # DEE33	DEE07 	Merged
  # DEE36	DEE07 	Merged
  # DEE22	DEE08 	Merged
  # DEE27	DEE08 	Merged
  # DEE23	DEE0A 	Merged
  # DEE26	DEE0A 	Merged
  # DEE24	DEE0B 	Merged
  # DEE25	DEE0B 	Merged
  # DEE32		Terminated
  # DEE34		Terminated
  # DEE35		Terminated
  # DEE38		Terminated
  # DEE39		Terminated
  # DEE3A		Terminated
  # DEE21	DEE02	 recoded
  # DEE31	DEE03	 recoded
  # DEE3B	DEE04	 recoded
  # DEE37	DEE0D	 recoded

  # version 2006-2010 ----
  # DE411	DE403
  # DE412	DE405
  # DE413	DE409
  # DE414	DE40A
  # DE415	DE40C
  # DE416	DE40D
  # DE417	DE40F
  # DE418	DE40I
  # DE421	DE401
  # DE422	DE402
  # DE423	DE404
  # DE424	DE406
  # DE425	DE407
  # DE426	DE408
  # DE427	DE40B
  # DE428	DE40E
  # DE429	DE40G
  # DE42A	DE40H
  # DED11	DED41
  # DED31	DED51
  # DEA21	DEA2D merge
  # DEA25	DEA2D merge
  #       DEA2D DA2D = DEA21 + DEA25
  # DED23	DED2C (part)
  # DED24	DED2C (part)
  # DED2B	DED2C (part)
  #       DED2C DED2C = DED23 + DED24 + DED2B
  # DED22	DED2D (part)
  # DED26	DED2D (part)
  # DED28	DED2D (part)
  #       DED2D DED2D = DED22 + DED26 + DED28
  # DED25	DED2E (part)
  # DED27	DED2E (part)
  #       DED2E DED2E = DED25 + DED27
  # DED29	DED2F (part)
  # DED2A	DED2F (part)
  #       DED2F DED2F = DED29 + DED2A
  # DED14	DED42 (part)
  # DED18	DED42 (part)
  # DED1A	DED42 (part)
  # DED1B	DED42 (part)
  #       DED42 DED42 = DED14 + DED18 + DED1A + DED1B
  # DED16	DED43 (part)
  # DED19	DED43 (part)
  # DED33	DED43 (part)
  #       DED43 DED43 = DED16 + DED19 + DED33
  # DED12	DED44 (part)
  # DED17	DED44 (part)
  #       DED44 DED44 = DED12 + DED17
  # DED13	DED45 (part)
  # DED15	DED45 (part)
  # DED1C	DED45 (part)
  # DED45 DED45 = DED13 + DED15 + DED1C
  # DED34	DED52 (part)
  # DED35	DED52 (part)
  #       DED52 DED52 = DED34 + DED35
  # DED32	DED53 (part)
  # DED36	DED53 (part)
  #       DED53 DED53 = DED32 + DED36

  # version 2010-2013
  # ...........
  # DE801      merge
  # DE808	     split
  # DE80B	     merge
  # DE80C      merge
  # DE80F      merge
  # DE80I	     merge
  # DE802	     merge
  #       DE80J DE80J=DE80C+DE80B+DE802+parts of DE808
  #       DE80N DE80N=DE801+DE80F+DE80I+parts of DE808
  # DE807	DE80K (part)
  # DE809	DE80K (part)
  #       DE80K DE80K = DE807 + DE809
  # DE805	DE80L (part)
  # DE80D	DE80L (part)
  # DE80H	DE80L (part)
  #       DE80L DE80L = DE805 + DE80D + DE80H
  # DE806	DE80M (part)
  # DE80E	DE80M (part)
  #       DE80M DE80M = DE806 + DE80E
  # DE80A	DE80O (part)
  # DE80G	DE80O (part)
  #       DE80O DE80O = DE80A + DE80G
  # version 2013-2016
  # DE915	discontinued; merged with ex-DE919
  # DE919	discontinued; merged with ex-DE915
  #       DE91C DE91C=DE915+DE919
  # DEB16	DEB1C boundary shift
  # DEB19	DEB1D boundary shift
  # version 2016-2021 no change ----
  # no change


# checked ----
checked <- c("DE411","DE403","DEB1D", "DE600","DEE07")
string <- deu_nuts3 %>% filter(version == "2010To2013") %>% select(NUTS3) %>% c()
deu.check %>% filter(NUTS3 %in% string$NUTS3)
deu.check.2008 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2010 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2014 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2017 %>% filter(NUTS3 %in% string$NUTS3 )
# deu_changed = read.table(text = "changed
#       DEE11
#       DEE12
#       DEE13
#       DEE14
#       DEE15
#       DEE16
#       DEE33
#       DEE36
#       DEE22
#       DEE27
#       DEE23
#       DEE26
#       DEE24
#       DEE25
#       DEE32
#       DEE34
#       DEE35