GitLab at IIASA

check_rules.R 48.7 KiB
Newer Older
Xinxin Yang's avatar
Xinxin Yang committed
####################################
# nuts rules


# fadn nuts2 heatmap############################
# fadn nuts2 heatmap

multiplot <- function(..., plotlist=NULL, cols) {
  require(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # Make the panel
  plotCols = cols                          # Number of columns of plots
  plotRows = ceiling(numPlots/plotCols) # Number of rows needed, calculated from # of cols

  # Set up the page
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
  vplayout <- function(x, y)
    viewport(layout.pos.row = x, layout.pos.col = y)

  # Make each plot, in the correct location
  for (i in 1:numPlots) {
    curRow = ceiling(i/plotCols)
    curCol = (i-1) %% plotCols + 1
    print(plots[[i]], vp = vplayout(curRow, curCol ))
  }



}
countries <- unique(str_data$info$COUNTRY)

# heatmap function
heatmap.group <- function(group.by) {
  for (country in countries){
  heatmap_data <- str_data$info %>%
    filter(COUNTRY == country) %>%
    count({{group.by}},YEAR) %>%
    arrange(YEAR) %>%
    # pivot_wider(names_from = YEAR,values_from=n) %>%
    mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1)))
  # pivot_longer(c(`2004`:`2018` ), names_to = "YEAR", values_to = "n")

  heatmap_data <- data.frame(lapply(heatmap_data,as.character))

  if (NROW(heatmap_data$NUTS3 %>% unique()) >100 )  {
    text.size = 3
  } else{text.size = 11}


  p <- heatmap_data %>% ggplot(aes(YEAR, {{group.by}}, fill= n)) + geom_tile() +
    theme(legend.position="none") +
    ggtitle(country)+ theme(axis.text.y = element_text(size = text.size))
  #
  # p_name <- country
  #
  # assign(p_name, heatmap_data %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  #          theme(legend.position="none") +
  #          ggtitle(country))
  # ggsave(plot = p ,
  #        filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_rules/",country,".png"),
  #        width = 18, height = 8)


  ggsave(plot = p ,
         filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_plots/",country,".png"),
         width = 18, height = 8)



  }
}

heatmap.group(REGION)
heatmap.group(NUTS1)
heatmap.group(NUTS2)
heatmap.group(NUTS3)

# multiple plots

plots <- list(NED, BEL, BGR, CYP, CZE,DAN,DEU,ELL,ESP,EST,FRA,HUN,IRE,ITA,LTU,LUX,LVA,MLT,OST,POL,POR,ROU,SUO,SVE,SVK,SVN,UKI,HRV)
mulp <- multiplot(NED, BEL, BGR, CYP, CZE,DAN,DEU,ELL,ESP,EST,FRA,HUN,IRE,ITA,LTU,LUX,LVA,MLT,OST,POL,POR,ROU,SUO,SVE,SVK,SVN,UKI,HRV, cols=5)
ggsave(plot = mulp ,
       filename = paste0(CurrentProjectDirectory,"/plots/animal/num_farms/nuts2/nuts2_rules/","all.png"),
       width = 18, height = 8)

#check nuts function###########################################

check.nuts <- function (data, country, year, group.by, export.EXCEL = FALSE){

  nuts2Excel <-  paste0(CurrentProjectDirectory,"/plots/","fadn_",deparse(substitute(group.by)),"_rules/excel/")

  export.name = paste0(country,".",deparse(substitute(group.by)),".",year[1])

  assign(export.name, data %>%
           filter(YEAR %in% year & COUNTRY == country) %>%
           select({{group.by}}, COUNTRY) %>% distinct() %>%  mutate(value = 1) )
  # envir = parent.frame()
  # )
  # print(get(export.name))
  if (deparse(substitute(group.by)) == 'REGION') export.EXCEL= FALSE
  if(export.EXCEL){
    write.xlsx(as.data.frame(get(export.name)),
               paste0(nuts2Excel, deparse(substitute(group.by)),"_", country,year[1],".xlsx"),
               row.names = FALSE)}
  return (get(export.name))

}

# nuts2 ##################################################
# DEU #######################################################################

deu.version07 <- str_data$info %>%
  filter(YEAR == 2007 & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version07"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version07), paste0(nuts2File,"deu.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)

deu.version08_09 <- str_data$info %>%
  filter(YEAR %in% c(2008:2009) & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version08_09"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version08_09), paste0(nuts2File,"deu.version08_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)


# version 2010-2018
deu.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "DEU") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version10_18"), .cols = NUTS2)
write.xlsx(as.data.frame(deu.version10_18), paste0(nuts2File,"deu.version10_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version: 2007
#..................................
# DEE1, DEE2, DEE3 merged into DEEO

# version: 2008-2009
#..........................
# DEE2 merged into DEE0

# version 2008-2009 -> 2010-2018
#............................
# DE41, DE41 merged into DE40
#............................
# Leipzig reccalculation by NSI
# DED5 new region
# DED3 boundary shift
#........................
# chemnitz reccalculation by NSI
# DED1 boundary shift
# DED4 new region
#..........................



# summary NUT2 level change .......................
str_data$info %>%
  filter(COUNTRY == "DEU") %>%
  mutate(NUTS2=case_when(
  NUTS2 %in% c("DEE1","DEE2","DEE3") ~ "DEE0",
  NUTS2 %in% c("DE41","DE42") ~ "DE40",
  NUTS2 %in% c("DED1") ~ "DED4",
  NUTS2 %in% c("DED3") ~ "DED5",
  TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")
# CZE ##################################################
# no chnange............................

cze.version07_13 <- str_data$info %>%
  filter(YEAR %in% c(2007:2013) & COUNTRY == "CZE") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version07_13"), .cols = NUTS2)

write.xlsx(as.data.frame(cze.version07_13), paste0(nuts2File,"cze.version07_13.xlsx"), sheetName = "NUTS2", row.names = FALSE)

cze.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "CZE") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>%
  rename_with(.fn =  ~paste0(., "_version14_18"), .cols = NUTS2)
write.xlsx(as.data.frame(cze.version14_18), paste0(nuts2File,"cze.version14_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# ELL ##################################################

ell.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "ELL") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ell.version07_09), paste0(nuts2File,"ell.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

ell.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "ELL") %>%
  select(NUTS2, COUNTRY) %>% distinct()

#.......................
# version  2003
# new region:  GRZZ
# fadn has no GRZZ!!!!!
#.......................


#  version 07-09 -> version 2010
#........................
# since version 2010
# code change
# GR11 changed into EL11
# GR12 changed into EL12
# GR13 changed into	EL13
# GR14 changed into	EL14
# GR21 changed into EL21
# GR22 changed into	EL22
# GR23 changed into	EL23
# GR24 changed into EL24
# GR25 changed into	EL25
# GR30 changed into	EL30
# GR41 changed into	EL41
# GR42 changed into	EL42
# GR43 changed into	EL43
# GRZZ changed into	ELZZ
#.................
# version 2010 - 2013
# new    old
# EL51 = EL11
# EL52 = EL12
# EL53 = EL13
# EL54 = EL21
# EL61 = EL14
# EL62 = EL22
# EL63 = EL23
# EL64 = EL24
# EL65 = EL25

#
# summary
str_data$info %>%
  filter(COUNTRY == "ELL") %>%
  mutate(NUTS2=case_when(
    NUTS2 %in% c( "GR11","EL11") ~ "EL51" ,
    NUTS2 %in% c("GR12", "EL12") ~ "EL52",
    NUTS2 %in% c("GR13", "EL13") ~ "EL53",
    NUTS2 %in% c("GR14", "EL14") ~ "EL54",
    NUTS2 %in% c("GR21", "EL21") ~ "EL61",
    NUTS2 %in% c("GR22", "EL22") ~"EL62",
    NUTS2 %in% c("GR23", "EL23") ~ "EL63",
    NUTS2 %in% c("GR24", "EL24") ~"EL64",
    NUTS2 %in% c("GR25", "EL25") ~ "EL65",
    NUTS2 == "GR30" ~ "EL30" ,
    NUTS2 == "GR41" ~ "EL41" ,
    NUTS2 == "GR42" ~ "EL42",
    NUTS2 == "GR43" ~ "EL43" ,
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# FRA ###################################################

fra.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "FRA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(fra.version07_16), paste0(nuts2File,"fra.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


fra.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "FRA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(fra.version17_18), paste0(nuts2File,"fra.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version 2013 -> version 2016
# recoded 26 and FR10 no changed
# FR24	FRB0
# FR26	FRC1
# FR43	FRC2
# FR25	FRD1
# FR23	FRD2
# FR30	FRE1
# FR22	FRE2
# FR42	FRF1
# FR21	FRF2
# FR41	FRF3
# FR51	FRG0
# FR52	FRH0
# FR61	FRI1
# FR63	FRI2
# FR53	FRI3
# FR81	FRJ1
# FR62	FRJ2
# FR72	FRK1
# FR71	FRK2
# FR82	FRL0
# FR83	FRM0
# FRA1	FRY1
# FRA2	FRY2
# FRA3	FRY3
# FRA4	FRY4
# FRA5	FRY5

# summary
str_data$info %>%
  filter(COUNTRY == "FRA") %>%
  mutate(NUTS2=case_when(
    NUTS2 =="FR24"~	"FRB0",
    NUTS2 =="FR26"~"FRC1",
    NUTS2 =="FR43"~	"FRC2",
    NUTS2 =="FR25"~	"FRD1",
    NUTS2 =="FR23"~	"FRD2",
    NUTS2 =="FR30"~	"FRE1",
    NUTS2 =="FR22"~	"FRE2",
    NUTS2 =="FR42"~	"FRF1",
    NUTS2 =="FR21"~	"FRF2",
    NUTS2 =="FR41"~	"FRF3",
    NUTS2 =="FR51"~	"FRG0",
    NUTS2 =="FR52"~	"FRH0",
    NUTS2 =="FR61"~	"FRI1",
    NUTS2 =="FR63"~	"FRI2",
    NUTS2 =="FR53"~	"FRI3",
    NUTS2 =="FR81"~	"FRJ1",
    NUTS2 =="FR62"~	"FRJ2",
    NUTS2 =="FR72"~	"FRK1",
    NUTS2 =="FR71"~	"FRK2",
    NUTS2 =="FR82"~	"FRL0",
    NUTS2 =="FR83"~	"FRM0",
    NUTS2 =="FRA1"~	"FRY1",
    NUTS2 =="FRA2"~	"FRY2",
    NUTS2 =="FRA3"~	"FRY3",
    NUTS2 =="FRA4"~	"FRY4",
    NUTS2 =="FRA5"~	"FRY5",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# HUN #################################################
#
# since 2016
# HU10 discontinued; and split into new HU11 and HU12
# HU11 = HU101 (NUTS3)
# HU12 = HU102 (NUTS3)

# ????????????????
# fadn data: missing HU11?????
# nuts3 has no HU101 and HU102, but HU10

hun.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "HUN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(hun.version07_16), paste0(nuts2File,"hun.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


hun.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "HUN") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(hun.version17_18), paste0(nuts2File,"hun.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# summary ???????????????????????????????????
str_data$info %>%
  filter(COUNTRY == "HUN") %>%
  mutate(NUTS2=case_when(
    NUTS2 =="HU10"~	"HU12",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# IRE #################################################

ire.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "IRE") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ire.version07_16), paste0(nuts2File,"ire.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)


ire.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "IRE") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ire.version17_18), paste0(nuts2File,"ire.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# since 2016 version
#......................
# IE01		discontinued
# IE02		discontinued
# IE04		new region ?????????????????
# IE05	  new region, made from ex-IE023, IE024 and IE025	IE05=IE023+IE024+IE025 (from NUTS3)
# IE06		new region ?????????????????

# summary ???????????????
# NUTS2	IE04	IRE	0.78
# NUTS2	IE05	IRE	0.81
# NUTS2	IE06	IRE	0.42
Xinxin Yang's avatar
Xinxin Yang committed
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000



# ITA ################################
ita.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "ITA") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ita.version07_09), paste0(nuts2File,"ita.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# vrsion 2006 into version 2010
#.....................................
#  recoded
# ITD1	ITH1	label change	ITH1 = ITD1
# ITD2	ITH2	label change	ITH2 = ITD2
# ITD3	ITH3	 recoded	ITH3 = ITD3
# ITD4	ITH4	 recoded	ITH4 = ITD4
# ITE4	ITI4	 recoded	ITI4 = ITE4
# ITE1	ITI1	 recoded	ITI1 = ITE1
# ITE2	ITI2	 recoded	ITI2 = ITE2
#....................................
# Boundary shift
# ITD5	Emilia-Romagna	Boundary shift
# ITH5	Emilia-Romagna	new region	recalculation by NSI
# ITE3	Marche	Boundary shift
# ITI3	Marche	new region	recalculation by NSI
str_data$info %>%
  filter(COUNTRY == "ITA") %>%
  mutate(NUTS2=case_when(
    NUTS2 ==	"ITD1"~ "ITH1",
    NUTS2 == "ITD2"~ "ITH2" ,
    NUTS2 ==  "ITD3"~ "ITH3"	,
    NUTS2 ==	"ITD4"~ "ITH4",
    NUTS2 ==	"ITE4" ~"ITI4",
    NUTS2 ==	"ITI1"~ "ITE1",
    NUTS2 ==	"ITI2" ~"ITE2",
    NUTS2 == "ITD5" ~ "ITH5",
    NUTS2 =="ITE3" ~ "ITI3",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")

# LTU ########################################

ltu.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "LTU") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ltu.version07_16), paste0(nuts2File,"ltu.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)

ltu.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "LTU") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ltu.version17_18), paste0(nuts2File,"ltu.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2013 -> 2016
#..............................
# LT00 split into new LT01 LT02
# LT01=LT00A
# LT02=LT00-LT00A ??? how

# summary

str_data$info %>%
  filter(COUNTRY == "LTU") %>%
  mutate(NUTS2=case_when(
  NUTS3 == "LT00A"  ~ "LT01",
  NUTS2 == "LT00"  ~ "LT02",
  TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# LVA #######################################


lva.version07_08 <- str_data$info %>%
  filter(YEAR %in% c(2007:2008) & COUNTRY == "LVA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(lva.version07_08), paste0(nuts2File,"lva.version07_08.xlsx"), sheetName = "NUTS2", row.names = FALSE)


lva.version09_18 <- str_data$info %>%
  filter(YEAR %in% c(2009:2018) & COUNTRY == "LVA") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(lva.version09_18), paste0(nuts2File,"lva.version09_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# ????????????????
# empty nuts2

# OST ########################################

ost.version07 <- str_data$info %>%
  filter(YEAR %in% c(2007) & COUNTRY == "OST") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(ost.version07), paste0(nuts2File,"ost.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)
ost.version08_18 <- str_data$info %>%
  filter(YEAR %in% c(2008:2018) & COUNTRY == "OST") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(ost.version08_18), paste0(nuts2File,"ost.version08_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# in 2007: "" item maybe AT13???

# POL ##########################################

pol.version07_16 <- str_data$info %>%
  filter(YEAR %in% c(2007:2016) & COUNTRY == "POL") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value =1)
write.xlsx(as.data.frame(pol.version07_16), paste0(nuts2File,"pol.version07_16.xlsx"), sheetName = "NUTS2", row.names = FALSE)

pol.version17_18 <- str_data$info %>%
  filter(YEAR %in% c(2017:2018) & COUNTRY == "POL") %>%
  select(NUTS2, COUNTRY) %>% distinct()
write.xlsx(as.data.frame(pol.version17_18), paste0(nuts2File,"pol.version17_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2013 -> 2016
#.....................
# recoded
# PL11	PL71
# PL33	PL72
# PL31	PL81
# PL32	PL82
# PL34	PL84
#....................
# split
# PL12 split into new PL91, PL92
# PL91=PL127+PL129+PL12A-newPL926
# PL92=PL128+PL12B+PL12C+PL12D+PL12E+new PL926
# summary
# ??? before 2014 can't convert !!!!
str_data$info %>%
  filter(COUNTRY == "POL") %>%
  mutate(NUTS2=case_when(
   NUTS2 ==  "PL11" ~	"PL71",
   NUTS2 ==  "PL33" ~	"PL72",
   NUTS2 ==  "PL31" ~	"PL81",
   NUTS2 ==  "PL32" ~	"PL82",
   NUTS2 ==  "PL34"	~ "PL84",
   (NUTS2 == "PL12" & NUTS3 %in% c("PL127","PL129","PL12A", "PL926")) ~ "PL91",
   (NUTS2 == "PL12" & NUTS3 %in% c("PL128","PL12B","PL12C","PL12D","PL12E", "PL926")) ~ "PL92",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")



# SUO ############################################

suo.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "SUO") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(suo.version07_09), paste0(nuts2File,"suo.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

suo.version10_18 <- str_data$info %>%
  filter(YEAR %in% c(2010:2018) & COUNTRY == "SUO") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(suo.version10_18), paste0(nuts2File,"suo.version10_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
#.......................
# new region FI1D
# FI1D = FI13 + FI1A
#...........................................
# FI18 split into new region FI1B and FI1C
# FI1B + FI1C = FI18
# FI1B + FI1C = FI18

str_data$info %>%
  filter(COUNTRY == "SUO") %>%
  mutate(NUTS2=case_when(
    NUTS2 %in%  c("FI13", "FI1A") ~	"FI1D",
    # NUTS2 ==  "FI18" ~	"FI1B",
    # NUTS2 ==  "FI18" ~	"FI1C",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# SVE ########################################
sve.version07 <- str_data$info %>%
  filter(YEAR %in% c(2007) & COUNTRY == "SVE") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate (value =1)
write.xlsx(as.data.frame(sve.version07), paste0(nuts2File,"sve.version07.xlsx"), sheetName = "NUTS2", row.names = FALSE)

sve.version08_18 <- str_data$info %>%
  filter(YEAR %in% c(2008:2018) & COUNTRY == "SVE") %>%
  select(NUTS2, COUNTRY) %>% distinct()  %>% mutate (value =1)
write.xlsx(as.data.frame(sve.version08_18), paste0(nuts2File,"sve.version08_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)

# version 2003 -> 2006
# code change...........
# old   new
# SE01	SE11
# SE02	SE12
# SE09	SE21
# SE04	SE22
# SE0A	SE23
# SE06	SE31
# SE07	SE32
# SE08	SE33

str_data$info %>%
  filter(COUNTRY == "SVE") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "SE01" ~ "SE11",
    NUTS2 == "SE02" ~	"SE12",
    NUTS2 == "SE09" ~	"SE21",
    NUTS2 == "SE04" ~	"SE22",
    NUTS2 == "SE0A" ~	"SE23",
    NUTS2 == "SE06"	~ "SE31",
    NUTS2 == "SE07"	~ "SE32",
    NUTS2 == "SE08" ~	"SE33",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")
# SVN ######################################

svn.version07_08 <- str_data$info %>%
  filter(YEAR %in% c(2007:2008) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version07_08), paste0(nuts2File,"svn.version07_08.xlsx"), sheetName = "NUTS2", row.names = FALSE)

svn.version09_13 <- str_data$info %>%
  filter(YEAR %in% c(2009:2013) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version09_13), paste0(nuts2File,"svn.version09_13.xlsx"), sheetName = "NUTS2", row.names = FALSE)
svn.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "SVN") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(svn.version14_18), paste0(nuts2File,"svn.version14_18.xlsx"), sheetName = "NUTS2", row.names = FALSE)
# version 2003 -> 2006
#............................
# SI00 split into SI01 and SI02
# version 2010 -> 2013
#.............................
# Boundary shift
# SI01 -> SI03 recalculation by NSI
# SI02 -> SI04 recalculation by NSI
#..................................

str_data$info %>%
  filter(COUNTRY == "SVN") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "SI01" ~ "SI03",
    NUTS2 == "SI02" ~	"SI04",
    TRUE ~ NUTS2))  %>% count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")


# UKI ##########################################
uki.version07_09 <- str_data$info %>%
  filter(YEAR %in% c(2007:2009) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>% mutate(value = 1)
write.xlsx(as.data.frame(uki.version07_09), paste0(nuts2File,"uki.version07_09.xlsx"), sheetName = "NUTS2", row.names = FALSE)

uki.version10_13 <- str_data$info %>%
  filter(YEAR %in% c(2010:2013) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>% mutate(value = 1)

uki.version14_18 <- str_data$info %>%
  filter(YEAR %in% c(2014:2018) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct()%>% mutate(value = 1)

str_data$info %>%
  filter(YEAR %in% c(2010:2013) & COUNTRY == "UKI") %>%
  select(NUTS2, COUNTRY) %>% distinct() %>%  filter (NUTS2 %in% c("UKI1","UKI3","UKI4","UKI2" ))
#......................
# version 2006 -> 2010
# Boundary shift
#.
# UKD2 -> UKD6
# UKD5 -> UKD7

#............................
# version 2010 -> 2013
# UKI1 split into UKI3 + UKI4
# UKI3 + UKI4 = UKI1
#...................................
# UKI2 split into UKI5 + UKI6 + UKI7
# UKI5 + UKI6 + UKI7 = UKI2

#..................................
# version 2013 -> 2016
# UKM7 boundary shift: lost exUKM24
# UKM7=UKM2-UKM24
#...............................................
# UKM3 discontinued; split into new UKM8 and UKM9
#.............................................
# new region
# UKM8=UKM31+UKM34+UKM35+UKM36
# UKM9=UKM24+UKM32+UKM33+UKM37+UKM38


str_data$info %>%
  filter(COUNTRY == "UKI") %>%
  mutate(NUTS2=case_when(
    NUTS2 == "UKD2" ~ "UKD6",
    NUTS2 == "UKD5" ~	"UKD7",
    NUTS3 %in% c("UKM31","UKM34","UKM35","UKM36") ~ "UKM8",
    NUTS3 %in% c("UKM24","UKM32","UKM33","UKM37", "UKM38") ~ "UKM9",
    TRUE ~ NUTS2)) %>%
  count(NUTS2,YEAR) %>%
  arrange(YEAR) %>%
  mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1))) %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
  theme(legend.position="none")







# nuts1 ##################################################
# ell ----
check.nuts(str_data$info, "ELL", c(2007:2009), NUTS1, FALSE)
check.nuts(str_data$info, "ELL", c(2010:2013), NUTS1, FALSE)
check.nuts(str_data$info, "ELL", c(2014:2018), NUTS1, FALSE)
# version 2006 -2010
#...................
# old new recoded
# GR1	EL1
# GR2	EL2
# GR3	EL3
# GR4	EL4
# version 2010-2013
#...........................
# boundary shift| new region
# EL1	          | EL5
# EL2	          | EL6

# fra ----
check.nuts(str_data$info, "FRA", c(2007:2011), NUTS1)
check.nuts(str_data$info, "FRA", c(2012:2013), NUTS1)
check.nuts(str_data$info, "FRA", c(2014:2016), NUTS1)
check.nuts(str_data$info, "FRA", c(2017:2018), NUTS1)
# version 2010 - 2013
#...................
# boundary shift
#..................
#      new
# FR9  FRA

# version 2013 -> 2018
# discontinued
# FR2
# FR3
# FR4
# FR5
# FR6
# FR8
#...............
# recoded
#...............
# old new
# FRA FRY
# FR7 FRK
#...............
# new region
#...............
# FRB=FR24
# FRC=FR26+FR43
# FRD=FR23+FR25
# FRE=FR22+FR30
# FRF=FR21+FR41+FR42
# FRG=FR51
# FRH=FR52
# FRI=FR53+FR61+FR63
# FRJ=FR62+FR81
# FRL=FR82
# FRM=FR83

# ita ---------
check.nuts(str_data$info, "ITA", c(2007:2009), NUTS1)
check.nuts(str_data$info, "ITA", c(2010:2018), NUTS1)

# version 2006 - 2010
#...............
# boundary shift
#................
# old new
# ITD ITH
# IDE ITI

# pol ---------
check.nuts(str_data$info, "POL", c(2007:2016), NUTS1)
check.nuts(str_data$info, "POL", c(2017:2018), NUTS1)
# PL1, PL3 discontinued
# PL7=PL11+PL33
# PL8=PL3-PL33
# PL9=PL12

# sve -----------

check.nuts(str_data$info, "SVE", c(2007), NUTS1)
check.nuts(str_data$info, "SVE", c(2008:2018), NUTS1)

# version 2003 - 2006
# SE0 split into SE1 SE2 SE3
# NUTSConverter tool fehlt version 2003



# Nuts3 ####################################################

# BEL ####
check.nuts(str_data$info, "BEL", c(2007:2013), NUTS3, TRUE)
check.nuts(str_data$info, "BEL", c(2014:2018), NUTS3, TRUE)
# version 2003 - 2006
# split
# BE333 split into BE335 BE336

# CZE ####
check.nuts(str_data$info, "CZE", c(2007:2013), NUTS3, TRUE)
check.nuts(str_data$info, "CZE", c(2014:2018), NUTS3, TRUE)

# version 2003 - 2006
# CZ061 boundary shift  Vysocina
# CZ063 new region Vysocina
#.................................
# CZ062 boundary shift  Jihomoravsk? kraj
# CZ064 new region Jihomoravsk? kraj

# DEU ????####

deu.check <- check.nuts(str_data$info, "DEU", c(2007), NUTS3, TRUE)
deu.check.2008 <- check.nuts(str_data$info, "DEU", c(2008:2009), NUTS3, TRUE)
deu.check.2010 <- check.nuts(str_data$info, "DEU", c(2010:2013), NUTS3, TRUE)
deu.check.2014 <- check.nuts(str_data$info, "DEU", c(2014:2016), NUTS3, TRUE)
deu.check.2017 <- check.nuts(str_data$info, "DEU", c(2017:2018), NUTS3, TRUE)
  # version 1999-2003 ----
  #  old  new
  # merge .....
  # DE301	DE300 (part)
  # DE302	DE300 (part)
  # ..................
  # recoded
  # DE403	DE411
  # DE405	DE412
  # DE409	DE413
  # DE40A	DE414
  # DE40C	DE415
  # DE40D	DE416
  # DE40F	DE417
  # DE40I	DE418
  # DE401	DE421
  # DE402	DE422
  # DE404	DE423
  # DE406	DE424
  # DE407	DE425
  # DE408	DE426
  # DE40B	DE427
  # DE40E	DE428
  # DE40G	DE429
  # DE40H	DE42A
  # DE6	DE600
  # version 2003-2006 ----
  # old  new change
  # DEE11		Terminated
  # DEE12		Terminated
  # DEE13		Terminated
  # DEE14		Terminated
  # DEE15		Terminated
  # DEE16		Terminated
  # DEE33	DEE07 	Merged
  # DEE36	DEE07 	Merged
  # DEE22	DEE08 	Merged
  # DEE27	DEE08 	Merged
  # DEE23	DEE0A 	Merged
  # DEE26	DEE0A 	Merged
  # DEE24	DEE0B 	Merged
  # DEE25	DEE0B 	Merged
  # DEE32		Terminated
  # DEE34		Terminated
  # DEE35		Terminated
  # DEE38		Terminated
  # DEE39		Terminated
  # DEE3A		Terminated
  # DEE21	DEE02	 recoded
  # DEE31	DEE03	 recoded
  # DEE3B	DEE04	 recoded
  # DEE37	DEE0D	 recoded

  # version 2006-2010 ----
  # DE411	DE403
  # DE412	DE405
  # DE413	DE409
  # DE414	DE40A
  # DE415	DE40C
  # DE416	DE40D
  # DE417	DE40F
  # DE418	DE40I
  # DE421	DE401
  # DE422	DE402
  # DE423	DE404
  # DE424	DE406
  # DE425	DE407
  # DE426	DE408
  # DE427	DE40B
  # DE428	DE40E
  # DE429	DE40G
  # DE42A	DE40H
  # DED11	DED41
  # DED31	DED51
  # DEA21	DEA2D merge
  # DEA25	DEA2D merge
  #       DEA2D DA2D = DEA21 + DEA25
  # DED23	DED2C (part)
  # DED24	DED2C (part)
  # DED2B	DED2C (part)
  #       DED2C DED2C = DED23 + DED24 + DED2B
  # DED22	DED2D (part)
  # DED26	DED2D (part)
  # DED28	DED2D (part)
  #       DED2D DED2D = DED22 + DED26 + DED28
  # DED25	DED2E (part)
  # DED27	DED2E (part)
  #       DED2E DED2E = DED25 + DED27
  # DED29	DED2F (part)
  # DED2A	DED2F (part)
  #       DED2F DED2F = DED29 + DED2A
  # DED14	DED42 (part)
  # DED18	DED42 (part)
  # DED1A	DED42 (part)
  # DED1B	DED42 (part)
  #       DED42 DED42 = DED14 + DED18 + DED1A + DED1B
  # DED16	DED43 (part)
  # DED19	DED43 (part)
  # DED33	DED43 (part)
  #       DED43 DED43 = DED16 + DED19 + DED33
  # DED12	DED44 (part)
  # DED17	DED44 (part)
  #       DED44 DED44 = DED12 + DED17
  # DED13	DED45 (part)
  # DED15	DED45 (part)
  # DED1C	DED45 (part)
  # DED45 DED45 = DED13 + DED15 + DED1C
  # DED34	DED52 (part)
  # DED35	DED52 (part)
  #       DED52 DED52 = DED34 + DED35
  # DED32	DED53 (part)
  # DED36	DED53 (part)
  #       DED53 DED53 = DED32 + DED36

  # version 2010-2013
  # ...........
  # DE801      merge
  # DE808	     split
  # DE80B	     merge
  # DE80C      merge
  # DE80F      merge
  # DE80I	     merge
  # DE802	     merge
  #       DE80J DE80J=DE80C+DE80B+DE802+parts of DE808
  #       DE80N DE80N=DE801+DE80F+DE80I+parts of DE808
  # DE807	DE80K (part)
  # DE809	DE80K (part)
  #       DE80K DE80K = DE807 + DE809
  # DE805	DE80L (part)
  # DE80D	DE80L (part)
  # DE80H	DE80L (part)
  #       DE80L DE80L = DE805 + DE80D + DE80H
  # DE806	DE80M (part)
  # DE80E	DE80M (part)
  #       DE80M DE80M = DE806 + DE80E
  # DE80A	DE80O (part)
  # DE80G	DE80O (part)
  #       DE80O DE80O = DE80A + DE80G
  # version 2013-2016
  # DE915	discontinued; merged with ex-DE919
  # DE919	discontinued; merged with ex-DE915
  #       DE91C DE91C=DE915+DE919
  # DEB16	DEB1C boundary shift
  # DEB19	DEB1D boundary shift
  # version 2016-2021 no change ----
  # no change


# checked ----
checked <- c("DE411","DE403","DEB1D", "DE600","DEE07")
string <- deu_nuts3 %>% filter(version == "2010To2013") %>% select(NUTS3) %>% c()
deu.check %>% filter(NUTS3 %in% string$NUTS3)
deu.check.2008 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2010 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2014 %>% filter(NUTS3 %in% string$NUTS3 )
deu.check.2017 %>% filter(NUTS3 %in% string$NUTS3 )
# deu_changed = read.table(text = "changed
#       DEE11
#       DEE12
#       DEE13
#       DEE14
#       DEE15
#       DEE16
#       DEE33
#       DEE36
#       DEE22
#       DEE27
#       DEE23
#       DEE26
#       DEE24
#       DEE25
#       DEE32
#       DEE34
#       DEE35
#       DEE38
#       DEE39
#       DEE3A
#       DEE21