diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index 6c40b3f..9d22262 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,9 +1,4 @@
-D:/Seafile/WW_research/01_Research/R_package/regioncode/.Rbuildignore="63CD865F"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/DESCRIPTION="25AF9A25"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/NAMESPACE="96D5EBCA"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/NEWS.md="00FBA817"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/R/globals.R="56D29EEB"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/R/regioncode.R="78A4CF23"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/dev/citylevel2021.R="7C97FCDC"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/regioncode-vignette.Rmd="48AAAEC1"
-D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/s_regioncode.bib="8CF3C239"
+/Users/xinyiye/Documents/GitHub/regioncode/R/regioncode.R="DC074D86"
+/Users/xinyiye/Documents/GitHub/regioncode/dev/citylevel2021.R="2E3C9D10"
+/Users/xinyiye/Documents/GitHub/regioncode/dev/cityranking.R="A45A71BD"
+/Users/xinyiye/Documents/博一下/空间分析/Lec2_visulizing_spatial_data/Lec2_visulizing_spatial_data.R="9BEF4761"
diff --git a/.~region_data.xlsx b/.~region_data.xlsx
new file mode 100644
index 0000000..7659275
Binary files /dev/null and b/.~region_data.xlsx differ
diff --git a/.~yunnan_cityranking.xlsx b/.~yunnan_cityranking.xlsx
new file mode 100644
index 0000000..7659275
Binary files /dev/null and b/.~yunnan_cityranking.xlsx differ
diff --git a/R/sysdata.rda b/R/sysdata.rda
index a74c64e..ab2c0dc 100644
Binary files a/R/sysdata.rda and b/R/sysdata.rda differ
diff --git a/dev/cityranking.R b/dev/cityranking.R
index 94eee54..e95023e 100644
--- a/dev/cityranking.R
+++ b/dev/cityranking.R
@@ -1,73 +1,158 @@
-# citylevel_population
-# 目的:在sysdata.rda中增加城市规模,根据城市的人口计算出城市规模。
-# 数据:中国城市统计年鉴、中国人口统计年鉴
-rm(list = ls())
-setwd("C:/Users/amand/Documents/GitHub/regioncode_new")
-
-if (!requireNamespace("dplyr", quietly = TRUE)) {
-  install.packages("dplyr")
-}
-library(dplyr)
-
-if (!requireNamespace("pacman", quietly = TRUE)) {
-  install.packages("pacman")
-}
-library(pacman)
-p_load("rio",
-       "tidyverse")
-
-# import---------------------------------------------------------------------------
-
-load("R/sysdata.rda")
-df_citylevel<- import("data/cityranking.xls")
-
-library(tidyverse)
-
-# 定义函数,根据population生成cityranking变量-----------------------------------------
-generate_cityranking <- function(df) {
-  year <- unique(df$year)
-  year_cityranking <- paste0(year, "_cityranking")
-  if (year %in% 1986:2013) {
-    df[[year_cityranking]] <- case_when(
-      df$population > 100 ~ "特大城市",
-      df$population > 50 & df$population <= 100 ~ "大城市",
-      df$population > 20 & df$population <= 50 ~ "中等城市",
-      df$population <= 20 ~ "小城市",
-      TRUE ~ NA_character_
-    )
-  } else if (year %in% 2014:2019) {
-    df[[year_cityranking]] <- case_when(
-      df$population > 1000 ~ "超大城市",
-      df$population > 500 & df$population <= 1000 ~ "特大城市",
-      df$population > 300 & df$population <= 500 ~ "I型大城市",
-      df$population > 100 & df$population <= 300 ~ "II型大城市",
-      df$population > 50 & df$population <= 100 ~ "中等城市",
-      df$population > 20 & df$population <= 50 ~ "I型小城市",
-      df$population <= 20 ~ "II型小城市",
-      TRUE ~ NA_character_
-    )
-  }
-  df <- select(df, -year, -population,-population_original)
-  df
-}
-
-# 按年份拆分并生成cityranking变量--------------------------------------------------------
-dfs <- map(1986:2019, ~df_citylevel %>%
-  filter(year == .x) %>%
-  generate_cityranking())
-
-# 合并86-19年的数据集-----------------------------------------------------------------
-cityranking<- reduce(dfs, full_join, by = c("city_code", "city_name"))
-
-# 删除city_code为空值的观测-----------------------------------------------------------
-cityranking <- cityranking[!is.na(cityranking$city_code), ]
-
-# region_table新建一列city_code-------------------------------------------------------
-region_table$city_code <- ifelse(region_table$zhixiashi == "TRUE", region_table$prov_code, region_table$`2019_code`)
-
-#合并---------------------------------------------------------------------------------
-region_data <- merge(region_table,cityranking, by = "city_code")
-region_data <- select(region_data, -city_code,-city_name)
-
-save(region_data, region_table, corruption,file = "~/R/sysdata.rda")
-
+# citylevel_population
+# Purpose: add a city-size class to sysdata.rda, derived from each city's
+# population.
+# Data: China City Statistical Yearbook, China Population Statistical Yearbook.
+# All paths are relative, so run this script from the package root.
+if (!requireNamespace("dplyr", quietly = TRUE)) {
+  install.packages("dplyr")
+}
+library(dplyr)
+
+if (!requireNamespace("pacman", quietly = TRUE)) {
+  install.packages("pacman")
+}
+library(pacman)
+p_load("rio",
+       "tidyverse")
+
+# import ----------------------------------------------------------------------
+
+load("R/sysdata.rda")
+df_citylevel <- import("inst/extdata/cityranking.xls")
+yunnan <- import("inst/extdata/云南人口构成.xlsx")
+
+yunnan <- yunnan %>%
+  filter(!is.na(year)) %>%
+  mutate(population = as.numeric(population))
+
+new <- import("inst/extdata/全国各市城镇人口构成.xlsx")
+
+new <- new %>%
+  mutate(population = as.numeric(population)) %>%
+  arrange(code, year)
+
+
+# Functions that derive the cityranking variable from population --------------
+
+# Classify a population vector into city-size labels for a single year.
+# Years 1986-2013 use the four-class scheme and later years the seven-class
+# scheme; any other year returns NULL, so assigning the result adds no column.
+# Conditions are tested in order, so each branch only needs its lower bound.
+classify_city_size <- function(population, year) {
+  if (year %in% 1986:2013) {
+    case_when(
+      population > 100 ~ "特大城市",
+      population > 50 ~ "大城市",
+      population > 20 ~ "中等城市",
+      population <= 20 ~ "小城市",
+      TRUE ~ NA_character_
+    )
+  } else if (isTRUE(year > 2013)) {
+    case_when(
+      population > 1000 ~ "超大城市",
+      population > 500 ~ "特大城市",
+      population > 300 ~ "I型大城市",
+      population > 100 ~ "II型大城市",
+      population > 50 ~ "中等城市",
+      population > 20 ~ "I型小城市",
+      population <= 20 ~ "II型小城市",
+      TRUE ~ NA_character_
+    )
+  } else {
+    NULL
+  }
+}
+
+# Add a "<year>_cityranking" column to a table holding one year of
+# df_citylevel, then drop the year and population columns so the per-year
+# tables can be joined side by side.
+generate_cityranking <- function(df) {
+  year <- unique(df$year)
+  if (length(year) > 1L) {
+    stop("`df` must contain a single year", call. = FALSE)
+  }
+  # A year without rows yields a zero-length `year`, which `if ()` cannot
+  # test; such a table only needs its helper columns dropped.
+  if (length(year) == 1L) {
+    df[[paste0(year, "_cityranking")]] <-
+      classify_city_size(df$population, year)
+  }
+  select(df, -year, -population, -population_original)
+}
+
+# Same as generate_cityranking(), for tables without a population_original
+# column; the year is passed explicitly, so empty tables are handled too.
+new_cityranking <- function(df, year) {
+  df[[paste0(year, "_cityranking")]] <-
+    classify_city_size(df$population, year)
+  df %>% select(-year, -population)
+}
+
+# Split by year and build the cityranking variable ----------------------------
+dfs <- map(1986:2019, ~df_citylevel %>%
+  filter(year == .x) %>%
+  generate_cityranking())
+
+# Combine the 1986-2019 tables ------------------------------------------------
+cityranking <- reduce(dfs, full_join, by = c("city_code", "city_name"))
+
+# Drop observations whose city_code is missing --------------------------------
+cityranking <- cityranking[!is.na(cityranking$city_code), ]
+
+# Add a city_code column to region_table --------------------------------------
+region_table$city_code <- ifelse(
+  region_table$zhixiashi == "TRUE",
+  region_table$prov_code,
+  region_table$`2019_code`
+)
+
+# Merge -----------------------------------------------------------------------
+region_data <- merge(region_table, cityranking, by = "city_code")
+region_data <- select(region_data, -city_code, -city_name)
+
+# Write back to the file loaded above; "~/R/sysdata.rda" pointed at the home
+# directory instead of the package.
+save(region_data, region_table, corruption, file = "R/sysdata.rda")
+
+
+# Yunnan ----------------------------------------------------------------------
+
+dfs <- map(1986:2019, ~yunnan %>%
+  filter(year == .x) %>%
+  new_cityranking(., .x))
+
+yunnan_cityranking <- reduce(dfs, full_join, by = c("name"))
+
+yunnan_cityranking <- yunnan_cityranking %>%
+  select(-contains("code"))
+
+# write_xlsx() belongs to writexl, which this script never attaches; rio's
+# export() is already loaded and writes .xlsx as well.
+export(yunnan_cityranking, "yunnan_cityranking.xlsx")
+
+
+# new 20-22 -------------------------------------------------------------------
+
+dfs <- map(2020:2022, ~new %>%
+  filter(year == .x) %>%
+  new_cityranking(., .x))
+
+# Named new_ranking so the table does not overwrite new_cityranking().
+new_ranking <- reduce(dfs, full_join, by = c("code"))
+
+new_ranking <- new_ranking %>%
+  select(name, code, contains("cityrank"))
+
+# Rows flagged zhixiashi join on prov_code; all other rows on `2022_code`.
+region_data <- region_data %>%
+  mutate(join_key = case_when(
+    zhixiashi == TRUE ~ prov_code,
+    zhixiashi != TRUE ~ `2022_code`
+  ))
+
+# NOTE(review): this result is never saved by the script - confirm whether a
+# second save() is intended here.
+region_data <- region_data %>%
+  left_join(new_ranking, by = c("join_key" = "code")) %>%
+  select(-"join_key")
+
diff --git "a/inst/extdata/\344\272\221\345\215\227\344\272\272\345\217\243\346\236\204\346\210\220.xlsx" "b/inst/extdata/\344\272\221\345\215\227\344\272\272\345\217\243\346\236\204\346\210\220.xlsx"
new file mode 100644
index 0000000..ce1baca
Binary files /dev/null and "b/inst/extdata/\344\272\221\345\215\227\344\272\272\345\217\243\346\236\204\346\210\220.xlsx" differ
diff --git "a/inst/extdata/\345\205\250\345\233\275\345\220\204\345\270\202\345\237\216\351\225\207\344\272\272\345\217\243\346\236\204\346\210\220.xlsx" "b/inst/extdata/\345\205\250\345\233\275\345\220\204\345\270\202\345\237\216\351\225\207\344\272\272\345\217\243\346\236\204\346\210\220.xlsx"
new file mode 100644
index 0000000..6a2223a
Binary files /dev/null and "b/inst/extdata/\345\205\250\345\233\275\345\220\204\345\270\202\345\237\216\351\225\207\344\272\272\345\217\243\346\236\204\346\210\220.xlsx" differ