Skip to content

Commit

Permalink
Population Data for Yunnan Province Cities (1986–2019) and National C…
Browse files Browse the repository at this point in the history
…ities (2020–2022)

【更新】

1.删除县级市

2.更正重庆市武隆区(2017年撤县设区)

3.处理重庆、海南的错误代码
海南:1987年及以前隶属广东省,1988年建省
重庆:1996年及以前隶属四川省,1997年起成为直辖市

4.处理云南缺失人口数据

5.新增2020–2022年全国人口数据

参考资料:https://www.mca.gov.cn/n156/n186/index.html

【待更新】


1.需更新以年份判断省份

2.贵州人口缺失

3.2020–2022年部分城市人口缺失
  • Loading branch information
Xinyi Ye committed Nov 8, 2024
1 parent 6269ff3 commit 77f95ab
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 82 deletions.
13 changes: 4 additions & 9 deletions .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
D:/Seafile/WW_research/01_Research/R_package/regioncode/.Rbuildignore="63CD865F"
D:/Seafile/WW_research/01_Research/R_package/regioncode/DESCRIPTION="25AF9A25"
D:/Seafile/WW_research/01_Research/R_package/regioncode/NAMESPACE="96D5EBCA"
D:/Seafile/WW_research/01_Research/R_package/regioncode/NEWS.md="00FBA817"
D:/Seafile/WW_research/01_Research/R_package/regioncode/R/globals.R="56D29EEB"
D:/Seafile/WW_research/01_Research/R_package/regioncode/R/regioncode.R="78A4CF23"
D:/Seafile/WW_research/01_Research/R_package/regioncode/dev/citylevel2021.R="7C97FCDC"
D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/regioncode-vignette.Rmd="48AAAEC1"
D:/Seafile/WW_research/01_Research/R_package/regioncode/vignettes/s_regioncode.bib="8CF3C239"
/Users/xinyiye/Documents/GitHub/regioncode/R/regioncode.R="DC074D86"
/Users/xinyiye/Documents/GitHub/regioncode/dev/citylevel2021.R="2E3C9D10"
/Users/xinyiye/Documents/GitHub/regioncode/dev/cityranking.R="A45A71BD"
/Users/xinyiye/Documents/博一下/空间分析/Lec2_visulizing_spatial_data/Lec2_visulizing_spatial_data.R="9BEF4761"
Binary file added .~region_data.xlsx
Binary file not shown.
Binary file added .~yunnan_cityranking.xlsx
Binary file not shown.
Binary file modified R/sysdata.rda
Binary file not shown.
220 changes: 147 additions & 73 deletions dev/cityranking.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,147 @@
# citylevel_population
# Purpose: add a city-size class to sysdata.rda, derived from each city's
#   population.
# Data: China City Statistical Yearbook, China Population Statistical Yearbook.
#
# Run this script from the package root so the relative paths below resolve.
# It deliberately does not call rm(list = ls()) or setwd(): wiping the caller's
# workspace and hard-coding one machine's directory make the script
# non-portable and unsafe to source from an existing session.

# Install pacman on first use; p_load() then installs (if needed) and attaches
# the remaining packages. tidyverse already attaches dplyr and purrr, so they
# are not loaded separately.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)
p_load("rio", "tidyverse")

# import ----------------------------------------------------------------------

# Later steps use `region_table` and `corruption` without defining them, so
# they are expected to come from this file.
load("R/sysdata.rda")
df_citylevel <- import("data/cityranking.xls")
# Define the function that derives <year>_cityranking from population ---------

#' Classify one year's cities into size classes by population
#'
#' Two classification schemes are applied depending on the year: a four-class
#' scheme for 1986-2013 and a seven-class scheme for 2014-2019. Upper bounds
#' are inclusive (a population of exactly 100 is *not* "more than 100").
#'
#' @param df A data frame holding one year of data, with the columns `year`
#'   and `population` (numeric; presumably in units of 10,000 persons --
#'   TODO confirm against the source yearbooks) and optionally
#'   `population_original`.
#' @return `df` without `year`, `population` and `population_original`. When
#'   the year is covered by a scheme, a character column named
#'   `<year>_cityranking` is added; a missing population yields `NA`. An empty
#'   `df` is returned without a ranking column, because there is no year to
#'   name it after.
generate_cityranking <- function(df) {
  year <- unique(df$year)
  if (length(year) > 1L) {
    stop(
      "`df` must hold a single year, found: ", paste(year, collapse = ", "),
      call. = FALSE
    )
  }

  # `year` has length zero for an empty slice; testing it directly inside
  # `if ()` would fail with "argument is of length zero".
  scheme <- NULL
  if (length(year) == 1L && !is.na(year)) {
    if (year %in% 1986:2013) {
      scheme <- list(
        breaks = c(-Inf, 20, 50, 100, Inf),
        labels = c("小城市", "中等城市", "大城市", "特大城市")
      )
    } else if (year %in% 2014:2019) {
      scheme <- list(
        breaks = c(-Inf, 20, 50, 100, 300, 500, 1000, Inf),
        labels = c(
          "II型小城市", "I型小城市", "中等城市", "II型大城市",
          "I型大城市", "特大城市", "超大城市"
        )
      )
    } else {
      warning(
        "No city-size scheme for year ", year, "; no ranking column added.",
        call. = FALSE
      )
    }
  }

  if (!is.null(scheme)) {
    # Right-closed intervals reproduce the "> lower & <= upper" rules.
    size_class <- cut(
      as.numeric(df$population),
      breaks = scheme$breaks,
      labels = scheme$labels,
      right = TRUE,
      include.lowest = TRUE
    )
    df[[paste0(year, "_cityranking")]] <- as.character(size_class)
  }

  df[setdiff(names(df), c("year", "population", "population_original"))]
}

# Split by year and derive the <year>_cityranking column for each slice -------
dfs <- lapply(1986:2019, function(target_year) {
  generate_cityranking(filter(df_citylevel, year == target_year))
})

# Combine the 1986-2019 slices into one wide table (one column per year) ------
cityranking <- Reduce(
  function(joined, next_year) {
    full_join(joined, next_year, by = c("city_code", "city_name"))
  },
  dfs
)

# Drop observations whose city_code is missing --------------------------------
has_city_code <- !is.na(cityranking$city_code)
cityranking <- cityranking[has_city_code, ]

# Add a city_code join key to region_table ------------------------------------
# Rows flagged as zhixiashi are keyed by prov_code, all others by 2019_code.
# NOTE(review): this helper column stays on region_table and is therefore
# written to the .rda below -- confirm that is intended.
region_table$city_code <- with(
  region_table,
  ifelse(zhixiashi == "TRUE", prov_code, `2019_code`)
)

# Merge -----------------------------------------------------------------------
# merge() keeps only rows whose city_code appears in both tables.
region_data <- merge(region_table, cityranking, by = "city_code") %>%
  select(-city_code, -city_name)

# NOTE(review): the data were loaded from "R/sysdata.rda" (relative to the
# working directory) but are saved under the home directory here -- confirm
# the target path.
save(region_data, region_table, corruption, file = "~/R/sysdata.rda")

# citylevel_population
# Purpose: add a city-size class to sysdata.rda, derived from each city's
#   population.
# Data: China City Statistical Yearbook, China Population Statistical Yearbook.

# Install (when missing) and attach dplyr, then pacman, in that order.
invisible(lapply(c("dplyr", "pacman"), function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}))
p_load(
  "rio",
  "tidyverse"
)

# import ----------------------------------------------------------------------

# Later steps use `region_table` and `corruption` without defining them, so
# they are expected to come from this file.
load("R/sysdata.rda")
df_citylevel <- import("inst/extdata/cityranking.xls")

# Yunnan population workbook: keep only rows that carry a year and make sure
# population is numeric.
yunnan <- import("inst/extdata/云南人口构成.xlsx") %>%
  filter(!is.na(year)) %>%
  mutate(population = as.numeric(population))

# Nationwide city population workbook (used for 2020-2022 further down):
# numeric population, ordered by code and then year.
new <- import("inst/extdata/全国各市城镇人口构成.xlsx") %>%
  mutate(population = as.numeric(population)) %>%
  arrange(code, year)


# Define the function that derives <year>_cityranking from population ---------

#' Classify one year's cities into size classes by population
#'
#' Two classification schemes are applied depending on the year: a four-class
#' scheme for 1986-2013 and a seven-class scheme for 2014-2019. Upper bounds
#' are inclusive (a population of exactly 100 is *not* "more than 100").
#'
#' @param df A data frame holding one year of data, with the columns `year`
#'   and `population` (numeric; presumably in units of 10,000 persons --
#'   TODO confirm against the source yearbooks) and optionally
#'   `population_original`.
#' @return `df` without `year`, `population` and `population_original`. When
#'   the year is covered by a scheme, a character column named
#'   `<year>_cityranking` is added; a missing population yields `NA`. An empty
#'   `df` is returned without a ranking column, because there is no year to
#'   name it after.
generate_cityranking <- function(df) {
  year <- unique(df$year)
  if (length(year) > 1L) {
    stop(
      "`df` must hold a single year, found: ", paste(year, collapse = ", "),
      call. = FALSE
    )
  }

  # `year` has length zero for an empty slice; testing it directly inside
  # `if ()` would fail with "argument is of length zero".
  scheme <- NULL
  if (length(year) == 1L && !is.na(year)) {
    if (year %in% 1986:2013) {
      scheme <- list(
        breaks = c(-Inf, 20, 50, 100, Inf),
        labels = c("小城市", "中等城市", "大城市", "特大城市")
      )
    } else if (year %in% 2014:2019) {
      scheme <- list(
        breaks = c(-Inf, 20, 50, 100, 300, 500, 1000, Inf),
        labels = c(
          "II型小城市", "I型小城市", "中等城市", "II型大城市",
          "I型大城市", "特大城市", "超大城市"
        )
      )
    } else {
      warning(
        "No city-size scheme for year ", year, "; no ranking column added.",
        call. = FALSE
      )
    }
  }

  if (!is.null(scheme)) {
    # Right-closed intervals reproduce the "> lower & <= upper" rules.
    size_class <- cut(
      as.numeric(df$population),
      breaks = scheme$breaks,
      labels = scheme$labels,
      right = TRUE,
      include.lowest = TRUE
    )
    df[[paste0(year, "_cityranking")]] <- as.character(size_class)
  }

  df[setdiff(names(df), c("year", "population", "population_original"))]
}

# Split by year and derive the <year>_cityranking column for each slice -------
dfs <- lapply(1986:2019, function(target_year) {
  generate_cityranking(filter(df_citylevel, year == target_year))
})

# Combine the 1986-2019 slices into one wide table (one column per year) ------
cityranking <- Reduce(
  function(joined, next_year) {
    full_join(joined, next_year, by = c("city_code", "city_name"))
  },
  dfs
)

# Drop observations whose city_code is missing --------------------------------
has_city_code <- !is.na(cityranking$city_code)
cityranking <- cityranking[has_city_code, ]

# Add a city_code join key to region_table ------------------------------------
# Rows flagged as zhixiashi are keyed by prov_code, all others by 2019_code.
# NOTE(review): this helper column stays on region_table and is therefore
# written to the .rda below -- confirm that is intended.
region_table$city_code <- with(
  region_table,
  ifelse(zhixiashi == "TRUE", prov_code, `2019_code`)
)

# Merge -----------------------------------------------------------------------
# merge() keeps only rows whose city_code appears in both tables.
region_data <- merge(region_table, cityranking, by = "city_code") %>%
  select(-city_code, -city_name)

# NOTE(review): the data were loaded from "R/sysdata.rda" (relative to the
# working directory) but are saved under the home directory here -- confirm
# the target path.
save(region_data, region_table, corruption, file = "~/R/sysdata.rda")


# Yunnan ----------------------------------------------------------------------

#' Classify cities into size classes for an explicitly given year
#'
#' Applies a four-class scheme for 1986-2013 and a seven-class scheme for any
#' year after 2013. Upper bounds are inclusive (a population of exactly 100 is
#' *not* "more than 100").
#'
#' @param df A data frame with the columns `year` and `population` (numeric;
#'   presumably in units of 10,000 persons -- TODO confirm against the source
#'   workbooks).
#' @param year A single year; selects the scheme and names the output column.
#' @return `df` without `year` and `population`. When `year` is covered by a
#'   scheme, a character column named `<year>_cityranking` is added; a missing
#'   population yields `NA`.
new_cityranking <- function(df, year) {
  if (length(year) != 1L || is.na(year)) {
    stop("`year` must be a single non-missing value.", call. = FALSE)
  }

  scheme <- NULL
  if (year %in% 1986:2013) {
    scheme <- list(
      breaks = c(-Inf, 20, 50, 100, Inf),
      labels = c("小城市", "中等城市", "大城市", "特大城市")
    )
  } else if (year > 2013) {
    scheme <- list(
      breaks = c(-Inf, 20, 50, 100, 300, 500, 1000, Inf),
      labels = c(
        "II型小城市", "I型小城市", "中等城市", "II型大城市",
        "I型大城市", "特大城市", "超大城市"
      )
    )
  } else {
    # Previously an unsupported year silently produced no ranking column.
    warning(
      "No city-size scheme for year ", year, "; no ranking column added.",
      call. = FALSE
    )
  }

  if (!is.null(scheme)) {
    # Right-closed intervals reproduce the "> lower & <= upper" rules.
    size_class <- cut(
      as.numeric(df$population),
      breaks = scheme$breaks,
      labels = scheme$labels,
      right = TRUE,
      include.lowest = TRUE
    )
    df[[paste0(year, "_cityranking")]] <- as.character(size_class)
  }

  df[setdiff(names(df), c("year", "population"))]
}


# Rank the Yunnan cities for every year 1986-2019 (one data frame per year).
dfs <- map(1986:2019, function(target_year) {
  yunnan %>%
    filter(year == target_year) %>%
    new_cityranking(target_year)
})

# One row per city name, one <year>_cityranking column per year.
yunnan_cityranking <- reduce(dfs, full_join, by = "name")

# Each yearly slice carries its own code column(s); after the joins they are
# only suffixed duplicates, so drop every column whose name contains "code".
yunnan_cityranking <- yunnan_cityranking %>%
  select(-contains("code"))

# write_xlsx() belongs to writexl, which this script never attaches (only rio
# and tidyverse are loaded), so the call failed with "could not find
# function". rio is already loaded and writes .xlsx files through export().
rio::export(yunnan_cityranking, "yunnan_cityranking.xlsx")


# new 2020-2022 ---------------------------------------------------------------

# Rank every city in the nationwide table for 2020, 2021 and 2022.
dfs <- map(2020:2022, function(target_year) {
  new %>%
    filter(year == target_year) %>%
    new_cityranking(target_year)
})

# The joined result is stored under its own name. It used to be assigned to
# `new_cityranking`, which replaced the function of that name with a data
# frame and broke any re-run of the Yunnan / 2020-2022 sections in the same
# session.
# NOTE(review): after joining on `code`, the earlier years' `name` columns
# appear to be suffixed (name.x / name.y), so `name` below would be the 2022
# name only and NA for cities absent in 2022 -- confirm.
ranking_2020_2022 <- reduce(dfs, full_join, by = "code") %>%
  select(name, code, contains("cityrank"))

# Rows flagged as zhixiashi are matched on prov_code, all others on 2022_code;
# the temporary key is removed again after the join.
# NOTE(review): nothing in the visible script saves region_data after this
# step -- confirm where the 2020-2022 columns are persisted.
region_data <- region_data %>%
  mutate(join_key = case_when(
    zhixiashi == TRUE ~ prov_code,
    zhixiashi != TRUE ~ `2022_code`
  )) %>%
  left_join(ranking_2020_2022, by = c("join_key" = "code")) %>%
  select(-"join_key")


Binary file added inst/extdata/云南人口构成.xlsx
Binary file not shown.
Binary file added inst/extdata/全国各市城镇人口构成.xlsx
Binary file not shown.

0 comments on commit 77f95ab

Please sign in to comment.