Question 1: In Tokyo, Japan, did regions with high COVID-19 cases lead to high suicide rate ?
Question 2: Did the relationship of COVID-19 and suicide differ among centre Tokyo and Peripheral Tokyo ?
############## Tokyo COVID-19 感染人數 ######################
rm(list=ls()) #clear all
setwd("~/Downloads")
infect=read.csv("130001_tokyo_covid19_positive_cases_by_municipality.csv")
infect$公表_年月=as.Date(infect$公表_年月日)
library(lubridate)
library(dplyr)
infect=infect[-which(infect$市区町村名==""),]
infect.m=infect %>%
group_by(yr = year(公表_年月日), mon = month(公表_年月日),place=市区町村名,code=全国地方公共団体コード) %>%
summarise(infect_m = sum(陽性者数))
infect.m$date=paste0(infect.m$yr,"-",infect.m$mon,"-01")
infect.m$date=as.Date(infect.m$date)
#infect.m$date=as.Date(paste0(infect.m$yr,infect.m$mon,"01"))
data=infect.m[,-c(1:2)]
library(tidyr)
data_wide = spread(data, date, infect_m)
data_wide$region[which(grepl("区",data_wide$place))]="centre"
data_wide$region[-which(grepl("区",data_wide$place))]="periphery"
d1=data_wide
d1$code=as.character(d1$code)
library(sf)
jp=st_read("~/Desktop/碩論_日本青少年自殺空間變異/data/japan_population_shp/jp1741_bothsex.shp")
## Reading layer `jp1741_bothsex' from data source
## `/Users/yangyuxiang_1/Desktop/碩論_日本青少年自殺空間變異/data/japan_population_shp/jp1741_bothsex.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1741 features and 30 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 122.9339 ymin: 24.04562 xmax: 153.9867 ymax: 45.52638
## Geodetic CRS: JGD2011
jp2=jp[which(jp$prfct__=="東京都"),c(4,3,7,9)]
d1=left_join(d1,st_drop_geometry(jp2[,c(1,3,4)]),by="code")
d1[,c(3:24)]=(d1[,c(3:24)]/d1$all_age)*10^5 #每十萬人有多少感染人數
d1=d1[,-c(1,26,27)]
jp3=left_join(jp2,d1,by="code")
df <- st_drop_geometry(jp3)
########### from wide to long data ###########
library(tidyr)
long1 <- df %>% gather(date, covid_19, -c(code,cmplt_k,cmplt_n,all_age,region))
long1$date=as.Date(long1$date)
############## 自殺人數 ######################
setwd("~/Desktop/碩論_日本青少年自殺空間變異/data/suicide_count")
library(readxl)
d.tokyo=read_excel("tokyo_202003to202110.xlsx",col_names = TRUE,sheet = 1)
c.tokyo=left_join(jp3[,c(1,4)],d.tokyo,by="code")
d=c.tokyo
d[,c(9:28)]=(d[,c(9:28)]/d$all_age)*10^5#每十萬人有多少自殺
d0=d[,-c(2:6)]
d0=st_drop_geometry(d0)
########### from wide to long data ###########
library(tidyr)
long2 <-d0 %>% gather(date,suicide, -c(code,complete_kanji,complete_en))
long2$yr=substr(long2$date,1,4)
long2$mon=substr(long2$date,5,6)
long2$date=paste0(long2$yr,"-",long2$mon,"-01")
long2$date=as.Date(long2$date)
long2=long2[,c(1,4,5)]
join_long=left_join(long1,long2,by=c("code","date"))
#library("writexl")
#write_xlsx(join_long,"~/Desktop/碩論_日本青少年自殺空間變異/data/suicide_count/tokyo_longjoin.xlsx")
d=join_long
#時間縮短
d2=join_long[which(d$date<"2021-03-02"),]
#group 到類別
library(dplyr)
d3=d %>% group_by(region,date) %>% summarise_all(mean)
d3=d3[-c(21,22,43,44),-c(3,4,5)]
d=as.data.frame(d)
d3=as.data.frame(d3)
head(d)
## code cmplt_k cmplt_n all_age region date
## 1 131016 東京都千代田区 Tokyo-to,Chiyoda-ku 67216 centre 2020-03-01
## 2 131024 東京都中央区 Tokyo-to,Chuo-ku 170583 centre 2020-03-01
## 3 131032 東京都港区 Tokyo-to,Minato-ku 259036 centre 2020-03-01
## 4 131041 東京都新宿区 Tokyo-to,Shinjuku-ku 345231 centre 2020-03-01
## 5 131059 東京都文京区 Tokyo-to,Bunkyo-ku 226574 centre 2020-03-01
## 6 131067 東京都台東区 Tokyo-to,Taito-ku 203647 centre 2020-03-01
## covid_19 suicide
## 1 4.463223 0.0000000
## 2 11.138273 1.1724498
## 3 15.055822 1.9302336
## 4 6.372545 2.3172890
## 5 1.765428 0.4413569
## 6 7.365687 0.9820916
head(d3)
## region date all_age covid_19 suicide
## 1 centre 2020-03-01 416207.1 4.323384 1.210717
## 2 centre 2020-04-01 416207.1 657.550987 0.901172
## 3 centre 2020-05-01 416207.1 1515.700282 1.200989
## 4 centre 2020-06-01 416207.1 1729.220383 1.266975
## 5 centre 2020-07-01 416207.1 2823.836784 1.624201
## 6 centre 2020-08-01 416207.1 5401.435378 1.447878
tokyo =jp3[-c(54:62),]
plot(tokyo["region"],main="Centre & Perpheral of Tokyo,Japan")
p_popup1 <- paste0("<strong>Municipals: </strong>", tokyo$cmplt_k)
p_popup2 <- paste0("<strong>Regions: </strong>", tokyo$region)
popup=rbind(p_popup1,p_popup2)
library(leaflet)
?colorFactor
tokyo$region=as.factor(tokyo$region)
factpal <- colorFactor(topo.colors(2), tokyo$region)
library(leaflet)
leaflet(tokyo) %>%
addPolygons(
stroke = T, # remove polygon borders,
color = "white",
fillColor = ~factpal(region), # set fill color with function from above and value
fillOpacity =0.8, smoothFactor = 0.5, # make it nicer
popup = p_popup1 ) %>%
addTiles()
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +no_defs).
## Need '+proj=longlat +datum=WGS84'
#時間縮短
d2=join_long[which(d$date<"2021-10-02"),]
library(ggplot2)
g1=ggplot(d2,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
ggtitle("Scatterplot of COVID-19 and suicide")+
geom_smooth(method="lm", size=1, color="black")
g1
g1.1=ggplot(d2,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
ggtitle("Scatterplot of COVID-19 and suicide")+
geom_smooth(method="lm", size=1, color="black")+
facet_wrap(~region,ncol=4)
g1.1
g2=ggplot(d2,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
facet_wrap(~date,ncol=4)+
ggtitle("Scatterplot of COVID-19 and suicide")
g2
g3=ggplot(d2,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
geom_path()+
theme_bw()+
theme(text=element_text(family="黑體-繁 中黑", size=14))
library(directlabels)
direct.label(g3,method = "extreme.grid")
g4=ggplot(data = d2,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
geom_path()+
theme_bw()+
geom_text(aes(label = date),colour = "gray",size = 2.5,hjust = 0,vjust = 0)
g4
d$date2=as.character(d$date)
d$date2=as.numeric(gsub("-","",d$date2))
#### ploty animation ########
library(plotly)
p <- plot_ly(d,x = ~covid_19 , y = ~suicide)#,color=~region)
add_markers(p,
size = ~all_age,
color = ~region,
text = ~region,hoverinfo = "text",
id = ~region,
frame = ~ date2,
showlenged = TRUE)
#group 到類別
library(dplyr)
d3=d2 %>% group_by(region,date) %>% summarise_all(mean)
g1=ggplot(d3,aes(x=covid_19,y=suicide,col=region))+
geom_point()
g1
g1.1=ggplot(d3,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
ggtitle("Scatterplot of COVID-19 and suicide")+
geom_smooth(method="lm", size=1, color="black")+
facet_wrap(~region,ncol=4)
g1.1
g2=ggplot(d3,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
facet_wrap(~date,ncol=4)
g2
g3=ggplot(d3,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
geom_path()+
theme_bw()+
theme(text=element_text(family="黑體-繁 中黑", size=14))
library(directlabels)
direct.label(g3,method = "extreme.grid")
g4=ggplot(data = d3,aes(x=covid_19,y=suicide,col=region))+
geom_point()+
geom_path()+
theme_bw()+
geom_text(aes(label = date),colour = "gray",size = 2.5,hjust = 0,vjust = 0)
g4
d3$date2=as.character(d3$date)
d3$date2=as.numeric(gsub("-","",d3$date2))
d3$date2
## [1] 20200301 20200401 20200501 20200601 20200701 20200801 20200901 20201001
## [9] 20201101 20201201 20210101 20210201 20210301 20210401 20210501 20210601
## [17] 20210701 20210801 20210901 20211001 20200301 20200401 20200501 20200601
## [25] 20200701 20200801 20200901 20201001 20201101 20201201 20210101 20210201
## [33] 20210301 20210401 20210501 20210601 20210701 20210801 20210901 20211001
#### ploty animation ########
library(plotly)
p <- plot_ly(d3,x = ~covid_19 , y = ~suicide)#,color=~region)
add_markers(p,
size = ~all_age,
color = ~region,
text = ~region,hoverinfo = "text",
id = ~region,
frame = ~ date2,
showlenged = TRUE)
A data story of correlation over time: Using appropriate data to set up grouping and complementary variables for representing temporal variations. The assignment should include: Visualization outputs: (interactive) charts/plots Text descriptions (describe the data and plots, your observations, and possible explanations)
In the big picture, I found out that the COVID-19 cases and suicide rate are negatively related which is very surprise to me. I originally thought that the place with higher infected rate will caused uneasiness and panic leading to excessive suicide rate. However, the data analysis proved me wrong.
As the difference between central and peripheral Tokyo, I found out that in most of time, COVID-19 infected rate is higher in central Tokyo. However the suicide rate change over different stage of virus outbreak. In simplified aggregated analysis, suicide rate in central Tokyo is higher than peripheral in most of the time except for 2020-Sep and 2021-Feb. However, If we see the unaggregated analysis, peripheral Tokyo has higher fluctuation of suicide rate than central Tokyo.
As for possible mechanism of this phenomen, I have not found the reasonable explanation for this. However, I think this phenomen found out by data analysis is interesting and worth dig deeper.