R09228001 NTU GOEG M.S. 楊宇翔 YANG
load in files
rm(list=ls(all=TRUE))
library(readxl)
library(ggplot2)
library(readr)
library(dplyr)
library(xts)
rm(list=ls()) #clear all
#install.packages("readxl")
library(readxl)
s1=read_excel("2019s1.xls")
s2=read_excel("2019s2.xls")
s3=read_excel("2019s3.xls")
s4=read_excel("2019s4.xls")
s5=read_excel("2020s1.xlsx")
s6=read_excel("2020s2.xlsx")
d=rbind(s1,s2,s3,s4,s5,s6)
datatable=d
datatable$建物移轉總面積平方公尺 <- as.numeric(datatable$建物移轉總面積平方公尺)
datatable$總價元 <- as.numeric(datatable$總價元)
datatable <- datatable[(datatable$建物移轉總面積平方公尺 > 0),]
datatable <- datatable[(datatable$總價元 > 0),]
datatable <- datatable[!is.na(datatable$單價元平方公尺),]
datatable <- datatable[(datatable$單價元平方公尺 > 0),]
datatable$year <- as.numeric(substr(datatable$交易年月日,start=1,stop=3))+1911
datatable$month <- as.numeric(substr(datatable$交易年月日,start=4,stop=5))
datatable$day <- as.numeric(substr(datatable$交易年月日,start=6,stop=7))
datatable$date <- paste(datatable$year,datatable$month,datatable$day, sep="-")
datatable$date <- as.Date(datatable$date)
datatable <- datatable[datatable$year == 2020,]
d=datatable
Assignment [1]: using ts_seasonal() function
Extracting the objects of 公寓 which their main use is residence to create a seasonality plot, heatmap and 3D surface for exploring seasonality of transaction counts in 2020.
step 1 : data select
library(dplyr)
d1=d %>% filter(主要用途=="住家用")
d2=d1 %>% filter(建物型態=="公寓(5樓含以下無電梯)")
d3=d2 %>% select(交易年月日)
d4=as.data.frame(table(d3))
colnames(d4)=c("time","count")
d4$time=as.numeric(as.character(d4$time))
d5=d4
d5$time=as.character(d5$time)
post=substr(d5$time,4,7)
prior=as.character(as.numeric(substr(d5$time,1,3))+1911)
d5$date=paste0(prior,post)
d6=d5 %>% select(date,count)
d6$date= as.Date(d6$date,"%Y%m%d")
my_ts <- xts(d6$count , order.by = as.Date(d6$date))
count=d6$count
library(zoo)
#轉換成yrquarter
Q = quarters(d6$date)
#轉換成weekday
W <- .indexwday( my_ts )
DataTable <- data.frame("Q" = Q,"W" = W,"count" = count)
xtable <- xtabs(formula = count~Q+W,data = DataTable)
time-series object: ts data
Interpretation :
Xaxis=Quater of Year; Yaxis=Week day
1=Sunday, 2=Monday,……,7=Saturday
#轉成ts的格式
my_ts2 <- ts(as.vector(as.matrix(xtable)), start=1, frequency = 4)
my_ts2
## Qtr1 Qtr2 Qtr3 Qtr4
## 1 160 211 207 222
## 2 180 207 202 239
## 3 136 175 179 211
## 4 130 174 177 179
## 5 130 156 167 207
## 6 164 155 169 181
## 7 131 171 179 219
Plotting : ts_seasonal()
Interpretation :
Regard to quater difference, Q4 >Q3 > Q2 > Q1. Regards to weekday difference, it vary through different Quaters. In Q1, Mon>Sun>Fri; In Q2, Sun > Mon > Tue; In Q3, Sun > Mon > Tue;In Q4, Mon>Sun>Thu.
library(TSstudio)
library(plotly)
ts_seasonal(my_ts2,Xgrid = T, type = "cycle") %>%
layout(title = 'Seansonal time series of quater-weekday \ntransaction amount of real estate in Taipei,2020', xaxis = list(title = "Day of the Week"), yaxis = list(title = "Transactoin Count"),legend = list(title=list(text='<b> Quater </b>')))
Heatmap : ts_heatmap()
Interpretation :
Monday in Quater 4 possessed the highest real estate transaction counts. Quater 1 possess the lowest real estate transaction counts especially in weekday.
ts_heatmap(my_ts2)%>%
layout(title = 'Heatmap of quater-weekday transaction counts of real estate in Taipei,2020', xaxis = list(title = "Day of the Week"), yaxis = list(title = "Quater"),legend = list(title=list(text='Transactoin Count')))
3-Dimension Surface map:ts_surfac
Interpretation :
Sunday and Monday in Quater 4 possessed the highest real estate transaction counts, which is the top of this 3-D surface map.
axx <- list( title = "Day of the Week")
axz <- list( title = "Transaction counts")
ts_surface(my_ts2)%>% layout(title = '3-D Surface map of quater-weekday \ntransaction counts of real estate in Taipei,2020',scene = list(xaxis=axx,zaxis=axz))
?ts_surface
Question 2.
Creating a time-series plot for comparing weekly average transaction prices between 大安區 and 文山區
library(TSstudio)
library(xts)
datatable1 <- data.frame(
"Date" = datatable$date,
"Region" = datatable$鄉鎮市區,
"Unit_Price" = as.numeric(datatable$單價元平方公尺))
datatable1=datatable1 %>% filter(Region==c("大安區","文山區"))
gb=datatable1 %>% group_by(Date,Region) %>% summarise_all(mean)
daan=gb %>% filter(Region=="大安區")
wenshan=gb %>% filter(Region=="文山區")
#大安區
daan_ts <- xts(daan$Unit_Price , order.by = as.Date(daan$Date))
ep3 <- endpoints(daan_ts,on = "weeks")
daan_ts1 <-period.apply(daan_ts,INDEX = ep3,FUN = sum)
#文山區
wenshan_ts <- xts(wenshan$Unit_Price , order.by = as.Date(wenshan$Date))
ep3 <- endpoints(wenshan_ts,on = "weeks")
wenshan_ts1 <-period.apply(wenshan_ts,INDEX = ep3,FUN = sum)
x <- cbind(coredata(daan_ts1),coredata(wenshan_ts1))
colnames(x) <- c("大安區","文山區")
my_tsf <- xts(x , order.by = as.Date(index(daan_ts1)))
Interpretation :
Gerally speaking, Daan district possess higher unit price in real estate transaction in 2020 than Wenshan district in Taipei city, Taiwan. However, during some short period of time, for example, 1st-15th,March, the average unit prices in transaction in Wenshan district is higher the Daan’s ones.
ts_plot(my_tsf, slider = TRUE,title="Time-Series of real estate transaction unit price \n in 2 districts in Taipei in 2020",Xtitle="time",Ytitle="transaction unit price")
Conclusion and Interpretation :
Real estate transaction in Taipei city, in 2020. The 4th quater, which is Oct-Dec,2020, possess higher count of transaction. The transaction has higher frequency occured in Sunday and Monday. This may be something to do the fact that most of people check and consider the house or dealing with real estate agent during weekends (free day). Accordinly, the time-series of transactions of quater and weekday result in this kind of characteristic.
The end of homework for week 5
R09228001 NTU GOEG M.S. 楊宇翔 YANG