R09228001 NTU GOEG M.S. 楊宇翔 YANG

load in files

rm(list=ls(all=TRUE))
library(readxl)
library(ggplot2)
library(readr)
library(dplyr)
library(xts)

rm(list=ls()) #clear all
#install.packages("readxl")
library(readxl)
s1=read_excel("2019s1.xls")
s2=read_excel("2019s2.xls")
s3=read_excel("2019s3.xls")
s4=read_excel("2019s4.xls")
s5=read_excel("2020s1.xlsx")
s6=read_excel("2020s2.xlsx")
d=rbind(s1,s2,s3,s4,s5,s6)


datatable=d
datatable$建物移轉總面積平方公尺 <- as.numeric(datatable$建物移轉總面積平方公尺)
datatable$總價元 <- as.numeric(datatable$總價元)
datatable <- datatable[(datatable$建物移轉總面積平方公尺 > 0),]
datatable <- datatable[(datatable$總價元 > 0),]
datatable <- datatable[!is.na(datatable$單價元平方公尺),]
datatable <- datatable[(datatable$單價元平方公尺 > 0),]


datatable$year <- as.numeric(substr(datatable$交易年月日,start=1,stop=3))+1911
datatable$month <- as.numeric(substr(datatable$交易年月日,start=4,stop=5))
datatable$day <- as.numeric(substr(datatable$交易年月日,start=6,stop=7))
datatable$date <- paste(datatable$year,datatable$month,datatable$day, sep="-")
datatable$date <- as.Date(datatable$date)
datatable <- datatable[datatable$year == 2020,]
d=datatable

Assignment [1]: using ts_seasonal() function

Extracting the objects of 公寓 which their main use is residence to create a seasonality plot, heatmap and 3D surface for exploring seasonality of transaction counts in 2020.

step 1 : data select

library(dplyr)
d1=d %>% filter(主要用途=="住家用")
d2=d1 %>% filter(建物型態=="公寓(5樓含以下無電梯)")
d3=d2 %>% select(交易年月日)
d4=as.data.frame(table(d3))
colnames(d4)=c("time","count")
d4$time=as.numeric(as.character(d4$time))
d5=d4 
d5$time=as.character(d5$time)
post=substr(d5$time,4,7)
prior=as.character(as.numeric(substr(d5$time,1,3))+1911)
d5$date=paste0(prior,post)
d6=d5 %>% select(date,count)
d6$date= as.Date(d6$date,"%Y%m%d")

my_ts <- xts(d6$count , order.by = as.Date(d6$date))

count=d6$count
library(zoo)
#轉換成yrquarter
Q = quarters(d6$date)
#轉換成weekday 
W <- .indexwday( my_ts )
DataTable <- data.frame("Q" = Q,"W" = W,"count" = count)
xtable <- xtabs(formula = count~Q+W,data = DataTable)

time-series object: ts data

Interpretation :

Xaxis=Quater of Year; Yaxis=Week day

1=Sunday, 2=Monday,……,7=Saturday

#轉成ts的格式
my_ts2 <- ts(as.vector(as.matrix(xtable)), start=1, frequency = 4)
my_ts2
##   Qtr1 Qtr2 Qtr3 Qtr4
## 1  160  211  207  222
## 2  180  207  202  239
## 3  136  175  179  211
## 4  130  174  177  179
## 5  130  156  167  207
## 6  164  155  169  181
## 7  131  171  179  219

Plotting : ts_seasonal()

Interpretation :

Regard to quater difference, Q4 >Q3 > Q2 > Q1. Regards to weekday difference, it vary through different Quaters. In Q1, Mon>Sun>Fri; In Q2, Sun > Mon > Tue; In Q3, Sun > Mon > Tue;In Q4, Mon>Sun>Thu.

library(TSstudio)
library(plotly)
ts_seasonal(my_ts2,Xgrid = T, type = "cycle") %>%
        layout(title = 'Seansonal time series of quater-weekday \ntransaction amount of real estate in Taipei,2020', xaxis = list(title = "Day of the Week"), yaxis = list(title = "Transactoin Count"),legend = list(title=list(text='<b> Quater </b>')))

Heatmap : ts_heatmap()

Interpretation :

Monday in Quater 4 possessed the highest real estate transaction counts. Quater 1 possess the lowest real estate transaction counts especially in weekday.

ts_heatmap(my_ts2)%>%
        layout(title = 'Heatmap of quater-weekday transaction counts of real estate in Taipei,2020', xaxis = list(title = "Day of the Week"), yaxis = list(title = "Quater"),legend = list(title=list(text='Transactoin Count')))

3-Dimension Surface map:ts_surfac

Interpretation :

Sunday and Monday in Quater 4 possessed the highest real estate transaction counts, which is the top of this 3-D surface map.

axx <- list( title =  "Day of the Week")
axz <- list( title =  "Transaction counts")
ts_surface(my_ts2)%>% layout(title = '3-D Surface map of quater-weekday \ntransaction counts of real estate in Taipei,2020',scene = list(xaxis=axx,zaxis=axz))
?ts_surface

Question 2.

Creating a time-series plot for comparing weekly average transaction prices between 大安區 and 文山區

library(TSstudio)
library(xts)

datatable1 <- data.frame(
  "Date" = datatable$date,
  "Region" = datatable$鄉鎮市區,
  "Unit_Price" = as.numeric(datatable$單價元平方公尺))
datatable1=datatable1 %>% filter(Region==c("大安區","文山區"))
gb=datatable1 %>% group_by(Date,Region) %>% summarise_all(mean)

daan=gb %>% filter(Region=="大安區")
wenshan=gb %>% filter(Region=="文山區")

#大安區  
daan_ts <- xts(daan$Unit_Price , order.by = as.Date(daan$Date))
ep3 <- endpoints(daan_ts,on = "weeks")
daan_ts1 <-period.apply(daan_ts,INDEX = ep3,FUN = sum)


#文山區
wenshan_ts <- xts(wenshan$Unit_Price , order.by = as.Date(wenshan$Date))
ep3 <- endpoints(wenshan_ts,on = "weeks")
wenshan_ts1 <-period.apply(wenshan_ts,INDEX = ep3,FUN = sum)

x <- cbind(coredata(daan_ts1),coredata(wenshan_ts1))
colnames(x) <- c("大安區","文山區")

my_tsf <- xts(x , order.by = as.Date(index(daan_ts1)))

Interpretation :

Gerally speaking, Daan district possess higher unit price in real estate transaction in 2020 than Wenshan district in Taipei city, Taiwan. However, during some short period of time, for example, 1st-15th,March, the average unit prices in transaction in Wenshan district is higher the Daan’s ones.

ts_plot(my_tsf, slider = TRUE,title="Time-Series of real estate transaction unit price  \n in 2 districts in  Taipei in 2020",Xtitle="time",Ytitle="transaction unit price")

Conclusion and Interpretation :

Real estate transaction in Taipei city, in 2020. The 4th quater, which is Oct-Dec,2020, possess higher count of transaction. The transaction has higher frequency occured in Sunday and Monday. This may be something to do the fact that most of people check and consider the house or dealing with real estate agent during weekends (free day). Accordinly, the time-series of transactions of quater and weekday result in this kind of characteristic.

The end of homework for week 5

R09228001 NTU GOEG M.S. 楊宇翔 YANG