Network_HW5_ Centrality (Individual-level)
r09228001 臺大地理所碩一 楊宇翔 2021/4/07 (Wed.)

Q5-1 網絡分析實作 利用 data(studentnets.peerinfl, package=“NetData”) 提供的資料表(https://www.rdocumentation.org/packages/NetData/versions/0.3/topics/peerinfl),檢視 sem1, sem2資料表的欄位std_id / alter_id,表示某班級兩學期的同學友誼的指認關係。

• 作業5-1提供data(studentnets.peerinfl) 的sem1,該表格為某校在某學期的各班同學指認不同類型的朋友關係的調查。

• 請先篩選出班級(cls_id)為508的edges,再針對會一起「參加學校活動(acta)」以及「參加校外活動(nacta)」的關係,分別建立好友網絡,計算network centrality的各種指標(利用套件的函數),比較這些同學在班上人際網絡的關係(如比較每個人在「學校活動」網絡與「校外活動」網絡的差異等)。

欄位說明: • std_id - student id • alter_id - alter id • cls_id - class id • acta - In school activities (1 = yes, 0 = np) • nacta - Non-school activities (1 = yes, 0 = no)

How we measure the centrality:Freeman’s Measures: Degree, closeness, and betweenness Network-level centralization: More indices: Information centrality and Eigenvector centrality Directed network centrality: HITS and PageRank algorithms Spatial network centrality: Geo-location + PageRank

#install.packages("spData")
#install.packages("NetData")
rm(list=ls(all=TRUE))
library(igraph)
library(spData)
library(NetData)
data(studentnets.peerinfl)
preVtable<- attitudes[attitudes$cls_id == "508",]
Vtable <- preVtable$std_id
Etable <- sem1[sem1$cls_id == "508",]

g1=Inside the school

Etable1 <- Etable[Etable$acta == 1,]
g1 = graph.data.frame (Etable1, directed=F, Vtable)

g2= Outside the School

Etable2 <- Etable[Etable$nacta == 1,]
g2 = graph.data.frame (Etable2, directed=TRUE, Vtable)

1.1 Freeman’s Measures: Degree

Centrality is fairly straight forward: we want to identify which nodes are in the ‘center’ of the network.

Inside the school

d1 <- degree(g1, v = V(g1), mode = c("all", "out", "in", "total"), loops = TRUE, normalized = F)
hist(d1,ylim = c(0, 15),col="gold",xlab="Central Degree", main="In-School Activity Cental Degree Network")

d1
## 14833 14843 14851 14853 14927 15003 15037 14736 14826 14986 15120 15129 14824 
##     0     2     2     0     1     5     8     7     1     7     6     3     0 
## 14866 15010 15076 15130 14739 14746 14812 14834 14953 14954 14951 14972 15014 
##     0     0     0     0     2     5     5     4     4     6     0     0     2 
## 15061 15065 15086 15177 
##     3     3     0     0
summary(d1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   2.000   2.533   4.750   8.000

Outside the School

d2 <- degree(g2, v = V(g2), mode = c("all", "out", "in", "total"), loops = TRUE, normalized = T)
hist(d2,ylim = c(0, 15),col="gold",xlab="Central Degree", main="Out-School Activity Cental Degree Network")

d2
##      14833      14843      14851      14853      14927      15003      15037 
## 0.00000000 0.03448276 0.03448276 0.00000000 0.00000000 0.03448276 0.03448276 
##      14736      14826      14986      15120      15129      14824      14866 
## 0.10344828 0.00000000 0.03448276 0.06896552 0.00000000 0.03448276 0.00000000 
##      15010      15076      15130      14739      14746      14812      14834 
## 0.00000000 0.06896552 0.00000000 0.03448276 0.03448276 0.03448276 0.06896552 
##      14953      14954      14951      14972      15014      15061      15065 
## 0.03448276 0.10344828 0.03448276 0.00000000 0.00000000 0.00000000 0.00000000 
##      15086      15177 
## 0.03448276 0.06896552
summary(d2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.03448 0.02989 0.03448 0.10345

1.2 Freeman’s Measures:closeness

#An actor is considered important if he/she is relatively close to all other actors. #Closeness is based on the inverse of the distance of each actor to every other actor in the network.

Inside the school

dist1 <- distances(g1)
sp1 <- shortest_paths(g1,from = V(g1)[2] , to = V(g1)) 
## Warning in shortest_paths(g1, from = V(g1)[2], to = V(g1)): At
## structural_properties.c:745 :Couldn't reach some vertices
#sp1$vpath
c1=closeness(g1, vids = V(g1), mode = c("out", "in", "all", "total"), weights = NULL, normalized = FALSE)
## Warning in closeness(g1, vids = V(g1), mode = c("out", "in", "all", "total"), :
## At centrality.c:2784 :closeness centrality is not well-defined for disconnected
## graphs
hist(c1,ylim = c(0, 15),col="gold",xlab="Closeness", main="In-School Activity Network Closeness ")

c1
##       14833       14843       14851       14853       14927       15003 
## 0.001149425 0.001953125 0.001926782 0.001149425 0.001845018 0.001976285 
##       15037       14736       14826       14986       15120       15129 
## 0.001960784 0.001949318 0.001912046 0.001956947 0.001945525 0.001953125 
##       14824       14866       15010       15076       15130       14739 
## 0.001149425 0.001149425 0.001149425 0.001149425 0.001149425 0.001915709 
##       14746       14812       14834       14953       14954       14951 
## 0.001324503 0.001324503 0.001960784 0.001326260 0.001326260 0.001149425 
##       14972       15014       15061       15065       15086       15177 
## 0.001149425 0.001322751 0.001923077 0.001886792 0.001149425 0.001149425
summary(c1)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001149 0.001149 0.001326 0.001544 0.001941 0.001976

Outside the School

dist2 <- distances(g2)
sp2 <- shortest_paths(g2,from = V(g2)[2] , to = V(g2)) 
## Warning in shortest_paths(g2, from = V(g2)[2], to = V(g2)): At
## structural_properties.c:745 :Couldn't reach some vertices
#sp1$vpath
c2=closeness(g2, vids = V(g2), mode = c("out", "in", "all", "total"), weights = NULL, normalized = FALSE)
## Warning in closeness(g2, vids = V(g2), mode = c("out", "in", "all", "total"), :
## At centrality.c:2784 :closeness centrality is not well-defined for disconnected
## graphs
hist(c2,ylim = c(0, 25),col="gold",xlab="Closeness", main="Out-School Activity Network Closeness ")

c2
##       14833       14843       14851       14853       14927       15003 
## 0.001149425 0.001189061 0.001149425 0.001149425 0.001149425 0.001149425 
##       15037       14736       14826       14986       15120       15129 
## 0.001149425 0.001373626 0.001149425 0.001149425 0.001230012 0.001149425 
##       14824       14866       15010       15076       15130       14739 
## 0.001149425 0.001149425 0.001149425 0.001231527 0.001149425 0.001149425 
##       14746       14812       14834       14953       14954       14951 
## 0.001149425 0.001149425 0.001189061 0.001149425 0.001277139 0.001149425 
##       14972       15014       15061       15065       15086       15177 
## 0.001149425 0.001149425 0.001149425 0.001149425 0.001149425 0.001231527
summary(c2)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001149 0.001149 0.001149 0.001172 0.001149 0.001374

1.3 Freeman’s Measures:betweenness

#A person who lies on communication paths can control communication flow, and is thus important. #Betweenness counts the number of shortest paths between i and k that actor j resides on.

Inside the school

vb1<- betweenness(g1, v = V(g1), directed = TRUE, weights = NULL, nobigint = TRUE, normalized = FALSE)
hist(vb1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Betweenness ")

vb1
##      14833      14843      14851      14853      14927      15003      15037 
##  0.0000000 30.0000000  0.0000000  0.0000000  0.0000000 41.5000000 16.6666667 
##      14736      14826      14986      15120      15129      14824      14866 
##  5.3333333  0.0000000 20.5000000  1.0000000  6.6666667  0.0000000  0.0000000 
##      15010      15076      15130      14739      14746      14812      14834 
##  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000 15.3333333 
##      14953      14954      14951      14972      15014      15061      15065 
##  0.6666667  1.3333333  0.0000000  0.0000000  0.0000000 22.0000000 12.0000000 
##      15086      15177 
##  0.0000000  0.0000000
summary(vb1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   5.767   6.333  41.500

Outside the School

vb2<- betweenness(g2, v = V(g2), directed = TRUE, weights = NULL, nobigint = TRUE, normalized = FALSE)
hist(vb2,ylim = c(0, 30),col="gold",xlab="Closeness", main="In-School Activity Network Betweenness ")

vb2
## 14833 14843 14851 14853 14927 15003 15037 14736 14826 14986 15120 15129 14824 
##     0     0     0     0     0     0     0     0     0     0     2     0     0 
## 14866 15010 15076 15130 14739 14746 14812 14834 14953 14954 14951 14972 15014 
##     0     0     0     0     0     0     0     2     0     0     0     0     0 
## 15061 15065 15086 15177 
##     0     0     0     0
summary(vb2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1333  0.0000  2.0000

1.4 Network-Level Degree Centralization

此指標將centralization視為是actors之間的分散程度(dispersion)或異質程度(heterogeneity)

Inside the school

centr_degree(g1)
## $res
##  [1] 0 2 2 0 1 5 8 7 1 7 6 3 0 0 0 0 0 2 5 5 4 4 6 0 0 2 3 3 0 0
## 
## $centralization
## [1] 0.1885057
## 
## $theoretical_max
## [1] 870

Inside the school

centr_degree(g2)
## $res
##  [1] 0 1 1 0 0 1 1 3 0 1 2 0 1 0 0 2 0 1 1 1 2 1 3 1 0 0 0 0 1 2
## 
## $centralization
## [1] 0.03804994
## 
## $theoretical_max
## [1] 1682

1.5 Eigenvector centrality

The corresponding entry of the principal eigenvector of the adjacency matrix of the network.It assigns relative scores to all nodes in the network based on the principle that connections to high-scoring nodes contribute more.

Inside the school

ec <- eigen_centrality(g1, directed = FALSE, scale = TRUE,weights = NULL, options = arpack_defaults) 
ec1 <- ec$vector
hist(ec1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Eigenvector centrality ")

ec1
##        14833        14843        14851        14853        14927        15003 
## 0.000000e+00 5.369102e-02 3.413434e-01 0.000000e+00 6.688384e-04 2.975721e-01 
##        15037        14736        14826        14986        15120        15129 
## 9.597872e-01 1.000000e+00 1.366099e-01 7.843313e-01 9.559067e-01 2.402684e-01 
##        14824        14866        15010        15076        15130        14739 
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 7.470779e-02 
##        14746        14812        14834        14953        14954        14951 
## 5.400702e-17 2.700351e-17 2.144634e-01 5.400702e-17 0.000000e+00 0.000000e+00 
##        14972        15014        15061        15065        15086        15177 
## 0.000000e+00 0.000000e+00 1.068924e-02 3.840064e-03 0.000000e+00 0.000000e+00
summary(ec1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1691  0.1950  1.0000

Outside the School

ec <- eigen_centrality(g2, directed = FALSE, scale = TRUE,weights = NULL, options = arpack_defaults) 
ec1 <- ec$vector
hist(ec1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Eigenvector centrality ")

ec1
##        14833        14843        14851        14853        14927        15003 
## 4.614386e-17 0.000000e+00 0.000000e+00 4.614386e-17 4.614386e-17 0.000000e+00 
##        15037        14736        14826        14986        15120        15129 
## 5.257311e-01 1.000000e+00 4.614386e-17 5.257311e-01 8.506508e-01 4.614386e-17 
##        14824        14866        15010        15076        15130        14739 
## 0.000000e+00 4.614386e-17 4.614386e-17 0.000000e+00 4.614386e-17 3.249197e-01 
##        14746        14812        14834        14953        14954        14951 
## 0.000000e+00 0.000000e+00 6.180340e-01 0.000000e+00 0.000000e+00 0.000000e+00 
##        14972        15014        15061        15065        15086        15177 
## 4.614386e-17 4.614386e-17 4.614386e-17 4.614386e-17 0.000000e+00 0.000000e+00
summary(ec1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1282  0.0000  1.0000

Conclustion of Q 5-1

The centrality is higher in class 508 in “in-school” activities rather than “out-of-school” activities.

Appendix
The Edges of The Network of Class 508 In-School Activities
plot(g1, main="The Network of Class 508 In-School Activities")

g1
## IGRAPH 6bf2af4 UN-- 30 38 -- 
## + attr: name (v/c), timea (e/n), ona (e/n), oca (e/n), acta (e/n),
## | nacta (e/n), wrka (e/n), nbra (e/n), otha (e/n), wkna (e/n), bfa
## | (e/n), lva (e/n), cls_id (e/n)
## + edges from 6bf2af4 (vertex names):
##  [1] 14851--14736 14736--14986 15037--14736 14736--15120 14739--14834
##  [6] 14746--14812 14746--14953 14746--14954 14746--14812 14812--14953
## [11] 14812--14954 14739--14834 14851--15037 14746--14954 14812--14954
## [16] 14953--14954 14736--14986 14826--14986 14986--14834 14986--15120
## [21] 14986--15129 15003--14834 14843--15003 15003--15037 15003--15129
## [26] 14953--15014 14954--15014 15037--14736 15037--15037 15037--15120
## + ... omitted several edges
The Edges of The Network of Class 508 Out-of-School Activities
plot(g2,main="The Network of Class 508 Out-of-School Activities")

g2
## IGRAPH 8fdfbca DN-- 30 13 -- 
## + attr: name (v/c), timea (e/n), ona (e/n), oca (e/n), acta (e/n),
## | nacta (e/n), wrka (e/n), nbra (e/n), otha (e/n), wkna (e/n), bfa
## | (e/n), lva (e/n), cls_id (e/n)
## + edges from 8fdfbca (vertex names):
##  [1] 14736->14986 14736->15037 14736->15120 14834->14739 14843->15003
##  [6] 14954->14746 14954->14812 14954->14953 15076->14824 15076->14851
## [11] 15120->14834 15177->14951 15177->15086
Raw Data
head(attitudes)
##   std_id sem_id cls_id tlks tlkt like_c imp egrd jgrd sub tot frn cmt tch voln
## 1  16144      1    851    4    2     NA  NA    2   -1   2   2   3   2   2   NA
## 2  16181      1    851    4    2      2  NA    3    0   2   3   4   3   2   NA
## 3  16247      1    851    4    4      2  NA    4    0   2   2   4   3   2   NA
## 4  16399      1    851    4    2      3  NA    4    1   3   2   3   1   2   NA
## 5  15469      1    901    2    2      3  NA    2    0   3   4   4   4   4   NA
## 6  15947      1    901    3    2      3  NA    3    0   3   4   4   3   4   NA
##   call misb sanc sftch tfstd tfros chal prev
## 1   NA   NA   NA    NA    NA    NA   NA    0
## 2   NA   NA   NA    NA    NA    NA    3    0
## 3   NA   NA   NA    NA    NA    NA    2    0
## 4   NA   NA   NA    NA    NA    NA    2    0
## 5   NA   NA   NA    NA    NA    NA    1    0
## 6   NA   NA   NA    NA    NA    NA    3    0
head(sem1)
##   std_id alter_id timea ona oca acta nacta wrka nbra otha wkna bfa lva cls_id
## 2 149824   119516     5   1   0    0     0    0    0    0    0   0   0    251
## 3 149824   122634     1   1   0    0     0    0    0    0    0   0   0    251
## 4 149824   114679     1   0   1    0     0    0    0    0    0   0   0    251
## 7  16868    17142     0   1   0    1     0    0    0    0    0   0   0    851
## 8  16877    16681     1   0   1    1     0    0    0    0    0   0   0    851
## 9  16877    16769     1   0   1    1     0    0    0    0    0   0   0    851

Q5-2

演算法實作(不可用套件的函數) 建立自訂函數 HITS(“矩陣名稱”),提供讀取矩陣名稱的引數,自行撰寫HITS的演算流程,並以data frame方式,輸出每個節點的 authority, and hub scores,並與igraph提供的函數計算結果進行比對。

自訂函數HITS ( ) 後,請放入一個鄰近矩陣(不限,也可以用上課提供的資料),來觀看HITS運算結果。並將鄰近矩陣轉換成network,放入igraph提供的函數來進行比較。

rm(list=ls(all=TRUE))
setwd("~/Desktop/109-2 碩班/109-2 Network  Models/week5 centrality/Lab1")
edges <- read.table("sample_edgelist.csv", header=T, sep=",")
nodes <- read.table("sample_node.csv", header=T, sep=",")
#directed=TRUE 表示有方向性
g1 = graph.data.frame (edges, directed=TRUE, nodes)

Input data : Adjancency Matrix

#transform from edge list to full matrix(adjacency)
m <- as_adjacency_matrix(g1,type="both",names=TRUE,sparse=F)
m
##       23732 23778 23871 58009 58098 23824 58256
## 23732     0     1     1     0     1     0     0
## 23778     0     0     1     0     1     1     0
## 23871     0     0     0     1     1     0     0
## 58009     0     0     0     0     1     0     0
## 58098     0     0     0     0     0     0     1
## 23824     0     0     0     0     0     0     0
## 58256     0     0     0     0     0     0     0
g1
## IGRAPH 377f15f DNW- 7 10 -- 
## + attr: name (v/c), sex (v/n), age (v/n), weight (e/n)
## + edges from 377f15f (vertex names):
##  [1] 23732->23778 23732->23871 23732->58098 23778->23824 23778->23871
##  [6] 23778->58098 23871->58009 23871->58098 58009->58098 58098->58256

2.1 Result of self-written HITS Model

HITS(g1)
##    node authority.score    hub.score
## 1 23732    0.000000e+00 3.206128e-01
## 2 23778    1.293941e-01 3.206128e-01
## 3 23871    2.587882e-01 1.953496e-01
## 4 58009    7.883992e-02 1.634247e-01
## 5 58098    4.035837e-01 3.552561e-79
## 6 23824    1.293941e-01 0.000000e+00
## 7 58256    8.773189e-79 0.000000e+00

2.2 Using R package functions for verification

au <- authority_score(g1, scale = TRUE, weights = NULL, options = arpack_defaults)
au <- au$vector
hub <- hub_score(g1, scale = TRUE, weights = NULL, options = arpack_defaults)
hub <- hub$vector
data.frame("node"= names(V(g1)) ,"authority score" = au,"hub score" = hub)
##        node authority.score    hub.score
## 23732 23732       0.0000000 2.562966e-01
## 23778 23778       0.1206043 4.094185e-01
## 23871 23871       0.2844410 1.000000e+00
## 58009 58009       0.7529046 2.852408e-01
## 58098 58098       1.0000000 4.051664e-17
## 23824 23824       0.2697215 3.236710e-17
## 58256 58256       0.0000000 3.236710e-17