Network_HW5_ Centrality (Individual-level)
r09228001 臺大地理所碩一 楊宇翔 2021/4/07 (Wed.)
Q5-1 網絡分析實作 利用 data(studentnets.peerinfl, package=“NetData”) 提供的資料表(https://www.rdocumentation.org/packages/NetData/versions/0.3/topics/peerinfl),檢視 sem1, sem2資料表的欄位std_id / alter_id,表示某班級兩學期的同學友誼的指認關係。
• 作業5-1提供data(studentnets.peerinfl) 的sem1,該表格為某校在某學期的各班同學指認不同類型的朋友關係的調查。
• 請先篩選出班級(cls_id)為508的edges,再針對會一起「參加學校活動(acta)」以及「參加校外活動(nacta)」的關係,分別建立好友網絡,計算network centrality的各種指標(利用套件的函數),比較這些同學在班上人際網絡的關係(如比較每個人在「學校活動」網絡與「校外活動」網絡的差異等)。
欄位說明: • std_id - student id • alter_id - alter id • cls_id - class id • acta - In school activities (1 = yes, 0 = np) • nacta - Non-school activities (1 = yes, 0 = no)
How we measure the centrality:Freeman’s Measures: Degree, closeness, and betweenness Network-level centralization: More indices: Information centrality and Eigenvector centrality Directed network centrality: HITS and PageRank algorithms Spatial network centrality: Geo-location + PageRank
#install.packages("spData")
#install.packages("NetData")
rm(list=ls(all=TRUE))
library(igraph)
library(spData)
library(NetData)
data(studentnets.peerinfl)
preVtable<- attitudes[attitudes$cls_id == "508",]
Vtable <- preVtable$std_id
Etable <- sem1[sem1$cls_id == "508",]
Etable1 <- Etable[Etable$acta == 1,]
g1 = graph.data.frame (Etable1, directed=F, Vtable)
Etable2 <- Etable[Etable$nacta == 1,]
g2 = graph.data.frame (Etable2, directed=TRUE, Vtable)
d1 <- degree(g1, v = V(g1), mode = c("all", "out", "in", "total"), loops = TRUE, normalized = F)
hist(d1,ylim = c(0, 15),col="gold",xlab="Central Degree", main="In-School Activity Cental Degree Network")
d1
## 14833 14843 14851 14853 14927 15003 15037 14736 14826 14986 15120 15129 14824
## 0 2 2 0 1 5 8 7 1 7 6 3 0
## 14866 15010 15076 15130 14739 14746 14812 14834 14953 14954 14951 14972 15014
## 0 0 0 0 2 5 5 4 4 6 0 0 2
## 15061 15065 15086 15177
## 3 3 0 0
summary(d1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.000 2.533 4.750 8.000
d2 <- degree(g2, v = V(g2), mode = c("all", "out", "in", "total"), loops = TRUE, normalized = T)
hist(d2,ylim = c(0, 15),col="gold",xlab="Central Degree", main="Out-School Activity Cental Degree Network")
d2
## 14833 14843 14851 14853 14927 15003 15037
## 0.00000000 0.03448276 0.03448276 0.00000000 0.00000000 0.03448276 0.03448276
## 14736 14826 14986 15120 15129 14824 14866
## 0.10344828 0.00000000 0.03448276 0.06896552 0.00000000 0.03448276 0.00000000
## 15010 15076 15130 14739 14746 14812 14834
## 0.00000000 0.06896552 0.00000000 0.03448276 0.03448276 0.03448276 0.06896552
## 14953 14954 14951 14972 15014 15061 15065
## 0.03448276 0.10344828 0.03448276 0.00000000 0.00000000 0.00000000 0.00000000
## 15086 15177
## 0.03448276 0.06896552
summary(d2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.03448 0.02989 0.03448 0.10345
#An actor is considered important if he/she is relatively close to all other actors. #Closeness is based on the inverse of the distance of each actor to every other actor in the network.
dist1 <- distances(g1)
sp1 <- shortest_paths(g1,from = V(g1)[2] , to = V(g1))
## Warning in shortest_paths(g1, from = V(g1)[2], to = V(g1)): At
## structural_properties.c:745 :Couldn't reach some vertices
#sp1$vpath
c1=closeness(g1, vids = V(g1), mode = c("out", "in", "all", "total"), weights = NULL, normalized = FALSE)
## Warning in closeness(g1, vids = V(g1), mode = c("out", "in", "all", "total"), :
## At centrality.c:2784 :closeness centrality is not well-defined for disconnected
## graphs
hist(c1,ylim = c(0, 15),col="gold",xlab="Closeness", main="In-School Activity Network Closeness ")
c1
## 14833 14843 14851 14853 14927 15003
## 0.001149425 0.001953125 0.001926782 0.001149425 0.001845018 0.001976285
## 15037 14736 14826 14986 15120 15129
## 0.001960784 0.001949318 0.001912046 0.001956947 0.001945525 0.001953125
## 14824 14866 15010 15076 15130 14739
## 0.001149425 0.001149425 0.001149425 0.001149425 0.001149425 0.001915709
## 14746 14812 14834 14953 14954 14951
## 0.001324503 0.001324503 0.001960784 0.001326260 0.001326260 0.001149425
## 14972 15014 15061 15065 15086 15177
## 0.001149425 0.001322751 0.001923077 0.001886792 0.001149425 0.001149425
summary(c1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001149 0.001149 0.001326 0.001544 0.001941 0.001976
dist2 <- distances(g2)
sp2 <- shortest_paths(g2,from = V(g2)[2] , to = V(g2))
## Warning in shortest_paths(g2, from = V(g2)[2], to = V(g2)): At
## structural_properties.c:745 :Couldn't reach some vertices
#sp1$vpath
c2=closeness(g2, vids = V(g2), mode = c("out", "in", "all", "total"), weights = NULL, normalized = FALSE)
## Warning in closeness(g2, vids = V(g2), mode = c("out", "in", "all", "total"), :
## At centrality.c:2784 :closeness centrality is not well-defined for disconnected
## graphs
hist(c2,ylim = c(0, 25),col="gold",xlab="Closeness", main="Out-School Activity Network Closeness ")
c2
## 14833 14843 14851 14853 14927 15003
## 0.001149425 0.001189061 0.001149425 0.001149425 0.001149425 0.001149425
## 15037 14736 14826 14986 15120 15129
## 0.001149425 0.001373626 0.001149425 0.001149425 0.001230012 0.001149425
## 14824 14866 15010 15076 15130 14739
## 0.001149425 0.001149425 0.001149425 0.001231527 0.001149425 0.001149425
## 14746 14812 14834 14953 14954 14951
## 0.001149425 0.001149425 0.001189061 0.001149425 0.001277139 0.001149425
## 14972 15014 15061 15065 15086 15177
## 0.001149425 0.001149425 0.001149425 0.001149425 0.001149425 0.001231527
summary(c2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001149 0.001149 0.001149 0.001172 0.001149 0.001374
#A person who lies on communication paths can control communication flow, and is thus important. #Betweenness counts the number of shortest paths between i and k that actor j resides on.
vb1<- betweenness(g1, v = V(g1), directed = TRUE, weights = NULL, nobigint = TRUE, normalized = FALSE)
hist(vb1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Betweenness ")
vb1
## 14833 14843 14851 14853 14927 15003 15037
## 0.0000000 30.0000000 0.0000000 0.0000000 0.0000000 41.5000000 16.6666667
## 14736 14826 14986 15120 15129 14824 14866
## 5.3333333 0.0000000 20.5000000 1.0000000 6.6666667 0.0000000 0.0000000
## 15010 15076 15130 14739 14746 14812 14834
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 15.3333333
## 14953 14954 14951 14972 15014 15061 15065
## 0.6666667 1.3333333 0.0000000 0.0000000 0.0000000 22.0000000 12.0000000
## 15086 15177
## 0.0000000 0.0000000
summary(vb1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 5.767 6.333 41.500
vb2<- betweenness(g2, v = V(g2), directed = TRUE, weights = NULL, nobigint = TRUE, normalized = FALSE)
hist(vb2,ylim = c(0, 30),col="gold",xlab="Closeness", main="In-School Activity Network Betweenness ")
vb2
## 14833 14843 14851 14853 14927 15003 15037 14736 14826 14986 15120 15129 14824
## 0 0 0 0 0 0 0 0 0 0 2 0 0
## 14866 15010 15076 15130 14739 14746 14812 14834 14953 14954 14951 14972 15014
## 0 0 0 0 0 0 0 2 0 0 0 0 0
## 15061 15065 15086 15177
## 0 0 0 0
summary(vb2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1333 0.0000 2.0000
此指標將centralization視為是actors之間的分散程度(dispersion)或異質程度(heterogeneity)
centr_degree(g1)
## $res
## [1] 0 2 2 0 1 5 8 7 1 7 6 3 0 0 0 0 0 2 5 5 4 4 6 0 0 2 3 3 0 0
##
## $centralization
## [1] 0.1885057
##
## $theoretical_max
## [1] 870
centr_degree(g2)
## $res
## [1] 0 1 1 0 0 1 1 3 0 1 2 0 1 0 0 2 0 1 1 1 2 1 3 1 0 0 0 0 1 2
##
## $centralization
## [1] 0.03804994
##
## $theoretical_max
## [1] 1682
The corresponding entry of the principal eigenvector of the adjacency matrix of the network.It assigns relative scores to all nodes in the network based on the principle that connections to high-scoring nodes contribute more.
ec <- eigen_centrality(g1, directed = FALSE, scale = TRUE,weights = NULL, options = arpack_defaults)
ec1 <- ec$vector
hist(ec1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Eigenvector centrality ")
ec1
## 14833 14843 14851 14853 14927 15003
## 0.000000e+00 5.369102e-02 3.413434e-01 0.000000e+00 6.688384e-04 2.975721e-01
## 15037 14736 14826 14986 15120 15129
## 9.597872e-01 1.000000e+00 1.366099e-01 7.843313e-01 9.559067e-01 2.402684e-01
## 14824 14866 15010 15076 15130 14739
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 7.470779e-02
## 14746 14812 14834 14953 14954 14951
## 5.400702e-17 2.700351e-17 2.144634e-01 5.400702e-17 0.000000e+00 0.000000e+00
## 14972 15014 15061 15065 15086 15177
## 0.000000e+00 0.000000e+00 1.068924e-02 3.840064e-03 0.000000e+00 0.000000e+00
summary(ec1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1691 0.1950 1.0000
ec <- eigen_centrality(g2, directed = FALSE, scale = TRUE,weights = NULL, options = arpack_defaults)
ec1 <- ec$vector
hist(ec1,ylim = c(0, 25),col="gold",xlab="Closeness", main="In-School Activity Network Eigenvector centrality ")
ec1
## 14833 14843 14851 14853 14927 15003
## 4.614386e-17 0.000000e+00 0.000000e+00 4.614386e-17 4.614386e-17 0.000000e+00
## 15037 14736 14826 14986 15120 15129
## 5.257311e-01 1.000000e+00 4.614386e-17 5.257311e-01 8.506508e-01 4.614386e-17
## 14824 14866 15010 15076 15130 14739
## 0.000000e+00 4.614386e-17 4.614386e-17 0.000000e+00 4.614386e-17 3.249197e-01
## 14746 14812 14834 14953 14954 14951
## 0.000000e+00 0.000000e+00 6.180340e-01 0.000000e+00 0.000000e+00 0.000000e+00
## 14972 15014 15061 15065 15086 15177
## 4.614386e-17 4.614386e-17 4.614386e-17 4.614386e-17 0.000000e+00 0.000000e+00
summary(ec1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1282 0.0000 1.0000
plot(g1, main="The Network of Class 508 In-School Activities")
g1
## IGRAPH 6bf2af4 UN-- 30 38 --
## + attr: name (v/c), timea (e/n), ona (e/n), oca (e/n), acta (e/n),
## | nacta (e/n), wrka (e/n), nbra (e/n), otha (e/n), wkna (e/n), bfa
## | (e/n), lva (e/n), cls_id (e/n)
## + edges from 6bf2af4 (vertex names):
## [1] 14851--14736 14736--14986 15037--14736 14736--15120 14739--14834
## [6] 14746--14812 14746--14953 14746--14954 14746--14812 14812--14953
## [11] 14812--14954 14739--14834 14851--15037 14746--14954 14812--14954
## [16] 14953--14954 14736--14986 14826--14986 14986--14834 14986--15120
## [21] 14986--15129 15003--14834 14843--15003 15003--15037 15003--15129
## [26] 14953--15014 14954--15014 15037--14736 15037--15037 15037--15120
## + ... omitted several edges
plot(g2,main="The Network of Class 508 Out-of-School Activities")
g2
## IGRAPH 8fdfbca DN-- 30 13 --
## + attr: name (v/c), timea (e/n), ona (e/n), oca (e/n), acta (e/n),
## | nacta (e/n), wrka (e/n), nbra (e/n), otha (e/n), wkna (e/n), bfa
## | (e/n), lva (e/n), cls_id (e/n)
## + edges from 8fdfbca (vertex names):
## [1] 14736->14986 14736->15037 14736->15120 14834->14739 14843->15003
## [6] 14954->14746 14954->14812 14954->14953 15076->14824 15076->14851
## [11] 15120->14834 15177->14951 15177->15086
head(attitudes)
## std_id sem_id cls_id tlks tlkt like_c imp egrd jgrd sub tot frn cmt tch voln
## 1 16144 1 851 4 2 NA NA 2 -1 2 2 3 2 2 NA
## 2 16181 1 851 4 2 2 NA 3 0 2 3 4 3 2 NA
## 3 16247 1 851 4 4 2 NA 4 0 2 2 4 3 2 NA
## 4 16399 1 851 4 2 3 NA 4 1 3 2 3 1 2 NA
## 5 15469 1 901 2 2 3 NA 2 0 3 4 4 4 4 NA
## 6 15947 1 901 3 2 3 NA 3 0 3 4 4 3 4 NA
## call misb sanc sftch tfstd tfros chal prev
## 1 NA NA NA NA NA NA NA 0
## 2 NA NA NA NA NA NA 3 0
## 3 NA NA NA NA NA NA 2 0
## 4 NA NA NA NA NA NA 2 0
## 5 NA NA NA NA NA NA 1 0
## 6 NA NA NA NA NA NA 3 0
head(sem1)
## std_id alter_id timea ona oca acta nacta wrka nbra otha wkna bfa lva cls_id
## 2 149824 119516 5 1 0 0 0 0 0 0 0 0 0 251
## 3 149824 122634 1 1 0 0 0 0 0 0 0 0 0 251
## 4 149824 114679 1 0 1 0 0 0 0 0 0 0 0 251
## 7 16868 17142 0 1 0 1 0 0 0 0 0 0 0 851
## 8 16877 16681 1 0 1 1 0 0 0 0 0 0 0 851
## 9 16877 16769 1 0 1 1 0 0 0 0 0 0 0 851
rm(list=ls(all=TRUE))
setwd("~/Desktop/109-2 碩班/109-2 Network Models/week5 centrality/Lab1")
edges <- read.table("sample_edgelist.csv", header=T, sep=",")
nodes <- read.table("sample_node.csv", header=T, sep=",")
#directed=TRUE 表示有方向性
g1 = graph.data.frame (edges, directed=TRUE, nodes)
HITS <- function(g_array){
#先建立空間
hub_list <- rep(x = 1,nrow(nodes))
auth_list <- rep(x = 1,nrow(nodes))
#重複演算999次
for(count in c(1:99)){
for(id in c(1:length(V(g_array)))){
n = 0
for(eid in c( 1:length(E(g_array)))){
if (ends(g_array, E(g_array)[eid])[2] == names(V(g_array))[id] ){
n <- n + hub_list[ which(ends(g_array, E(g_array)[eid])[1] == names(V(g_array)) )]
}
}
auth_list[id] <- n
}
for(id in c(1:length(V(g_array)))){
n = 0
for(eid in c( 1:length(E(g_array)))){
if (ends(g_array, E(g_array)[eid])[1] == names(V(g_array))[id] ){
n <- n + auth_list[ which(ends(g_array, E(g_array)[eid])[2] == names(V(g_array)) )]
}
}
hub_list[id] <- n
}
auth_list <- auth_list/sum(auth_list)
hub_list <- hub_list/sum(hub_list)
}
data.frame("node"= names(V(g_array)) ,"authority score" = auth_list,"hub score" = hub_list)
}
#transform from edge list to full matrix(adjacency)
m <- as_adjacency_matrix(g1,type="both",names=TRUE,sparse=F)
m
## 23732 23778 23871 58009 58098 23824 58256
## 23732 0 1 1 0 1 0 0
## 23778 0 0 1 0 1 1 0
## 23871 0 0 0 1 1 0 0
## 58009 0 0 0 0 1 0 0
## 58098 0 0 0 0 0 0 1
## 23824 0 0 0 0 0 0 0
## 58256 0 0 0 0 0 0 0
g1
## IGRAPH 377f15f DNW- 7 10 --
## + attr: name (v/c), sex (v/n), age (v/n), weight (e/n)
## + edges from 377f15f (vertex names):
## [1] 23732->23778 23732->23871 23732->58098 23778->23824 23778->23871
## [6] 23778->58098 23871->58009 23871->58098 58009->58098 58098->58256
HITS(g1)
## node authority.score hub.score
## 1 23732 0.000000e+00 3.206128e-01
## 2 23778 1.293941e-01 3.206128e-01
## 3 23871 2.587882e-01 1.953496e-01
## 4 58009 7.883992e-02 1.634247e-01
## 5 58098 4.035837e-01 3.552561e-79
## 6 23824 1.293941e-01 0.000000e+00
## 7 58256 8.773189e-79 0.000000e+00
au <- authority_score(g1, scale = TRUE, weights = NULL, options = arpack_defaults)
au <- au$vector
hub <- hub_score(g1, scale = TRUE, weights = NULL, options = arpack_defaults)
hub <- hub$vector
data.frame("node"= names(V(g1)) ,"authority score" = au,"hub score" = hub)
## node authority.score hub.score
## 23732 23732 0.0000000 2.562966e-01
## 23778 23778 0.1206043 4.094185e-01
## 23871 23871 0.2844410 1.000000e+00
## 58009 58009 0.7529046 2.852408e-01
## 58098 58098 1.0000000 4.051664e-17
## 23824 23824 0.2697215 3.236710e-17
## 58256 58256 0.0000000 3.236710e-17