{Network Analysis Tutorial \ With Applications in \R }

Network Analysis Tutorial
With Applications in R  

Bruce A. Desmarais

University of Massachusetts Amherst

1  ^Introduction

1.1  ^This Workshop

1.2  ^Networks: A Broad Conceptual Framework

A network: units and the relationships among them

2  ^Introduction to R

2.1  ^Programming in R: First Steps

# In R, functions are executed as '<function.name>(<input>)'
# The exception is the 'print()' function, which can 
# be executed by typing the name of the object to 
# print and hitting enter
# Try
print('Hello World!')
# Now Try
'Hello World!'

2.2  ^Objects: Vectors and Matrices

# Vectors contain data of the same type
# Create a character vector
char_vec <- c('a','b','c')

# Create a numeric vector
num_vec <- numeric(5)

# Change Values
num_vec[1] <- 4
num_vec[2:4] <- c(3,2,1) 
num_vec[5] <- '5'
num_vec[-3]

# Matrices are 2-dimensional Vectors
# Create a matrix
MyMat <- matrix(1:25,nrow=5,ncol=5)

# Access (or change) a cell
MyMat[1,3] 
MyMat[2,4] <- 200 
MyMat[2,4]

# Rows then columns
MyMat[1,]
MyMat[,3] <- c(1,1,1,1,1)
MyMat[,3]

# Multiple rows/columns and negation
MyMat[1:3,-c(1:3)]

# The matrix (shortcut for network objects)
MyMat[,]

2.3  ^Objects: Lists and Data Frames

# Lists are indexed by double square brackets '[[ ]]'
# Create a list
stuff_list <- list()

# Store a number
stuff_list[[1]] <- 3

# Store a matrix
stuff_list[[2]] <- MyMat

# Store a function
stuff_list[[3]] <- sqrt

# List elements can be named using character vectors
names(stuff_list) <- c('Three','Matrix','Function')

# A Data Frame is a Special kind of list, which acts like a dataset
# Create a data frame containing numbers and a character vector
let_vec <- c('a','b','c','d','e')
dat <- data.frame(MyMat, num_vec,let_vec)
names(dat) <- c("mm1","mm2","mm3","mm4","mm5","nv","lv")

# Variables/list elements can be accessed with '$'
dat$lv

2.4  ^R Packages

# Use install.packages() to install
# library() or require() to use the package
install.packages('statnet') # - suite of great network analysis packages
install.packages('igraph') # - other great network analysis package
library(statnet)
 

# R is OPEN SOURCE,  SO LOOK AT THE CODE !!!
degree

# And give credit to the authors
citation('statnet')

2.5  ^Interactions with the Hard Drive

# Setting the working directory
setwd("~/Dropbox/professional/Teaching/Current/RIntro/")

# Saving dat as a .csv
write.csv(dat,'dat.csv', row.names=F)

# Loading it as such
dat2 <- read.csv('dat.csv',stringsAsFactors=F)

# Save and load an R-format list
dput(stuff_list,'stuff_list.txt')
stuff_list2 <- dget('stuff_list.txt')

# Save to RData file and load
save(list=c("stuff_list","dat"),file="StuffNDat.RData")
load("StuffNDat.RData")

# Save and load it all
save.image('ALL.RData')
load('ALL.RData')
# Beware save.image()!!

2.6  ^R can help

# When you know the function name exactly
help("network")
# or
?network

# A substring of the function name is known
apropos('etwo')

# Find help files containing a string
help.search("network")

# Browsable Help
help.start()

R help files contain

2.7  ^Use R for graphics

# Initialize the plot
# first column of MyMat on x-axis
# second on y-axis
plot(MyMat[,1],MyMat[,2],pch=1:5,cex=3,col=1:5) 

# Draw a line
lines(MyMat[,1],MyMat[,2], lty =2,  col="grey45",lwd=3)

# Re-write the points
points(MyMat[,1],MyMat[,2],pch=1:5,cex=3, col=1:5)

# add a legend
legend("topleft", legend = c("Circle", "Triangle", "Plus", "Times", "Diamond"), col=1:5,pch=1:5)

# Check out all the other options
?par

# Save it this time (as a PDF)
pdf('myfirstplot.pdf')
plot(MyMat[,1],MyMat[,2],pch=1:5,cex=3,col=1:5)
lines(MyMat[,1],MyMat[,2], lty =2,  ,col="grey45",lwd=3)
points(MyMat[,1],MyMat[,2],pch=1:5,cex=3,col=1:5)
legend("topleft", legend = c("Circle", "Triangle", "Plus", "Times", "Diamond"), col=1:5,pch=1:5)
dev.off()

2.8  ^Use Loops

# loop to transpose MyMat # nrow() and ncol() count rows, cols respectively ncol(MyMat) nrow(MyMat)
tMyMat <- matrix(NA,ncol(MyMat),nrow(MyMat))
# Loop over a vector using the variable i
for(i in 1:nrow(MyMat)){
   # Loop over a vector indexing by j
      for(j in 1:ncol(MyMat)){
         tMyMat[j,i] <- MyMat[i,j]
         }
    }



# This time using while loop
 ttMyMat <- matrix(0,nrow(MyMat),ncol(MyMat))

# i variable to iterate up
 i <- 1
# Go until the outer loop is done
 while(i <= nrow(tMyMat)){
    # j variable to iterate up
    j <- 1
    while(j <= ncol(tMyMat)){
       ttMyMat[j,i] <- tMyMat[i,j]
       j <- j + 1
     }
    i <- i + 1
  }
	

tMyMat
ttMyMat
MyMat

2.8  ^ Vectorized math in R

MyMat[,1]*3
MyMat + 2
MyMat[,5]*MyMat[,3]
MyMat[,1:4]+MyMat[,5]
sd(MyMat)
sqrt(MyMat)

3  ^Introduction to Networks

3.1  ^Network Terminology and the Basics

3.2  ^Network and Network Data Types

3.3  ^Network Data

# Getting Network Data into R - flexible use of CSV
# Read in vertex dataset
allyV <- read.csv("allyVLD.csv",stringsAsFactors=F)

# Read in edgelist
allyEL <- read.csv("allyEL.csv", stringsAsFactors=F)

# Read in contiguity
contig <- read.csv("contiguity.csv",stringsAsFactors=F,row.names=1)

3.4  ^ Creating Network Objects: Defense Pacts (2000)

# Using the network package
require(network)
# (1) Initialize network
AllyNet <- network.initialize(nrow(allyV),dir=F)

# (2) Set vertex labels
network.vertex.names(AllyNet)  <- allyV$stateabb

# (3) Add in the edges
# Note, edgelist must match vertex labels
AllyNet[as.matrix(allyEL)]  <- 1

# (4) Store country code attribute
set.vertex.attribute(AllyNet,"ccode",allyV$ccode)

# (5) Store year attribute
set.vertex.attribute(AllyNet,"created",allyV$styear)

# (6) Store network attribute
set.network.attribute(AllyNet,"contiguous",as.matrix(contig))

# Simple plot
plot(AllyNet,displaylabels=T,label.cex=.5)
# check out all the options with ?plot.network

3.5  ^Connectedness: Degree Centrality

require(sna)
# Degree Centrality is the number of connections by node
dc <- degree(AllyNet)

# Get ordered labels
dclab <- network.vertex.names(AllyNet)[order(dc)]

# Sort degree centrality
dcs <- sort(dc)
# plot
plot(1:length(dc),dcs,type="n")
text(1:length(dc),dcs,lab=dclab,adj=c(1,.05), cex=.35,srt=90)

3.6  ^Connectedness: Eigenvector Centrality

x = λ-1Ax
# Eigenvector Centrality Recursively Considers Neighbors' Centrality
ec <- evcent(symmetrize(AllyNet))

# Get ordered labels
eclab <- network.vertex.names(AllyNet)[order(ec)]

# Sort eigenvector centrality
ecs <- sort(ec)
# plot
plot(1:length(ec),ecs,type="n")
text(1:length(ec),ecs,lab=eclab,adj=c(1,.05), cex=.35,srt=90)


3.7  ^Connectedness: Betweenness Centrality

# Betweenness Centrality Considers unlikely connections
bc <- betweenness(AllyNet)

# Get ordered labels
bclab <- network.vertex.names(AllyNet)[order(bc)]

# Sort degree centrality
bcs <- sort(bc)
# plot
plot(1:length(bc),bcs,type="n")
text(1:length(bc),bcs,lab=eclab,adj=c(1,.05), cex=.35,srt=90)

3.8  ^Comparing Centrality Measures

# DC vs. EC
plot(dc,ec,type="n")
text(dc,ec,lab=network.vertex.names(AllyNet), cex=.75)

# DC vs. BC
plot(dc,bc,type="n")
text(dc,bc,lab=network.vertex.names(AllyNet), cex=.75)

# BC vs. EC
plot(bc,ec,type="n")
text(bc,ec,lab=network.vertex.names(AllyNet), cex=.75)

3.9  ^Degree Distribution

# Histogram of the degree centrality over nodes
hist(dc, col="grey60")

Degree distributions often exhibit power laws, meaning
f(x) ∝ x−α
"scale free" networks exhibit power law degree distributions.
# Read in Igraph
library(igraph)

# Find the alpha that fits
alpha <- power.law.fit(dc+1)$alpha


# Approximate normalizing constant
 C <- 1/sum((1:10000)^-alpha)

# Plot to compare
# log-scale density plot should be on a straight line
 plot(sort(dc+1), (sort(dc)+2)^-alpha*C, xlab="X", ylab="f(x)",type="l",col="red",lwd=3, log="xy")

# Remove igraph to restore network functions
 detach("package:igraph")

3.10  ^Degrees of Separation - Geodesic Distance

# Get geodesic distance matrix 
AllyGD <- geodist(AllyNet)$gdist
AllyGD <- geodist(AllyNet, inf.replace = 200)$gdist

# Return a vector that is the average of each row of 
AvDist <- apply(AllyGD,1,mean)

# Compare Average Distance with Degree
plot(rank(AvDist),degree(AllyNet),col="blue", pch="+", cex=1.5)

# Plot network within 2 degrees
plot(network(AllyGD == 2,dir=F), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75,displayisolates=F)


3.11  ^Position and Role: Structural Equivalence

# Get Hamming Distance SeQ Matrix
AllySE <- sedist(AllyNet, mode="graph")

# Plot network within 5
plot(network(AllySE <= 5,dir=F), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.5, displayisolates=F)

# Cluster based on SeQ
 plot(equiv.clust(AllyNet), lab=network.vertex.names(AllyNet), hang=.01, cex=.5)

3.12  ^Birds of a Feather: Homophily/Assortative Mixing

# Do states established at the same time ally?
# Get creation date
year <- get.vertex.attribute(AllyNet,"created")

# Compute distance
yrdist <- as.matrix(dist(year,diag=0, upper=T))

# Compare alliance networks based on large or small differences
# Far apart
plot(network((yrdist > 50)*AllyNet,dir=F), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, displayisolates=F)

# Close
plot(network((yrdist < 50)*AllyNet,dir=F), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, displayisolates=F)

4  ^Network Visualization

4.1  ^Positioning Nodes

# The gplot argument 'mode' offers many options
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", label.cex=.75, displayisolates=F, mode="circle")

# Save Coordinates
coords <- gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", label.cex=.75, displayisolates=F)

# Always save coordinates of published plots
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", label.cex=.75, displayisolates=F,coord=coords )

# Plot According to Country code and year
ccode <- get.vertex.attribute(AllyNet,"ccode")
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", label.cex=.75, displayisolates=F,coord=cbind(year,ccode))


4.2  ^Node Color, Size and Shape

# vertex.col, vertex.sides, vertex.cex
# Color Gradient according to year established
library(colorRamps)
grad <- blue2red(length(year))
vcol <- grad[rank(year)]
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", vertex.col=vcol,label.cex=.75, displayisolates=F,coord=coords )

# Size Nodes According to the k-cores (max value cores)
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", vertex.col=vcol,label.cex=.75, displayisolates=F,coord=coords, vertex.cex=kcores(AllyNet)/20 )

# Shape According to betweenness
bcSides <- 4 - (bc>0)
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col="grey55", vertex.col=vcol,label.cex=.75, displayisolates=F,coord=coords, vertex.cex=kcores(AllyNet)/20, vertex.sides= bcSides)


4.3  ^Edge Features

# edge.col, edge.lwd, edge.lty
# Color Edges according to contiguity
ecol <- ifelse(as.matrix(contig),"black","gray75")
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col=ecol,label.cex=.75, displayisolates=F,coord=coords )

# Edge Size according to SE Distance
gplot(AllyNet,gmode="graph", label = network.vertex.names(AllyNet), edge.col=ecol,label.cex=.75,edge.lwd = sedist(AllyNet)^4/200^3,displayisolates=F,coord=coords )

5  ^Communities and Clusters in Networks

5.1  ^Clustering by Structural Equivalence

# Blockmodeling is the Classical SNA Approach
# Goal is to group nodes based on structural equivalence
# Run a block model, identifying the number of clusters 
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=6)

# Create Vector for block membership, and fill
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow","green")

# Take a look at them
plot(network(AllyNet), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, displayisolates=F, vertex.col=bcols[bmem])

# Try it with 8 communities
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=8)
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow", "grey50","brown","green")
plot(network(AllyNet), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, displayisolates=F, vertex.col=bcols[bmem])

5.2  ^Clustering Based on Modularity

Modularity = 1/(2m)Σij[Aij-kikj/(2m)]1(ci=cj)
# Modularity-based community detection popular in physics
# Modularity = Dense within communities, sparse across 
library(igraph)

# Convert into a graph
AllyGR <- graph.adjacency(AllyNet,mode="undirected")
mem <- walktrap.community(AllyGR)$membership

# Get memberships and plot
mem <- mem +1
detach("package:igraph")
bcols <- c("black","white","red","blue","yellow", "grey50","brown","green", "pink", "orange")
plot(network(AllyNet), label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75,  displayisolates=F, vertex.col=bcols[mem])