Sunbelt2013: introToSNA-script.R

File introToSNA-script.R, 20.6 KB (added by lxwang, 6 years ago)
Line 
1
2# ==============================
3# Introduction to basic R syntax
4# ==============================
5
61 + 3                   # evaluation
7
8a <- 3                  # assignment
9a                               # evaluation
10
11a<-3                            # spacing does not matter
12a    <-    3            # spacing does not matter
13
14sqrt(a)                 # use the square root function
15b <- sqrt(a)            # use function and save result
16b
17
18x                               # evaluate something that is not there
19
20a == b                  # is a equal to b?         
21a != b                  # is a not equal to b?
22
23# list objects in the R environment
24# (same as viewing the "Workspace" pane in RStudio)
25ls()
26
27# Getting help in R
28
29# get help with R generally
30# (same as viewing the "Help" pane in RStudio)
31help.start()
32
33# More targeted help
34?sqrt                   # get specific help for a function
35??sqrt                  # looking for help pertaining to sqrt
36
37apropos("sq")           # it's on the tip of my tongue...
38
39rm(a)                   # remove a single object
40rm(list=ls())           # remove everything from the environment
41
42# -------------------------
43# Vectors and matrices in R
44# -------------------------
45
46# Creating vectors using the "combine" operator
47a <- c(1,3,5)           # create a vector by combining values
48a
49a[2]            # select the second element
50
51b <- c("one","three","five")    # also works with strings
52b
53b[2]            # select the second element
54
55a <- c(a,a)     # can apply recursively
56a
57a <- c(a,b)     # mixing types---what happens?
58a                       # all converted to the same type
59
60# Sequences and replication
61a <- seq(from=1,to=5,by=1)      # a sequence from 1 to 5
62b <- 1:5                                        # a shortcut!
63
64rep(1,times=5)          # a lot of ones
65rep(1:5,times=2)        # repeat sequence 1 to 5, twice
66rep(1:5,each=2) # same as above, but element-wise
67rep(1:5,times=5:1)      # can vary the count of each element
68
69# Any, all, and which (with vectors)
70a <- 1:5                # create a vector (sequence from 1 to 5)
71a>2                     # some TRUE, some FALSE
72any(a>2)                # are any elements TRUE?
73all(a>2)                # are all elements TRUE?
74which(a>2)      # which indices are TRUE?
75
76# What about asking how long the vector is?
77length(a)
78
79# ---------------------------
80# From vectors to matrices...
81# ---------------------------
82
83# create a matrix the "formal" way...
84a <- matrix(data=1:25, nrow=5, ncol=5)
85a
86a[1,2]          # select an element (specifying two dimensions)
87a[1,]           # just the first row
88a[,2]           # just the second column
89a[2:3,3:5]      # select submatrices
90a[-1,]          # nice trick: negative numbers omit cells!
91a[-2,-2]                # get rid of row two and column two
92
93# another way to create matrices (bind together column-wise)
94b <- cbind(1:5,1:5)
95b
96
97# can perform with rows, too (bind together row-wise)
98d <- rbind(1:5,1:5)
99d
100cbind(b,d)      # no go: must have compatible dimensions!
101dim(b)          # what were those dimensions, anyway?
102dim(d)
103nrow(b)         # how many rows in b?
104ncol(b)         # how many columns in b?
105cbind(b,b)      # combining two matrices, column-wise
106
107t(b)                            # can transpose b
108cbind(t(b),d)           # now it works
109rbind(t(b),d)           # now it works
110
111# -----------------------
112# Element-wise operations
113# -----------------------
114
115# Most arithmetic operators are applied element-wise:
116a <- 1:5                # create a vector
117a + 1           # addition
118a * 2           # multiplication
119a / 3           # division
120a - 4           # subtraction
121a ^ 5           # you get the idea...
122a + a           # also works on pairs of vectors
123a * a
124a + 1:6         # problem: need same length
125
126# Same for many other basic transformations
127log(a)                          # log function
128exp(b)                          # exponential function
129sqrt(a+b)                               # note that we can nest statements!
130log((sqrt(a+b)+a)*b)    # more nesting
131
132a <- rbind(1:5,2:6)             # same principles apply to matrices
133b <- rbind(3:7,4:8)
134a + b
135a / b
136
137a %*% t(b)      # matrix multiplication
138
139# Logical operators (generally) work like
140# arithmetic ones:
141a > 0   # each value greater than zero?
142a == b  # corresponding values equivalent?
143a != b  # corresponding values not equivalent?
144
145
146# -----------
147# Data frames
148# -----------
149
150# Build a data frame from scratch, containing
151# three columns of data
152d <- data.frame(income=1:5,sane=c(T,T,T,T,F),id=LETTERS[1:5])
153d
154d[1,2]  # acts a lot like a matrix!
155d[,1]*5
156d[-1,]
157d$sane  # can use dollar sign notation to extract columns
158d$sane[3]<-FALSE        # making changes
159d
160d[2,3]  # shows factors for string values
161
162# If you want to do without factors
163d <- data.frame(income=1:5,sane=c(T,T,T,T,F),name=LETTERS[1:5],stringsAsFactors=FALSE)
164
165d[2,3]
166
167d <- as.data.frame(cbind(1:5,2:6))      # can create from matrices
168d
169is.data.frame(d)        # how can we tell it's not a matrix?
170is.matrix(d)            # the truth comes out
171
172# --------------------------
173# Finding built-in data sets
174# --------------------------
175
176# Many packages have built-in data for testing
177# and educational purposes:
178data()                  # lists them all
179?USArrests              # get help on a data set
180data(USArrests) # load the data set
181USArrests                       # view the object
182
183# ------------------------
184# Elementary visualization
185# ------------------------
186
187# R's graphics workhorse is the "plot" command:
188plot(x=USArrests$Murder,y=USArrests$UrbanPop)
189
190# Same as above, but now on log-log scale
191plot(x=USArrests$Murder,y=USArrests$UrbanPop,log="xy")
192
193# Adding plot title and clean up axis labels
194plot(x=USArrests$Murder,y=USArrests$Assault,xlab="Murder",ylab="Assault",main="USArrests")
195
196# Can also add text to a plot:
197
198# Step 1: set up a "blank" plot window
199plot(x=USArrests$Murder,y=USArrests$Assault,xlab="Murder",ylab="Assault", main="USArrests",type="n")
200
201# Step 2: add in text
202text(x=USArrests$Murder,y=USArrests$Assault,labels=rownames(USArrests))
203
204
205# Histograms and boxplots are often helpful
206hist(USArrests$Murder)
207boxplot(USArrests)
208
209       
210# ============================================================
211# Objects: Importing, Exploring, and Manipulating Network Data
212# ============================================================
213
214# -------------------------
215# Importing Relational Data
216# -------------------------
217
218# Be sure to be in the directory where you stored the data for
219# the workshop.  If you've not yet set the working directory,
220# you must do so now.
221list.files()            # Check what's in the working directory
222
223# Read an adjacency matrix (R stores it as a
224# data frame by default):
225relations<-read.csv(file="relationalData.csv",header=TRUE,row.names=1,stringsAsFactors=FALSE)
226
227relations
228
229# Here's a case where matrix format is preferred
230is.matrix(relations)    # first, let's check
231relations<-as.matrix(relations) # convert to matrix format     
232
233# Read in some vertex attribute data
234# (okay to leave it as a data frame):
235nodeInfo <- read.csv(file="vertexAttributes.csv",header=TRUE,stringsAsFactors=FALSE)
236
237nodeInfo
238
239# For more information...
240?list.files
241?read.csv
242?as.matrix
243?rownames
244?colnames
245
246# -------------------------
247# Built-in Network Datasets
248# -------------------------
249
250library(network)        # Make sure that network package is loaded
251data(package="network") # Available datasets in network package
252data(flo)               # Load a built-in data set; see ?flo for more
253flo                     # Examine the flo adjacency matrix
254
255# For more information...
256?data
257?flo
258
259# --------------------------
260# Creating "network" Objects
261# --------------------------
262
263# A network object is a special data structure in R, allowing
264# you to conveniently store data and meta-data about a
265# network in a single R object.  The "network" package
266# provides tools to help with the construction, manipulation,
267# and visualization of network objects.
268
269# Create a network object from the 'relations' matrix,
270# indicating the network is NOT directed, that it contains
271# edge values, and that we'll name the edge attributes 'strength'
272nrelations<-as.network.matrix(relations,directed=FALSE,ignore.eval=FALSE,names.eval="strength")
273
274nrelations      # Get a quick description of the data
275
276
277# For more information...
278?network
279?as.network.matrix
280
281# -----------------------------
282# Description and Visualization
283# -----------------------------
284
285summary(nrelations)                             # Get an overall summary
286network.dyadcount(nrelations)           # How many dyads in nflo?
287network.edgecount(nrelations)           # How many edges are present?
288network.size(nrelations)                        # How large is the network?
289as.sociomatrix(nrelations)              # Show connectivity
290nrelations[,]                                   # Another way to do it
291
292# Visualize the network object, using a shorthand
293# form of plot.network():
294plot(nrelations,displaylabels=TRUE)
295
296# Changing the layout...
297plot(nrelations,displaylabels=T,mode="circle")
298
299# For more information...
300?summary.network
301?network.dyadcount
302?network.edgecount
303?as.sociomatrix
304?as.matrix.network
305?is.directed
306?plot.network
307
308# ------------------
309# Working with edges
310# ------------------
311
312# Adding edges to an empty network object
313g <- network.initialize(5)      # Create an empty directed graph
314g[1,2] <- 1                             # Add an edge from 1 to 2
315g[2,] <- 1                              # Add edges from 2 to everyone else
316g[3,4:5]<-1                             # Add edges from 3 to 4 and 5
317g                                               # Examine the result
318plot(g,displaylabels=TRUE)
319
320# Deleting edges
321g[3,5] <- 0             # Remove the edge from 3 to 5
322g                               # It’s gone!
323g[,] <- 0                       # Remove all edges
324g                               # Now, an empty graph
325
326# Going back to 'nrelations', let's test for adjacency
327nrelations[4,5]         # 4 to 5?
328nrelations[4,]                  # all of 4's outgoing ties
329nrelations[1:4,5:8]             # Subsets are possible
330nrelations[-9,-9]               # Leaving Gil (9) out of the picture
331
332# Show edge values in a sociogram
333plot(nrelations,displaylabels=TRUE,edge.lwd="strength",label.pos=5)
334
335# Listing the edge attributes:
336list.edge.attributes(nrelations)
337
338# Retrieving edge values
339as.sociomatrix(nrelations,attrname="strength")
340
341nrelations[,]   # Original tie structure is preserved
342
343# For more information...
344?network.extraction
345?add.edge
346?delete.edges
347?delete.vertices
348?get.edges
349?upper.tri
350?symmetrize
351
352# -----------------------------
353# Network and vertex attributes
354# -----------------------------
355
356# Fold the vertex attributes into the network object
357nrelations %v% "id" <- nodeInfo$id
358nrelations %v% "age" <- nodeInfo$age
359nrelations %v% "sex" <- nodeInfo$sex
360nrelations %v% "handed" <- nodeInfo$handed
361nrelations %v% "lastDocVisit" <- nodeInfo$lastDocVisit
362
363# Listing the vertex attributes
364list.vertex.attributes(nrelations)
365
366# Retrieving attributes
367nrelations %v% "age"    # Retrieve vertex ages
368nrelations %v% "id"             # Retrieve vertex ids
369
370# For more information...
371?attribute.methods
372
373# =======================================================
374# Classical Network Analysis with the "sna" Package for R
375# =======================================================
376
377# ---------------
378# Getting started
379# ---------------
380
381library(sna)    # Load the sna library
382
383# ---------------------
384# Network data in "sna"
385# ---------------------
386
387# The 'sna' package can handle network data in many forms.
388# For instance, the function gden() calculates network
389# density; we can use it on a network object, an adjacency
390# matrix, a list of such matrices, etc.
391gden(flo)
392
393# Because an adjacency matrix stores information about which
394# edges are present AND which are absent, it can be cumbersome
395# for large, sparse graphs.  Edgelists are better for these
396# graphs, since they only store information about edges that are present.  But, we also need to "attach" an indicator of how
397# many nodes are in the graph, since not all nodes may show
398# up in the edgelist.
399
400# The sna package provides support for edgelists. These are
401# three-column matrices, each row of which represents an edge
402# (via its sender, recipient, and value, respectively).  These
403# "sna edgelists" have special attributes that indicate their
404# size, vertex names (if any), and bipartite status
405# (if applicable).
406
407eflo<-as.edgelist.sna(flo)    # Coerce flo to an sna edgelist
408eflo
409attr(eflo,"n")                # How many vertices are there?
410attr(eflo,"vnames")           # Are there vertex names?
411as.sociomatrix.sna(eflo)      # Can transform back
412
413
414# 'sna edgelists' can be handy with large data sets
415# (as a simple alternative to network objects).
416# To make one, just add an 'n' attribute (number of nodes)
417# to a valid three-column matrix...
418
419mat<-cbind(rep(2,4),3:6,rep(1,4))       # Create edges from 2 to 3:6
420
421# Add column names, for readability
422colnames(mat)<-c("sender","receiver","value")
423mat                             # Let's take a look at it
424attr(mat,"n")<-6        # Set total number of vertices to 6
425mat
426gden(mat)                       # Can now pass to sna routines
427as.sociomatrix.sna(mat)         # Can see in adjacency form
428
429# For more information...
430?as.edgelist.sna
431?as.sociomatrix.sna
432?attr
433?sna
434
435# ---------------------
436# Network visualization
437# ---------------------
438
439# Let's first establish sex-stereotypic colors for the nodes
440nodeColors<-ifelse(nodeInfo$sex=="F",yes="hotpink",no="dodgerblue")
441
442# Then let's apply those colors to the network visualization
443plot(nrelations,displaylabels=TRUE,vertex.col=nodeColors,label.pos=5,vertex.cex=2)
444
445# Load in external data (downloaded from the web)
446load("introduction_sunbelt_2013.Rdata")
447
448# Using some of the data we just loaded in, let's plot the
449# contiguity among nations in 1993 (from the Correlates of
450# War (CoW) project (see http://www.correlatesofwar.org
451# for more information)
452plot(contig_1993)       # The default visualization
453
454# We can add labels to the vertices
455plot(contig_1993,displaylabels=TRUE,label.cex=0.5,label.col="white",label.pos=5,vertex.cex=2.25)
456
457# Here's an example of directed data:
458# militarized interstate disputes (MIDs) for 1993
459plot(mids_1993,label.cex=0.5,label.col="white",displaylabels=TRUE,label.pos=5,vertex.cex=2.25)
460
461# All those isolates can get in the way.  We can suppress
462# them using the "displayisolates" parameter
463plot(mids_1993,label.cex=0.5,label.col="white",displaylabels=TRUE,label.pos=5,vertex.cex=2.25,displayisolates=FALSE)
464
465# The default layout algorithm is that of
466# Frutchterman-Reingold (1991), but we can use others
467plot(mids_1993,label.cex=0.5,label.col="white",displaylabels=TRUE,label.pos=5,vertex.cex=2.25,displayisolates=FALSE,mode="circle")
468
469# When a layout is generated, the results can be saved
470# for later reuse:
471capturedCoords<-plot(contig_1993)
472capturedCoords  # Show the vertex coordinates
473
474# Saved (or a priori) layouts can be used via
475# the "coord" argument
476plot(contig_1993,coord=capturedCoords)
477
478# When the default settings are insufficient,
479# interactive mode allows for tweaking
480customizedCoords<-plot(mids_1993,interactive=TRUE)
481
482# Now, applying those saved coordinates to a new plot
483plot(mids_1993,label.cex=0.5,coord=customizedCoords)
484
485# For more information...
486?gplot
487?gplot.layout
488
489# ------------------------------------------------------------
490# Basic centrality indices: degree, betweenness, and closeness
491# ------------------------------------------------------------
492
493# We begin with the simplest case: degree centrality
494degree(mids_1993)               # Default: total degree
495ideg<-degree(mids_1993,cmode="indegree") # in-degree
496odeg<-degree(mids_1993,cmode="outdegree") # out-degree
497
498# Once centrality scores are computed, let's plot
499# them against each other
500plot(x=ideg,y=odeg,type="n",xlab="Incoming MIDs",ylab="Outgoing MIDs",xlim=c(0,12),ylim=c(0,12)) # Plot ideg by odeg
501abline(a=0,b=1,lty=1,col="red")
502text(jitter(ideg), jitter(odeg), network.vertex.names(contig_1993), cex=0.75)
503
504
505# Centrality scores can also be used with other routines.
506# We can employ information about degree in a network
507# visualization.  Here, net agressors are in shades of red,
508# and net recipients of aggression are in shades of blue
509plot(mids_1993,vertex.cex=(ideg+odeg)^0.5, vertex.sides=50,label.cex=0.4, vertex.col=rgb(odeg/max(odeg),0,ideg/max(ideg)),displaylabels=TRUE,displayisolates=FALSE)
510
511# Betweenness and closeness are also popular measures
512bet<-betweenness(contig_1993,gmode="graph")
513bet
514plot(contig_1993,vertex.cex=(sqrt(bet)/25)+1)
515clo<-closeness(contig_1993) # Geographic closeness
516clo             # A large world after all?
517
518# let's try an alternate form
519clo2<-closeness(contig_1993,cmode="suminvundir")
520
521# Plot the bivariate relationship
522plot(x=clo2,y=bet,xlab="Closeness",ylab="Betweenness")
523
524# For more information...
525?betweenness
526?bonpow
527?closeness
528?degree
529?evcent
530?graphcent
531?infocent
532?prestige
533?stresscent
534
535# ------------------------------------
536# From centrality to centralization...
537# ------------------------------------
538
539# Do MIDs concentrate?
540centralization(mids_1993,degree,cmode="indegree")
541
542# Eigenvector centralization
543centralization(contig_1993,evcent)
544
545# For more information...
546?centralization
547
548# --------------------------------
549# Other graph-level indices (GLIs)
550# --------------------------------
551
552gden(mids_1993) # Density
553
554grecip(mids_1993)       # Dyadic reciprocity (seems high!)
555
556# Edgewise reciprocity (based on edges present)
557grecip(mids_1993, measure="edgewise")
558
559gtrans(mids_1993)       # Transitivity
560
561# For more information...
562?gden
563?grecip
564?gtrans
565
566# ------------------------
567# Subgraph census routines
568# ------------------------
569
570dyad.census(mids_1993)          # M,A,N counts
571dyad.census(contig_1993)                # No As in undirected graphs
572triad.census(mids_1993,mode="digraph")  # Directed triad census
573triad.census(contig_1993,mode="graph")  # Undirected triad census
574
575# Count paths of length <=6
576kpath.census(mids_1993,maxlen=6,tabulate.by.vertex=FALSE)
577
578# Count cycles of length <=6
579kcycle.census(mids_1993,maxlen=6,tabulate.by.vertex=FALSE)
580
581# Find cliques
582clique.census(mids_1993,tabulate.by.vertex=FALSE,enumerate=FALSE)
583
584# Can also look at more detailed tabulation/co-membership
585# for paths/cycles/cliques
586kpath.census(mids_1993,maxlen=4)        # Show tabulation by vertex
587
588# Component information can be obtained in various ways
589components(mids_1993)   # Strong component count
590components(mids_1993,connected="weak")  # Weak component count
591
592# Get weak components
593cd<-component.dist(mids_1993, connected="weak")
594cd
595
596# Who's in the largest component?
597cl<-component.largest(mids_1993, connected="weak")
598network.vertex.names(mids_1993)[cl] # Identify by name
599
600# Generate a new network object, containing only those
601# members of the largest, weak component
602midsComponent<-get.inducedSubgraph(mids_1993,v=(1:network.size(mids_1993))[cl])
603
604# Then visualize it
605plot(midsComponent,displaylabels=TRUE,label.cex=0.5,vertex.cex=2,label.col="white",label.pos=5)
606
607# Likewise, many routines exist for handling isolates
608is.isolate(mids_1993,ego=3)
609is.isolate(mids_1993,ego="BHM")
610is.isolate(mids_1993,ego="USA")
611isol<-isolates(mids_1993)
612isol
613
614# Which countries are isolates?
615network.vertex.names(mids_1993)[isol]
616
617# How to remove isolates
618midsNoIsos<-mids_1993 # copy over original network
619
620# Then take out isolates
621delete.vertices(midsNoIsos,vid=isol)
622
623# Then visualize it
624plot(midsNoIsos,displaylabels=TRUE,label.cex=0.5,vertex.cex=2,label.col="white",label.pos=5)
625
626# For more information...
627?clique.census
628?components
629?component.dist
630?dyad.census
631?is.isolate
632?isolates
633?kcycle.census
634?kpath.census
635?triad.census
636
637# ----------------------------------
638# Elementary random graph generation
639# ----------------------------------
640
641rgraph(n=10)    # A uniform random digraph of order 10
642
643# Homogeneous Bernoulli graph, specifying tie probability
644rgraph(n=10,tprob=3/9)
645rgraph(n=10,tprob=3/9,mode="graph") # As above, but undirected
646rgnm(n=1,nv=10,m=20)    # Uniform conditional on order, edges
647
648# Uniform conditional on dyad census
649rguman(n=1,nv=10,mut=0.5,asym=0.25,null=0.25)
650 
651# For more information...
652?rgbn
653?rgmn
654?rgraph
655?rguman
656?rgws
657
658# -----------------------------------------------------
659# Basic connectivity/distance measurement, and cohesion
660# -----------------------------------------------------
661
662g<-rgraph(n=20,tprob=3/19)      # Start with a random digraph
663g
664is.connected(g) # Is g strongly connected?
665is.connected(g,connected="weak")        # How about weakly connected?
666geodist(g)              # Get information on geodesics
667reachability(g) # Return the reachability matrix
668symmetrize(g)           # Symmetrize g using the "or" rule
669symmetrize(g,rule="strong")     # Symmetrize g using the "and" rule
670
671
672# Several ways to get relatively cohesive groups
673clique.census(g)        # Clique census
674kcores(g)                       # k-cores (by degree)
675cutpoints(g)            # find articulation points
676
677# Showing cohesion information can aid visualization.
678# Here, show critical positions
679plot(contig_1993,vertex.col=2+cutpoints(contig_1993,mode="graph",return.indicator=T)) 
680
681# Show the nesting of cores
682kc<-kcores(contig_1993,cmode="indegree")
683
684# Then plot it...
685plot(contig_1993,vertex.col=rainbow(length(unique(kc)))[kc+1],vertex.cex=2,label.cex=0.5,label.pos=5,displaylabels=TRUE)
686
687# Members of the 5-core only
688contig5core<-get.inducedSubgraph(contig_1993,v=which(kc>4))
689
690# Then plot it...
691plot(contig5core,vertex.col=rainbow(max(kc)+1)[kc[kc>4]+1],vertex.cex=2,label.cex=0.5,label.pos=5,displaylabels=TRUE)
692
693# For more information...
694?components
695?component.dist
696?cutpoints
697?geodist
698?kcores
699?is.connected
700?reachability
701?symmetrize