# Sunbelt2016: IntroToSNAinR.R

File IntroToSNAinR.R, 22.6 KB (added by morrism, 3 years ago)

R commands for tutorial

Line
1## 1 Getting Started
2## if you haven't installed the relevant packages, do so with the following code:
3## install.packages("network")
4## install.packages("statnet")
5
6###########################################################
7###########################################################
8##2 A Brief R Tutorial
9##2.1 Introduction to basic R syntax
101 + 3 # evaluation
11a <- 3 # assignment
12a # evaluation
13a<-3 # spacing does not matter
14a <- 3 # spacing does not matter
15a
16
17sqrt(a) # use the square root function
18b <- sqrt(a) # use function and save result
19b
20
21d # evaluate something that is not there
22a == b # is a equivalent to b?
23a != b # is a not equal to b?
24ls() # list objects in the global environment
25help.start() # get help with R generally
26?sqrt # get specific help for a function
27??sqrt # looking for help pertaining to sqrt
28apropos("sq") # it's on the tip of my tongue...
29rm(a) # remove a single object
30ls()
31rm(list=ls()) # remove everything from the environment
32ls()
33
34###########################################################
35##2.2 Vectors and matrices in R
36#Creating vectors using the "combine" operator
37c
38?c
39a <- c(1,3,5) # create a vector by combining values
40a
41a[2] # select the second element
42b <- c("one","three","five") # also works with strings
43b
44b[2]
45a <- c(a,a) # can apply recursively
46a
47a <- c(a,b) # mixing types---what happens?
48a # all converted to the same type
49
50#Sequences and replication
51a <- seq(from=1,to=5,by=1) # from 1 to 5 the slow way
52b <- 1:5 # a shortcut!
53a==b # all TRUE
54rep(1,times=5) # a lot of ones
55rep(1:5,times=2) # repeat an entire sequence
56rep(1:5,each=2) # same, but element-wise
57rep(1:5,times=5:1) # can vary the count of each element
58
59#any, all, and which (with vectors)
60a <- -1:4 # create a vector
61a
62a>2 # some TRUE, some FALSE
63any(a>2) # are any elements TRUE?
64all(a>2) # are all elements TRUE?
65which(a>2) # which indices are TRUE?
66
67#From vectors to matrices
68a <- matrix(data=1:25, nrow=5, ncol=5) # create a matrix the "formal" way
69a
70a[1,2] # select a matrix element (two dimensions)
71a[1,] # just the first row
72all(a[1,]==a[1,1:5]) # show the equivalence
73a[,2] # can also perform for columns
74a[2:3,3:5] # select submatrices
75a[-1,] # nice trick: negative numbers omit cells!
76a[-2,-2] # get rid of row two, column two
77
78b <- cbind(1:5,1:5) # another way to create matrices
79b
80d <- rbind(1:5,1:5) # can perform with rows, too
81d
82cbind(b,d) # no go: must have compatible dimensions!
83dim(b) # what were those dimensions, anyway?
84dim(d)
85NROW(b)
86NCOL(b)
87cbind(b,b) # combining two matrices
88
89t(b) # can transpose b
90cbind(t(b),d) # now it works
91rbind(t(b),d) # now it works
92
93###########################################################
94##2.3 Element-wise operations
95a <- 1:5
97a * 2 # multiplication
98a / 3 # division
99a - 4 # subtraction
100a ^ 5 # you get the idea...
101
102a + a # also works on pairs of vectors
103a * a
104a + 1:6 # problem: need same length
105
106a <- rbind(1:5,2:6) # same principles apply to matrices
107b <- rbind(3:7,4:8)
108a + b
109a / b
110a %*% t(b) # matrix multiplication
111
112#logical operators (generally) work like arithmetic ones
113a > 0 # each value greater than zero?
114a == b # corresponding values equivalent?
115a != b # corresponding values not equivalent?
116!(a == b) # same as above
117(a>2) | (b>4) # the OR operator
118(a>2) & (b>4) # the AND operator
119(a>2) || (b>4) # beware the "double-pipe"!
120(a>2) && (b>4) # (and the "double-ampersand"!)
121
122#ditto for many other basic transformations
123log(a)
124exp(b)
125sqrt(a+b) # note that we can nest statements!
126log((sqrt(a+b)+a)*b) # as recursive as we wanna be
127
128###########################################################
129##2.4 Data Frames
130d <- data.frame(income=1:5,sane=c(T,T,T,T,F),name=LETTERS[1:5])
131d
132d[1,2] # acts a lot like a matrix!
133d[,1]*5
134d[-1,]
135d\$sane # can use dollar sign notation
136d\$sane[3]<-FALSE # making changes
137d
138d[2,3] # shows factors for string values
139
140#if you want to do without factors
141d\$name <- LETTERS[1:5] # eliminate evil factors by overwriting
142d[2,3]
143d <- data.frame(income=1:5,sane=c(T,T,T,T,F),name=LETTERS[1:5],stringsAsFactors=FALSE)
144d
145
146d <- as.data.frame(cbind(1:5,2:6)) # can create from matrices
147d
148is.data.frame(d) # how can we tell it's not a matrix?
149is.matrix(d) # the truth comes out
150
151###########################################################
152##2.5 Finding built-in data sets
153#Many packages have built-in data for testing and educational purposes
154data() # lists them all
155?USArrests # get help on a data set
156data(USArrests) # load the data set
157USArrests # view the object
158
159##2.6 Elementary visualization
160#R's workhorse is the "plot" command
161plot(USArrests\$Murder,USArrests\$UrbanPop) # using dollar sign notation
162plot(USArrests\$Murder,USArrests\$UrbanPop,log="xy") # log-log scale
163
164#Adding plot title and axis labels
165plot(USArrests\$Murder,USArrests\$Assault,xlab="Murder",ylab="Assault",main="USArrests")
166
168plot(USArrests\$Murder,USArrests\$Assault,xlab="Murder",ylab="Assault", main="USArrests",type="n")
169text(USArrests\$Murder,USArrests\$Assault,rownames(USArrests))
170
171#Histograms and boxplots are often helpful
172hist(USArrests\$Murder)
173boxplot(USArrests)
174
175###########################################################
176###########################################################
177##3 network objects: importing, exploring, and manipulating network data
178##3.1 Built-in Network Datasets
179
180library(network) # Make sure that network package is loaded
181data(package="network") # List available datasets in network package
182data(flo) # Load a built-in data set; see ?flo for more
183flo # Examine the flo adjacency matrix
184
186?data
187?flo
188
189###########################################################
190##3.2 Importing Relational Data
191#Be sure to be in the directory where you stored the data for the workshop. If you've not set the working directory, you must do so now. See Section 1.7 for how to do this.
192#You can use setwd() to change it, or platform specific shortcuts
193getwd() # Check what directory you're in
194list.files() # Check what's in the working directory
195
196#Read an adjacency matrix (R stores it as a data frame by default)
198relations
199#Here's a case where matrix format is preferred
200relations <- as.matrix(relations) # convert to matrix format
201
202#Read in some vertex attribute data (okay to leave it as a data frame)
204nodeInfo
205
206#Since our relational data has no row/column names, let's set them now
207rownames(relations) <- nodeInfo\$name
208colnames(relations) <- nodeInfo\$name
209relations
210
211#Why did we set stringsAsFactors=FALSE?
213relations_wFactors[1,2]<-"Derek"
214
215numFactor<-as.factor(c(1,3,5))
216numFactor+1
217as.numeric(numFactor)
218
220?list.files
222?as.matrix
223?rownames
224
225###########################################################
226##3.3 Creating network objects
227
228nrelations<-network(relations,directed=FALSE) # Create a network object based on relations
229nrelations # Get a quick description of the data
230nempty <- network.initialize(5) # Create an empty graph with 5 vertices
231nempty # Compare with nrelations
232
234?network
235?as.network.matrix
236
237###########################################################
238##3.4 Description and Visualization
239
240summary(nrelations) # Get an overall summary
242network.edgecount(nrelations) # How many edges are present?
243network.size(nrelations) # How large is the network?
244as.sociomatrix(nrelations) # Show it as a sociomatrix
245nrelations[,] # Another way to do it
246
247plot(nrelations,displaylabels=T) # Plot with names
248plot(nrelations,displaylabels=T,mode="circle") # A less useful layout...
249
250library(sna) # Load the sna library
251gplot(nrelations) # Requires sna
252gplot(relations) # gplot Will work with a matrix object too
253
255?summary.network
257?network.edgecount
258?as.sociomatrix
259?as.matrix.network
260?is.directed
261
262###########################################################
263##3.5 Working With Edges
265g <- network.initialize(5) # Create an empty graph
266g[1,2] <- 1 # Add an edge from 1 to 2
267g[2,] <- 1 # Add edges from 2 to everyone else
268g # Examine the result
269m <- matrix(0, nrow=5, ncol=5) # Create an adjacency matrix
270m[3,4:5] <- 1 # Add entries from 3 to 4 and 5
271g[m>0] <- 1 # Add more entries
272g
273
274#Delete edges
275g[3,5] <- 0 # Remove the edge from 3 to 5
276g # Its gone!
277g[,] <- 0 # Remove all edges
278g # Now, an empty graph
279
281nrelations[4,5] # Emma to Sarah?
282nrelations[4,] # Entire Emma row
283nrelations[1:4,5:8] # Subsets are possible
284nrelations[-9,-9] # Leaving Gil out of the picture
285
286#Setting edge values
287m<-nrelations[,] # copy over the relational structure
288m[upper.tri(m)>0]<-rep(1:3,times=3) # give different edge values
289m<-symmetrize(m,rule="upper")
290m
291
292nrelations %e% "strength" <- m # Add the valued ties back in
293
294#Retrieving edge values
295list.edge.attributes(nrelations) # See whats available
296nrelations %e% "strength" # Use the %e% operator
297as.sociomatrix(nrelations,attrname="strength") # Can also do it this way
298nrelations[,] # Original tie structure is preserved
299
301?network.extraction
303?delete.edges
304?delete.vertices
305?get.edges
306?upper.tri
307
308###########################################################
309##3.6 Network and Vertex Attributes
311nrelations %v% "id" <- nodeInfo\$id # Add in our vertex attributes
312nrelations %v% "age" <- nodeInfo\$age
313nrelations %v% "sex" <- nodeInfo\$sex
314nrelations %v% "handed" <- nodeInfo\$handed
315nrelations %v% "lastDocVisit" <- nodeInfo\$lastDocVisit
316
317#Listing attributes
318list.vertex.attributes(nrelations) # List all vertex attributes
319list.network.attributes(nrelations) # List all network attributes
320
321#Retrieving attributes
322nrelations %v% "age" # Retrieve vertex ages
323nrelations %v% "id" # Retrieve vertex ids
324
326?attribute.methods
327
328###########################################################
329###########################################################
330##4 Classical Network Analysis with the SNA package
331##4.1 Getting started
332library(sna) # Load the sna library
333library(help="sna") # See a list of help pages for sna package
335
336###########################################################
337##4.2 Network data in SNA
338#The sna package can handle network data in many forms. For instance, the function gden() calculates network density; we can use it on a network object, an adjacency matrix, a list of such matrices, etc.
339
340data(flo)
342gden(flo)
343nflo<-network(flo,directed=FALSE) # Network form
344gden(nflo)
345
346#The sna package also supports a special kind of matrix called an \sna edgelist." These are three-column matrices, each row of which represents an edge (via its sender, recipient, and value, respectively). These sna edgelists" have special attributes that indicate their size, vertex names (if any), and bipartite status (if applicable).
347eflo<-as.edgelist.sna(flo) # Coerce flo to an sna edgelist
348eflo
349attr(eflo,"n") # How many vertices are there?
350attr(eflo,"vnames") # Are there vertex names?
351as.sociomatrix.sna(eflo) # Can transform back w/ as.sociomatrix.sna
352
353mat<-cbind(rep(2,4),3:6,rep(1,4)) # Create edges from 2 to 3:6
354attr(mat,"n")<-6 # Set total number of vertices to 6
355mat
356gden(mat) # Can now pass to sna routines
357as.sociomatrix.sna(mat) # Can see in adjacency form
358
360?as.edgelist.sna
361?as.sociomatrix.sna
362?attr
363?sna
364
365###########################################################
366##4.3 Network visualization with gplot()
367#Plotting the data we imported earlier
368gplot(nrelations)
369gplot(nrelations,displaylabels=TRUE) # This time with labels
370#Let's color the nodes in sex-stereotypic colors
371nodeColors<-ifelse(nodeInfo\$sex=="F","hotpink","dodgerblue")
372gplot(relations,gmode="graph",displaylabels=TRUE,vertex.col=nodeColors)
373
374#Using data we just loaded in, plot the contiguity among nations in 1993 (from the Correlates of War (CoW)1 project)
375gplot(contig_1993) # The default visualization
376gplot(contig_1993, usearrows=FALSE) # Turn off arrows manually
377gplot(contig_1993, gmode="graph") # Can also tell gplot the data is undirected
378
379#We can add labels to the vertices
380gplot(contig_1993, gmode="graph",displaylabels=TRUE,label.cex=0.5,label.col="blue")
381
382#Other ways to specify the labeling
383gplot(contig_1993,gmode="graph",label=network.vertex.names(contig_1993),label.cex=0.5,label.col="blue")
384
385#Here's an example of directed data|militarized interstate disputes (MIDs) for 1993
386gplot(mids_1993,label.cex=0.5,label.col="blue",displaylabels=TRUE)
387
388#All those isolates can get in the way|we can suppress them using displayisolates
389gplot(mids_1993,label.cex=0.5,label.col="blue",displaylabels=TRUE,displayisolates=FALSE)
390
391#The default layout algorithm is that of Frutchterman-Reingold (1991), can use others
392gplot(mids_1993,label.cex=0.5,label.col="blue",displaylabels=TRUE,displayisolates=FALSE,mode="circle") # The infamous circle
393
394#or perhaps
395gplot(mids_1993,label.cex=0.5,label.col="blue",displaylabels=TRUE,displayisolates=FALSE,mode="mds") # MDS of position similarity
396
397#When a layout is generated, the results can be saved for later reuse:
398coords <- gplot(contig_1993,gmode="graph",label=colnames(contig_1993[,]),label.cex=0.5,label.col="blue") # Capture the magic of the moment
399coords # Show the vertex coordinates
400
401#Saved (or a priori) layouts can be used via the coord argument
402gplot(mids_1993,gmode="graph",label=colnames(contig_1993[,]),label.cex=0.5,label.col="blue",coord=coords)
403
404#When the default settings are insuficient, interactive mode allows for tweaking
405coords <- gplot(contig_1993, interactive=TRUE) # Modify and save
406gplot(contig_1993,coord=coords,displaylabels=TRUE,gmode="graph",label.cex=0.5,label.col="blue") # Should reproduce the modified layout
407
409?gplot ?gplot.layout
410
411###########################################################
412##4.4 Basic centrality indices: degree, betweenness, and closeness
413#We begin with the simplest case: degree centrality
414degree(mids_1993) # Default: total degree
415ideg <- degree(mids_1993, cmode="indegree") # Indegree for MIDs
416odeg <- degree(mids_1993, cmode="outdegree") # Outdegree for MIDs
417all(degree(mids_1993) == ideg+odeg) # In + out = total?
418
419#Once centrality scores are computed, we can handle them using standard R methods:
420plot(ideg, odeg, type="n", xlab="Incoming MIDs", ylab="Outgoing MIDs") # Plot ideg by odeg
421abline(0, 1, lty=3)
422text(jitter(ideg), jitter(odeg), network.vertex.names(contig_1993), cex=0.75, col=2)
423
424#Plot simple histograms of the degree distribution:
425par(mfrow=c(2,2)) # Set up a 2x2 display
426hist(ideg, xlab="Indegree", main="Indegree Distribution", prob=TRUE)
427hist(odeg, xlab="Outdegree", main="Outdegree Distribution", prob=TRUE)
428hist(ideg+odeg, xlab="Total Degree", main="Total Degree Distribution", prob=TRUE)
429par(mfrow=c(1,1)) # Restore display
430
431#Centrality scores can also be used with other sna routines, e.g., gplot()
432gplot(mids_1993, vertex.cex=(ideg+odeg)^0.5/2, vertex.sides=50,label.cex=0.4,vertex.col=rgb(odeg/max(odeg),0,ideg/max(ideg)),displaylabels=TRUE)
433
434#Betweenness and closeness are also popular measures
435bet <- betweenness(contig_1993, gmode="graph") # Geographic betweenness
436bet
437gplot(contig_1993, vertex.cex=sqrt(bet)/25, gmode="graph") # Use w/gplot
438clo <- closeness(contig_1993) # Geographic closeness
439clo # A large world after all?
440
441#Can use sna routines to explore alternatives to the common measures. . .
442contig_1993_gdist<-geodist(contig_1993)\$gdist # matrix of geodesic distances
443contig_1993_gdist
4441/sum(contig_1993_gdist[1,2:186]) # calculate closeness for country 1
445
446which(contig_1993_gdist[,1]=="Inf") # disconnected matrix
447
448sum(1/contig_1993_gdist[1,2:186]) # alternate closeness for country 1
449
450closeness2 <- function(x){ # Create an alternate closeness function!
451geo <- 1/geodist(x)\$gdist # Get the matrix of 1/geodesic distance
452diag(geo) <- 0 # Define self-ties as 0
453apply(geo, 1, sum) # Return sum(1/geodist) for each vertex
454}
455
456clo2 <- closeness2(contig_1993) # Use our new function on contiguity data
457clo2
458
459hist(clo2, xlab="Alt. Closeness", prob=TRUE) # Much better behaved!
460cor(clo2, bet) # Correlate with betweenness
461plot(clo2, bet) # Plot the bivariate relationship
462all(clo2/185==closeness(contig_1993,cmode="suminvundir")) # Actually, we support this in sna!
463
465?betweenness
466?bonpow
467?closeness
468?degree
469?evcent
470?graphcent
471?infocent
472?prestige
473?stresscent
474
475###########################################################
476##4.5 From centrality to centralization
477centralization(mids_1993, degree, cmode="indegree") # Do MIDs concentrate?
478centralization(contig_1993, evcent) # Eigenvector centralization
479
481?centralization
482
483###########################################################
484##4.6 Elementary graph-level indices
485gden(mids_1993) # Density
487grecip(mids_1993, measure="edgewise") # Edgewise reciprocity
488gtrans(mids_1993) # Transitivity
489
491?gden
492?grecip
493?gtrans
494
495###########################################################
496##4.7 Subgraph census routines, isolates, clusters, and geodesic distance
497
499dyad.census(contig_1993) # No As in undirected graphs
502kpath.census(mids_1993, maxlen=6, tabulate.by.vertex=FALSE) # Count paths of length <=6
503kcycle.census(mids_1993, maxlen=6, tabulate.by.vertex=FALSE) # Count cycles of length <=6
504clique.census(mids_1993, tabulate.by.vertex=FALSE, enumerate=FALSE) # Find maximal cliques
505
506#Can also look at more detailed tabulation/co-membership for paths/cycles/cliques
507kpath.census(mids_1993, maxlen=4) # Show tabulation by vertex
509gplot(indirect,label.cex=0.4,vertex.cex=0.75,displaylabels=TRUE,edge.col=rgb(0,0,0,0.25)) # Plot indirect MIDs
510
511#Component information can be obtained in various ways
512components(mids_1993) # Strong component count
513components(mids_1993, connected="weak") # Weak component count
514cd <- component.dist(mids_1993, connected="weak") # Get weak components
515cd
516
517#Component sizes
518plot(1:length(cd\$cdist),cd\$cdist,xlab="Size",ylab="Frequency")
519
520#Who's in the largest component?
521cl <- component.largest(mids_1993, connected="weak")
522cl
523
524#Plot the largest weak component
525gplot(mids_1993[cl,cl], boxed.lab=FALSE, label.cex=0.5,label.col=4,label=network.vertex.names(mids_1993)[cl])
526
527#Likewise, many routines exist for handling isolates
528is.isolate(mids_1993, 3) # Is the third vertex (BHM) an isolate?
529is.isolate(mids_1993, "BHM") # The peaceful islands?
530is.isolate(mids_1993, "USA") # Perhaps less so....
531isol <- isolates(mids_1993) # Get the entire list of isolates
532isol
533network.vertex.names(mids_1993)[isol] # Which countries are isolates?
534
535#Another way to remove isolates from sociograms
536gplot(mids_1993[-isol,-isol], label.cex=0.5,label.col=4,label=network.vertex.names(mids_1993)[-isol])
537
538#Geodesic distances
539contig.dist<-geodist(contig_1993)
540
541#look at the resulting object
542attributes(contig.dist)
543contig.dist\$gdist
544contig.dist\$counts
545
547?clique.census
548?components
549?component.dist
551?is.isolate
552?isolates
553?kcycle.census
554?kpath.census
556?geodist
557
558###########################################################
559##4.8 Elementary random graph generation
560
561rgraph(10) # A uniform random digraph of order 10
562rgraph(10, tprob=3/9) # Homogeneous Bernoulli w/mean degree 3
563rgraph(10, tprob=3/9, mode="graph") # As above, but undirected
564rgnm(1, 10, 20) # Uniform conditional on order, edges
565rguman(1, 10, mut=0.5, asym=0.25, null=0.25) # Homogeneous multinomial on dyad census
566rguman(1, 10, mut=0, asym=1, null=0) # An extreme case: random tournament
567gplot(rgws(1,50,1,2,0)) # A Watts-Strogatz process - baseline
568gplot(rgws(1,50,1,2,0.05)) # ...with rewiring probability 0.05
569gplot(rgws(1,50,1,2,0.2)) # ...with rewiring probability 0.2
570
571##now let's re-create the plot from Watts and Strogatz
572##a list is a new type of data structure...
573wgNets<-list()
574temp<-seq(-3.2,0,.2)
575temp
576length(temp)
577
578p_vals<-10^(temp)
579p_vals
580
581for(i in 1:17){
582  wgNets[[i]]<-rgws(n=1,nv=50,d=1,z=2,p=p_vals[i])
583}
584
585##lists use 'double-bracket' notation
586gplot(wgNets[[1]])
587gplot(wgNets[[17]])
588
589l_vals<-vector(length=17)
590geodist(wgNets[[1]])\$gdist
591geodist(wgNets[[17]])\$gdist
592for(i in 1:17){
593  temp<-geodist(wgNets[[i]])\$gdist
594  temp[which(temp=="Inf")]<-0
595  l_vals[i]<-max(temp)
596}
597l_vals
598l_vals<-l_vals/l_vals[1]
599
600##now for the clustering coefficient
602c_vals<-vector(length=17)
603for(i in 1:17){
605  c_vals[i]<-temp[4]/(3*temp[4]+temp[3])
606}
607c_vals
608c_vals<-c_vals/c_vals[1]
609
610plot(p_vals,l_vals,log="x",ylim=c(0,1),xlab="Re-Wiring Level",ylab="Fraction of Initial Value")
611lines(p_vals,c_vals,type="p",pch=2)
612
614legend("bottomleft",legend=c("Diameter","Clustering Coefficient"),pch=c(1,2))
615
617?rgbn
618?rgmn
619?rgraph
620?rguman
621?rgws
622?list
623?lines
624?legend
625
626###########################################################
627##4.9 Basic connectivity/distance measurement, and cohesion
629g
630is.connected(g) # Is g strongly connected?
631is.connected(g, connected="weak") # How about weakly connected?
632geodist(g) # Get information on geodesics
633reachability(g) # Return the reachability matrix
634symmetrize(g) # Symmetrize g using the "or" rule
635symmetrize(g, rule="strong") # Symmetrize g using the "and" rule
636
637#Several ways to get relatively cohesive groups
638clique.census(g) # Maximal clique census
639kcores(g) # k-cores (by degree)
640cutpoints(g) # find articulation points
641
642#Showing cohesion information can aid visualization. Here, show critical positions
643gplot(contig_1993,vertex.col=2+cutpoints(contig_1993,mode="graph",return.indicator=T))
644
645#Show the nesting of cores
646kc<-kcores(contig_1993,cmode="indegree")
647gplot(contig_1993,vertex.col=rainbow(max(kc)+1)[kc+1])
648
649#Showing members of the 5-core only
650gplot(contig_1993[kc>4,kc>4],vertex.col=rainbow(max(kc)+1)[kc[kc>4]+1])
651