# Sunbelt2013: basicNetworkStatistics.R

File basicNetworkStatistics.R, 14.7 KB (added by lxwang, 7 years ago)
Line
1##1 Getting Started
2install.packages("network")
3library(network)
4install.packages("statnet")
5library(statnet)
6
7###########################################################
8###########################################################
9##2 Node Level Indices, Network Covariates, and Network Regression
10##2.1 Network visualization and basic descriptives
11
12#make sure you're in the correct file directory or change the following command
14
15data(emon) # Load Drabek et al. data
16
17# Begin by examining the 'emon' dataset
18?emon
19class(emon)
20class(emon[])
21emon[]
22emon[]%v%"vertex.names" # Display vertex names
23
24# Extract ties from the Cheyenne EMON communicating at least "every few hours"
25g<-as.sociomatrix(emon[],"Frequency") # Need to get the frequency info
26g<-symmetrize((g>0)&(g<4)) # Note the reverse coding!
27
28#Get some potential covariates
29drs<-emon[]%v%"Decision.Rank.Score" # Get decision rank (see man page)
30crs<-emon[]%v%"Command.Rank.Score" # Get command rank
31
32# Plot Cheyenne EMON communications
33gplot(emon[])
34gplot(emon[], label.cex=0.5, label.col=4, label=network.vertex.names(emon[])) # Basic display, with labels
35
36#Calculate some basic centrality measures
37deg<-degree(g,gmode="graph")
38bet<-betweenness(g,gmode="graph")
39clo<-closeness(g,gmode="graph")
40
41#Raw correlations
42cor(cbind(deg,bet,clo),cbind(drs,crs))
43
44#Classical tests (using asymptotic t distribution)
45cor.test(deg,drs)
46cor.test(bet,drs)
47cor.test(clo,drs)
48
49###########################################################
50#2.2 Testing correlations
51#Lets build a permutation test
52deg
53drs
54c.obs<-cor(deg,drs)
55
56#Permute one of the data sets
57sample(drs)
58cor(deg,sample(drs))
59
60#write a for loop to permute one of the data sets many times
61c.rep<-vector(length=1000)
62for(niter in 1:1000){
63  c.rep[niter]<-cor(deg,sample(drs))
64}
65
66#look at what we've created
67c.rep
68hist(c.rep)
69
70#compare to empirical data
71abline(v=c.obs,col="red")
72
73#put it all into a function!
74perm.cor.test<-function(x,y,niter=1000){ #Define a simple test function
75  c.obs<-cor(x,y)
76  c.rep<-vector()
77  for(i in 1:niter)
78    c.rep[i]<-cor(x,sample(y))
79  c.rep
80}
81
82#look at the results
83new.c.rep<-perm.cor.test(deg,drs)
84hist(new.c.rep)
85abline(v=c.obs,col="red")
86
87#calculate quantiles
88mean(new.c.rep>=c.obs)
89mean(new.c.rep<=c.obs)
90mean(abs(new.c.rep)>=abs(c.obs))
91
92#make the output display these quantiles
93perm.cor.test<-function(x,y,niter=1000){ #Define a simple test function
94  c.obs<-cor(x,y)
95  c.rep<-vector()
96  for(i in 1:niter)
97    c.rep[i]<-cor(x,sample(y),use="complete.obs")
98  cat("Vector Permutation Test:\n\tObserved correlation: ",c.obs,"\tReplicate quantiles
99      (niter=",niter,")\n",sep="")
100  cat("\t\tPr(rho>=obs):",mean(c.rep>=c.obs),"\n")
101  cat("\t\tPr(rho<=obs):",mean(c.rep<=c.obs),"\n")
102  cat("\t\tPr(|rho|>=|obs|):",mean(abs(c.rep)>=abs(c.obs)),"\n")
103  invisible(list(obs=c.obs,rep=c.rep))
104}
105
107?cor.test
108?t.test
109?sample
110
111###########################################################
112#2.3 Using NLIs as regression covariates
113
114pstaff<-emon[]%v%"Paid.Staff" # Get more EMON covariates
115vstaff<-emon[]%v%"Volunteer.Staff"
117
118#Simple model: decision rank is linear in size, degree, and govt status
119mod<-lm(drs~deg+pstaff+vstaff+govt)
120summary(mod)
121anova(mod) #Some useful lm tools
122AIC(mod)
123
124#Try with alternative measures....
125mod2<-lm(drs~bet+pstaff+vstaff+govt) #Betweenness
126summary(mod2)
127mod3<-lm(drs~clo+pstaff+vstaff+govt) #Closeness
128summary(mod3)
129AIC(mod,mod2,mod3) #Closeness wins!
130
132?lm
133?anova
134?AIC
135
136###########################################################
137#1.4 Graph correlation and bivariate QAP
138# Remember the Florentine families data
139data(florentine)
141gplot(flomarriage) # Examine marriage ties
142
143# Could the two be related?
144par(mfrow=c(1,2))
148title("Marriage Ties")
149par(mfrow=c(1,1))
150
151# Let's try a graph correlation
153
154# Why can't we use our previous permutation test?
156# take a look
157rmperm
158
159par(mfrow=c(1,2))
164par(mfrow=c(1,1))
165
166#now look at qaptest
167qaptest
168
169#and use it
171summary(qt) # Examine the results
172plot(qt) # Plot the QAP distribution
173
174# Testing against covariate effects
175wealth<-sapply(flomarriage%v%"wealth",rep,network.size(flomarriage))
176wealth
177wealthdiff<-abs(outer(flomarriage%v%"wealth",flomarriage%v%"wealth","-"))
178wealthdiff
179qt1<-qaptest(list(flomarriage,wealth),gcor,g1=1,g2=2)
180qt2<-qaptest(list(flomarriage,wealthdiff),gcor,g1=1,g2=2)
181summary(qt1) # Do wealthy families have more ties?
182summary(qt2) # Is there a wealth difference effect?
183
185?qaptest
186?gcor
187?outer
188?sapply
189?rep
190
191###########################################################
192#2.5 Network Regression
193
194# Combine the previous tests (takes a while to perform QAP test)
196summary(marriagefit) # Examine the results
197
198# Another example: we begin by preparing the response variable. We will use the Cheyenne
199# EMON in valued form, but need to recode the frequency data
200Y<-as.sociomatrix(emon[], "Frequency") # Extract frequencies
201Y[Y>0]<-5-Y[Y>0] # Now, higher -> more frequent
202
203# Extract some vertex attributes
204crk<-emon[]%v% "Command.Rank.Score" # Command rank
206
207# Form some predictor matrices (X variables)
208Xcr<-sapply(crk,rep,length(crk)) # Column effects for command rank
209Xcr
210Xsp<-outer(spon,spon,"!=") # Dyadic effect for type difference
211Xsp
212
213# Fit the model (takes a while to perform QAP test)
214cmfit<-netlm(Y,list(Xcr,Xsp))
215summary(cmfit) # Examine the results
216
217cmfitB<-netlm(Y,list(Xcr,Xsp),nullhyp="classical")
218summary(cmfitB) # In this example, pretty similar
219
221?outer
222?netlm
223
224###########################################################
225###########################################################
226##3 Graph Level Indices and Conditional Uniform Graph Tests
227#3.1 Permutation tests for GLI/graph-level covariate association
228
229# Here we consider the famous Sampson monastery data:
230par(mfrow=c(4,3),mar=c(2,1,4,1))
231for(i in 1:length(sampson))
232  gplot(sampson[[i]],displaylabel=TRUE,boxed.label=FALSE,main=names(sampson)[i])
233par(mfrow=c(1,1),mar=c(5,4,4,2)+.01)
234
235# Are positive relations more reciprocal (relative to density) than negative
236# ones? Let's try a simple permutation test:
237
238# Calculate reciprocity levels for each network
239r4<-grecip(sampson,measure="edgewise.lrr")
240r4
241
242# Categorize relationships by positive and negative
243ispos<-c(TRUE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,FALSE,TRUE,FALSE)
244
245# Calculate the empirical relationship
246obs<-sum(r4[ispos])-sum(r4[!ispos])
247obs # But what does this mean?
248
249# Run a permutation test
250reps<-vector()
251for(i in 1:1e4){
252  temp<-sample(ispos)
253  reps[i]<-sum(r4[temp])-sum(r4[!temp])
254}
255
256#calculate statistics and observe
257mean(reps>=obs) #Upper tail p-value
258mean(abs(reps)>=abs(obs)) # Two-sided version
259hist(reps)
260abline(v=obs,col=2,lwd=3) # Visualize it
261
262
263# We can look at transitivity as well. How does transitivity compare to density? (Log-odds method)
264log(gtrans(sampson)/gden(sampson))
265
266# Are positive relations more transitive (relative to density) than negative ones? Let's try another vector permutation test:
267ltr<-log(gtrans(sampson)/gden(sampson))
268obs<-sum(ltr[ispos])-sum(ltr[!ispos])
269reps<-vector()
270for(i in 1:1e4){
271  temp<-sample(ispos)
272  reps[i]<-sum(ltr[temp])-sum(ltr[!temp])
273}
274mean(reps>=obs) # Upper tail p-value
275mean(abs(reps)>=abs(obs)) # Two-sided version
276hist(reps)
277abline(v=obs,col=2,lwd=3) # Visualize it
278
279###########################################################
280##3.2 Comparing graphs via the triad census
281# Let's get the triad census for each network
283tc
284
285# Cool trick: two-way correspondence analysis of graphs and their triad census
286# scores (aka a "Faust diagram"). Networks here appear close to the triad
287# types they contain at excess frequency (distances are chi-squared based;
288# see references in ?corresp for more detail).
289library(MASS) # Requires the MASS package
291
292# What if this data were symmetric? We can symmetrize to illustrate.
294rownames(tc)<-names(sampson)
295plot(corresp(tc,nf=2)) # Plot undirected network/triad association
296
298?gtrans
300?corresp
301?symmetrize
302
303###########################################################
304#3.3 Simple univariate conditional uniform graph tests
305# The cug.test function provides a simple interface for univariate CUG tests.
306# Let's try testing some data on trade in complex manufactured goods to see if overall
307# activity (density) is greater then would be expected from size alone.
308cug.test(ctrade,gden) # Call cug.test with the gden (density) function
309
310# Is there more reciprocity than density would suggest? Let's see.
311cug.test(ctrade,grecip,cmode="edges") # Conditioning on edges, calling grecip
312
313# Given biases in density and reciprocity, we might look to see if there is a
314# bias in transitivity, too. This time, let's condition on all of the above.
316
317# We are not limited to simple commands. Let's try indegree centralization:
319# Note that we here must pass not only arguments to
320# cug.test, but also to centralization and degree!
321
322ct # Print the result
323plot(ct) # Can also plot it!
324
325###########################################################
326###########################################################
327##4 Multivariate Analysis of Graph Sets
328#4.1 Distance based methods: clustering and scaling
329
330# Start by calculating Hamming distances for the Sampson data
331samphd<-hdist(sampson)
332samphd
333
334# Now, try an MDS solution
335sampmds<-cmdscale(samphd)
336sampmds
337plot(sampmds,type="n") # Plot the results
338text(sampmds,label=names(sampson))
339
340# MDS suggests a three-cluster solution; let's try hclust
341samphc<-hclust(as.dist(samphd))
342plot(samphc,labels=names(sampson)) # Very clear solution
343rect.hclust(samphc,k=3)
344
345# Examine central graphs for each cluster
346sampcg<-gclust.centralgraph(samphc,k=3,sampson)
347par(mfrow=c(2,2))
348gplot(sampcg[1,,],main="Positive CG")
349gplot(sampcg[2,,],main="Negative CG")
350gplot(sampcg[3,,],main="Liking CG")
351par(mfrow=c(1,1))
352
353# More fun - can plot stats by cluster!
354gclust.boxstats(samphc,k=3,grecip(sampson,measure="edgewise.lrr"), names=c("Positive","Negative","Liking"), main="Edgewise LRR Reciprocity by Relational Type")
355gclust.boxstats(samphc,k=3,gtrans(sampson), names=c("Positive","Negative","Liking"), main="Transitivity by Relational Type")
356
358?hdist
359?cmdscale
360?hclust
361?rect.hclust
362?gclust.centralgraph
363?gdist.plotstats
364
365###########################################################
366#4.2 Network PCA
367# To begin, let's get the graph correlation matrix for the Sampson nets
368sampcor<-gcor(sampson)
369sampcor
370
371# Now, we compute the eigendecomposition (could also have used gcov above)
372sampeig<-eigen(sampcor)
373
374# Eigenvalues contain variance explained, to whit:
375evals<-sampeig\$value # Extract eigenvalues
376evals/sum(evals) # Variance explained
377
378# Show this as a scree plot
379barplot(evals/sum(evals),names.arg=1:length(evals))
380
385
386# Can rotate using varimax
388
389# Try plotting the first two components
390Basic Network Statistics with statnet Tutorial - Sunbelt 2012 - 8
392abline(h=0,v=0,lty=3)
395
396# Finally, extract scores
400
401#Visualize a score graph (not too helpful in this case!)
402coord<-gplot.layout.fruchtermanreingold(as.edgelist.sna(S1>0),NULL)
403gplot(S1!=0,edge.col=sign(S1)+3,coord=coord)
404
406?gcor
407?gcov
408?eigen
409?varimax
410?abline
411?arrows
412?sweep
413?gplot.layout
414
415###########################################################
416#4.3 Network CCA
417# For this one, we're going to use the country trade data. Somewhat perversely,
418# we're going to use yacca instead of netcancor (b/c this data set has
419# missing data which netcancor doesn't handle, and b/c yacca currently has
420# better visualizations).
421library(yacca) # You'll need to install this....
422# Perform a canonical correlation analysis between relations and attribute
423# differences
426
427# Visualize the output
429
431?netcancor
432?yacca
433?cca
434?gvectorize
435
436###########################################################
437#4.4 Studying Qualitative Dynamics with Network MDS
438# Johnson collected 'liking' scores for researchers working in the South Pole on a monthly basis over two years
439class(johnsonPolarY1)
440dim(johnsonPolarY1)
441johnsonPolarY1[1,,]
442
443# Visualize the data using weight or color to aid interpretation
444gplot(johnsonPolarY1)
445gplot(johnsonPolarY1[1,,],edge.lwd=johnsonPolarY1[1,,])
446coords<-gplot(johnsonPolarY1[1,,],edge.col=rainbow(10)[johnsonPolarY1[1,,]], vertex.col="black")
448for(i in 1:8)
449  gplot(johnsonPolarY1[i,,],edge.col=rainbow(10)[johnsonPolarY1[i,,]],
450        vertex.col="black",coord=coords)
451
452# Calculate Hamming distances for each of the two Johnson data sets
453jp1d <- hdist(johnsonPolarY1)
454jp2d <- hdist(johnsonPolarY2)
455
456# Now try an MDS solution
457jp1mds<-cmdscale(jp1d)
458jp1mds
459plot(jp1mds,type="l",lty=2) # Plot the results
460text(jp1mds,label=rownames(johnsonPolarY1),font=2)
461jp2mds<-cmdscale(jp2d)
462jp2mds
463plot(jp2mds,type="l",lty=2) # Plot the results
464text(jp2mds,label=rownames(johnsonPolarY2),font=2)