## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
knitr::opts_chunk$set(cache=F, comment=NA)
# knitr::opts_chunk$set(tidy = "styler")
knitr::opts_chunk$set(tidy.opts=list(indent=3, width.cutoff=60))


## ----outhook, include=FALSE---------------------------------------------------
local({
  hook_output <- knitr::knit_hooks$get('output')
  knitr::knit_hooks$set(output = function(x, options) {
    if (!is.null(options$max.height)) options$attr.output <- c(
      options$attr.output,
      sprintf('style="max-height: %s;"', options$max.height)
    )
    hook_output(x, options)
  })
})


## ----dev, child = 'common/statnet-dev-team.md'--------------------------------


## ----project, child = 'common/statnet-project.md'-----------------------------


## ----install-ergmgp,eval=FALSE------------------------------------------------
# install.packages(c("ergmgp","sna","tsna","ndtv"))


## ----loadPackage--------------------------------------------------------------
library(ergmgp)
library(sna)
library(tsna)
library(ndtv)


## ----version------------------------------------------------------------------
packageVersion("ergmgp")


## ----first-egp-ctergm, tidy=TRUE, eval=TRUE, cache=TRUE-----------------------
set.seed(1331)
net <- network.initialize(10, directed = FALSE)  #Initialize the network
simbg <- simEGP(form = net ~ edges, coef = log(1.5/(9-1.5))/2, time = 100, 
    process = "CTERGM") #Run the simulation


## ----first-egp-ctergm-out, tidy=TRUE, eval=TRUE, cache=TRUE-------------------
class(simbg)     # What is sim?
simbg            # Examine the simulation output


## ----twogrp-egp-cstergm-setup, tidy=TRUE, eval=TRUE, cache=TRUE---------------
set.seed(1331)
net <- network(rgraph(50, tp=1.5/49, mode="graph"), directed = FALSE)
net %v% "x" <- sample(0:1, 50, replace = TRUE)


## ----twogrp-egp-cstergm-setup2, tidy=TRUE, eval=TRUE, cache=TRUE--------------
# Set things up
tgform <- list(                              # Create the formulas
  formation = net ~ edges + nodematch("x"),
  dissolution = net ~ edges + esp(0)
)
tgcoef <- list(
  formation = c(log(1.5/48.5), 3),
  dissolution = c(log(1/5), -3)
)


## ----twogrp-egp-cstergm-run, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="300px"----
# Run the simulation (this one takes a while!)
set.seed(1331)
simtg <- simEGP(form = tgform, coef = tgcoef, time = 100, process = "CSTERGM") 
simtg


## ----twogrp-egp-cstergm-plot, tidy=TRUE, eval=TRUE, cache=TRUE----------------
plot(simtg, vertex.col="x")   # Plot the simulated network


## ----second-egp-ctergm, tidy=TRUE, eval=TRUE, cache=TRUE,max.height="200px"----
set.seed(1331)
net <- network.initialize(10, directed = FALSE) 
simbg <- simEGPTraj(form = net ~ edges, coef = log(1.5/(9-1.5))/2, time = 100, 
    process = "CTERGM", trajectories = 100, statsonly = TRUE)


## ----second-egp-ctergm-out, tidy=TRUE, eval=TRUE, cache=TRUE------------------
class(simbg)         # What kind of animal is it?
length(simbg)        # How long is it?
simbg[[1]]           # Examine the first element


## ----second-egp-ctergm-md, tidy=TRUE, eval=TRUE, cache=TRUE-------------------
md <- sapply(simbg,function(z){z[,"edges"][2]/choose(10,2)*9})  # Mean degrees
mean(md)                                                        # Get the mean of means
hist(md, xlab = "Mean Degree", main = "MD Across Trajectories") # Plot the distribution


## ----egp-2r-run, tidy=TRUE, eval=TRUE, cache=TRUE-----------------------------
set.seed(1331)
n <- 150
net <- network.initialize(n, directed=FALSE)
sim2r <- simEGPTraj(net~edges+kstar(2)+nsp(1:2), coef=c(109-log(n),-25,-1.25,3.25), time=1000, checkpoints = 11, log.sampling = TRUE, process = "LERGM", verbose = TRUE) 


## ----egp-2r-viz, tidy=TRUE, eval=TRUE, cache=TRUE-----------------------------
par(mfrow=c(3,4), mar=c(0.1,0.1,2,0.1))
for(i in 1:12) 
  plot(sim2r[[i]], main = paste("Time =",round(sim2r[[i]]%n%"Time", 2)), mode="kamadakawai")


## ----first-egp-ctergm-hist, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="200px"----
set.seed(1331)
net <- network.initialize(10, directed = FALSE) 
simbg <- simEGP(form = net ~ edges, coef = log(1.5/(9-1.5))/2, time = 100, 
    process = "CTERGM", return.changetime = TRUE, return.history = TRUE)
list.network.attributes(simbg)


## ----first-egp-ctergm-hist-out, tidy=TRUE, eval=TRUE, cache=TRUE--------------
simbg %n% "LastChangeTime"      # Get matrix of last change times
head(simbg %n% "EventHistory")  # Examine event history


## ----first-egp-ctergm-hist-out2, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="200px"----
set.seed(1331)
simnd <- simEGP(form = net ~ edges, coef = log(1.5/(9-1.5))/2, time = 100, 
    process = "CTERGM", return.networkDynamic = TRUE)
simnd


## ----first-egp-ctergm-hist-out3, tidy=TRUE, eval=TRUE, cache=TRUE-------------
slice <- network.extract(simnd, at = 2*pi)  # Pull the state at a given moment
slice2 <- simnd %t% (2*pi)                  # Can also use the %t% operator for this
all(slice[,]==slice2[,])                    # The results should be the same


## ----history-egp-cstergm-run, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="200px"----
set.seed(1331)
net <- network(rgraph(50, tp=1.5/49, mode="graph"), directed = FALSE)
net %v% "x" <- sample(0:1, 50, replace = TRUE)

set.seed(1331)
simtgh <- simEGP(form = tgform, coef = tgcoef, time = 100, 
    process = "CSTERGM", return.history = TRUE) 
set.seed(1331)
simtgnd <- simEGP(form = tgform, coef = tgcoef, time = 100, 
    process = "CSTERGM", return.networkDynamic = TRUE)
set.seed(1331)
simtgcp <- simEGPTraj(form = tgform, coef = tgcoef, time = 100, 
    process = "CSTERGM", checkpoints = 10)


## ----history-egp-cstergm-get, tidy=TRUE, eval=TRUE, cache=TRUE----------------
# Quick helper function to update a network using toggle information
histAccum <- function(basenet, toggles) {
  ergm.godfather(basenet ~ edges, changes = toggles, end.network=TRUE)
}

# Find the network state at several checkpoints
cpts <- vector(mode="list",length=9)
cpts[[1]] <-net
for(i in 2:9){             # Walk through the checkpoints
  # Select the toggles we want
  togs <- (simtgh %n% "EventHistory")[ (simtgh %n% "EventHistory")[,1]<= 100*(i-1)/8 ,2:3]
  # Apply and recover the graph
  cpts[[i]] <- histAccum(net, togs)
}

# Plot the results
par(mfrow=c(3,3), mar=c(0.1,0.1,2,0.1))
for(i in 1:9)
  plot(cpts[[i]], vertex.col = "x", main = paste("Time",(i-1)/8*100))


## ----history-egp-cstergm-stats, tidy=TRUE, eval=TRUE, cache=TRUE--------------
attr(simtgcp,"stats")   # Obtain the statistics for each checkpoint


## ----history-egp-cstergm-stats2, tidy=TRUE, eval=TRUE, cache=TRUE-------------
summary(simtgcp ~ edges + kstar(2) + esp(1:3) + gwdegree(0.5, fixed= TRUE)) # Arbitrary example


## ----history-egp-cstergm-stats3, tidy=TRUE, eval=TRUE, cache=TRUE-------------
# Track evolution of edge and within-group edge counts
tog <- (simtgh%n%"EventHistory")[,2:3]    # Get the toggle set
stats <- ergm.godfather(net ~ edges + nodematch("x"), 
    changes= lapply(apply(tog,1,as.matrix,simplify=F),"t"), stats.start = TRUE)

# Plot edge counts over time
plot(c(0.0001,(simtgh%n%"EventHistory")[,1]), stats[,2], type="l", col =2, 
    xlab= "Time", ylab = "Edge Count", ylim=c(0.0001,160), log="x")
lines(c(0.0001,(simtgh%n%"EventHistory")[,1]), stats[,1]-stats[,2], col = 4)
legend("topright", lty=1, col=c(2,4), legend = c("Within-group","Between-group"))


## ----history-egp-cstergm-durations, tidy=TRUE, eval=TRUE, cache=TRUE----------
dur <- durations(simtgnd)  # Compute the durations
head(dur)                  # Note the censoring column

# Plot the duration distribution
plot(density(dur[,1]), log="x")


## ----history-egp-cstergm-frp, tidy=TRUE, eval=TRUE, cache=TRUE----------------
forward.reachable(simtgnd, v=1)                         # Whom can 1 reach over the whole period
forward.reachable(simtgnd, v=1, end=median(dur))        # Whom can 1 reach in a typical edge duration
forward.reachable(simtgnd, v=1, start=100-median(dur))  # Same at the end
forward.reachable(simtgnd, v=1, start=50, end=50+median(dur)) # Same in the middle

# How many vertices can an average vertex reach, at a random time, in a typical duration?
cnt <- sapply(1:150, function(z){
  st <- runif(1,0,100)
  length(forward.reachable(simtgnd, v=sample(1:50,1), start=st, end=st+median(dur)))
})
hist(cnt, xlab = "Number Reachable", main = "Forward Reachability Distribution")


## ----history-egp-cstergm-tp, tidy=TRUE, eval=TRUE, cache=TRUE-----------------
plot(tPath(simtgnd, v=1, start=50))                    # Plot paths from 1 to others
plot(tPath(simtgnd, v=1, end=50, direction="bkwd", type="latest"))  # Plot paths from others to 1


## ----history-egp-cstergm-tsna, tidy=TRUE, eval=TRUE, cache=TRUE---------------
# Examine transitivity through time
plot(tSnaStats(simtgnd, snafun="gtrans", time.interval=1),type="b", ylab="Transitivity")

# Examine the degree distribution over time
dd <- tSnaStats(simtgnd, snafun="degree", time.interval=100/9, gmode="graph")
dim(dd)  # This is a time x vertex matrix
par(mfrow=c(3,3))
for(i in 1:9)
  hist(dd[i,], xlab="Degree", main = paste("Time =",attr(dd,"tsp")[3]*i))


## ----history-egp-cstergm-ndtv, tidy=TRUE, eval=FALSE--------------------------
# simtgnd%n%"slice.par"<-list(start=0,end=10,aggregate.dur=0.0008, interval=0.1,rule="latest")
# render.d3movie(simtgnd, vertex.col="x")


## ----hazcalc, tidy=TRUE, eval=TRUE, cache=TRUE--------------------------------
data(florentine)
haz <- EGPHazard(flomarriage ~ edges + esp(0), coef = c(-1, -1), process = "LERGM")
head(haz)


## ----hazcalc-2, tidy=TRUE, eval=TRUE, cache=TRUE------------------------------
fnam <- network.vertex.names(flomarriage)      # Get the family names
ord <- order(haz[,"logHaz"], decreasing=TRUE)  # Order by hazard
# Get the top ten
cbind(fnam[haz[ord,1]],fnam[haz[ord,2]], haz[ord,"IsForm"],round(haz[ord,"logHaz"],2))[1:10,] 


## ----hazcalc-3, tidy=TRUE, eval=TRUE, cache=TRUE------------------------------
a <- function(z) (z-min(z))/diff(range(z))    # Convenience scaling function

# Create a matrix of color codes - blue = fast, red = slow
hm <- matrix(0,16,16)
hm[upper.tri(hm)] <- a(haz[,4])
hm[lower.tri(hm)] <- t(hm)[lower.tri(hm)]
cm <- matrix(hsv((hm)*0.6), 16, 16)

plot(flomarriage, edge.col=cm, displaylabels=TRUE)


## ----hazcalc-4, tidy=TRUE, eval=TRUE------------------------------------------
# Select out the nulls, and winnow them down
fhm <- hm
fhm <- fhm*(1-flomarriage[,])
fhm <- fhm > quantile(fhm,0.75)  # Pick everything in the upper quartile

# Plot the most likely candidates
gplot(fhm, edge.col=cm, gmode="graph", displaylabels=TRUE) # Use gplot to plot adjacency matrices


## ----rateest, tidy=TRUE, eval=TRUE, cache=TRUE--------------------------------
set.seed(1331)

# How much time, on average, to get 100 events?
EGPRateEst(flomarriage ~ edges + esp(0), coef = c(-1, -1), process = "LERGM",event.target=100)

# How many events, on average, in 10 time units?
EGPRateEst(flomarriage ~ edges + esp(0), coef = c(-1, -1), process = "LERGM",time.target=10)


## ----simdynam-fit, tidy=TRUE, eval=TRUE, cache=TRUE---------------------------
data(faux.desert.high)  # Load the data set
plot(faux.desert.high, vertex.col="grade")  # Take a quick look at the network, by grade

set.seed(1331)
dhfit <- ergm(faux.desert.high ~ edges + mutual + absdiff("grade") + nodefactor("race",base=5)
    + nodefactor("grade",base=3) + nodefactor("sex") + nodematch("race",diff=TRUE)
    + nodematch("grade", diff=TRUE) + nodematch("sex") +idegree(0:1) +odegree(0:1)
    + gwesp(0.1,fixed=TRUE), control=control.ergm(MCMC.interval=1e4, main.method="Stochastic",
    SA.nsubphases=5,init=c(log(0.038),rep(0,29))))
summary(dhfit)


## ----simdynam-setup, tidy=TRUE, eval=TRUE, cache=TRUE-------------------------
# Define the coefficients
dhdco <- log(-log(0.15)/6)       # Dissolution coefficient (15% survival at 6 months)
dhfco <- coef(dhfit)             # Formation coefficients (from ERGM)
dhfco[1] <- dhfco[1] + dhdco     #   Need to adjust the edge term for dissolution
dhco <- list(formation = dhfco, dissolution = dhdco)

# Define the formula (which is just the ERGM formula)
dhform <- formula(dhfit)


## ----simdynam-sim, tidy=TRUE, eval=TRUE, cache=TRUE---------------------------
set.seed(1331)
dhsim <- simEGP(dhform, coef=dhco, process = "CDCSTERGM", time=12, return.networkDynamic=TRUE)


## ----simdynam-movie, tidy=TRUE, eval=FALSE------------------------------------
# # Render "a year in the life," at weekly resolution
# dhsim%n%"slice.par"<-list(start=0,end=12,aggregate.dur=0.25, interval=0.25,rule="latest")
# render.d3movie(dhsim, vertex.col="grade", edge.lwd=2, mode="kamadakawai")


## ----simdynam-drama, tidy=TRUE, eval=TRUE, cache=TRUE-------------------------
# Compute bonpow scores over time, with rho<0; we use a value close to the maximum
# convergent value
dhbp <- tSnaStats(dhsim, snafun="bonpow", exponent=-1/Mod(eigen(dhsim[,])$val[1]), start=0,
    end=12, time.interval=0.25)

# Visualize the scores
tsteps<-seq(from=0, to=12, by=0.25)
gr<-dhsim%v%"grade"
plot(0,0,type="n",xlim=c(0,12), ylim=range(dhbp),ylab="BonPow Score", xlab="Time (Months)")
for(i in 1:NCOL(dhbp))
  lines(tsteps, dhbp[,i], col=hsv((gr[i]-7)/(12-7)*0.6,alpha=0.5))
legend("topright", fill=hsv((0:5)/5*0.6,alpha=0.5), legend=paste("Grade",7:12))


## ----simdynam-drama-2, tidy=TRUE, eval=TRUE, cache=TRUE-----------------------
# Plot each week against the next
powlast<-as.vector(dhbp[-NROW(dhbp),])
pownext<-as.vector(dhbp[-1,])
plot(powlast,pownext, cex=0.5, col=rgb(0,0,0,0.5), xlab="Week i", ylab = "Week i+1", 
    main = "BonPow Over Time")

# What's the overall first-order autocorrelation"
cor(powlast,pownext)


## ----simdynam-drama-3, tidy=TRUE, eval=TRUE, cache=TRUE-----------------------
# Plot mean BonPow scores 
plot(0,0,type="n",xlim=c(0,12), ylim=c(0,1.5),ylab="BonPow Score", xlab="Time (Months)")
for(i in 7:12)
  lines(tsteps, dhbp%*%(gr==i)/sum(gr==i), col=hsv((i-7)/(12-7)*0.6), lwd=2)
legend("topleft", fill=hsv((0:5)/5*0.6), legend=paste("Grade",7:12), bg="white")


## ----sexdynam-fit, tidy=TRUE, eval=TRUE, cache=TRUE---------------------------
# Create our synthetic network (we don't fit to it, but it is computationally important)
set.seed(1331)
snet <- as.network(rgraph(922,tp=1.8/921,mode="graph")[1:453,454:922], 
    bipartite=453, directed=FALSE)
snet%v%"sex" <- rep(c("M","F"),times=c(453,469))

# Fit an ERGM to the population, using the estimated sufficient statistics
set.seed(1331)
snfit <- ergm(snet ~ edges+degree(0,by="sex"), target.stats=c(408, 181, 281),
    control=control.ergm(MCMC.interval=1e5))
summary(snfit)


## ----sexdynam-setup, tidy=TRUE, eval=TRUE, cache=TRUE-------------------------
# Define the coefficients
sndco <- -log(8.3)               # Dissolution coefficient (mean survival time 8.3 months)
snfco <- coef(snfit)             # Formation coefficients (from ERGM)
snfco[1] <- snfco[1] + sndco     #   Need to adjust the edge term for dissolution
snco <- list(formation = snfco, dissolution = sndco)

# Define the formula (which is just the ERGM formula)
snform <- formula(snfit)


## ----sexdynam-sim, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="300px"-------
snstart <- simulate(snfit, control=control.simulate.ergm(MCMC.interval=1e6))
plot(snstart, vertex.col="sex", vertex.cex=0.5, mode="kamadakawai")

snform<-as.formula(paste0("snstart ~",as.character(snform)[3]))
snsim <- simEGP(snform, coef=snco, process="CDCSTERGM", time = 24, return.networkDynamic=TRUE)


## ----sexdynam-movie, tidy=TRUE, eval=FALSE, cache=TRUE------------------------
# # Render two years, in monthly intervals
# snsim%n%"slice.par"<-list(start=0,end=24,aggregate.dur=0.25, interval=1,rule="latest")
# render.d3movie(snsim, vertex.col="sex", edge.lwd=2, vertex.cex=0.5, mode="kamadakawai")


## ----sexdynam-ob, tidy=TRUE, eval=TRUE, cache=TRUE----------------------------
meanobsize<-function(z){
  cs<-network.extract(snsim,at=z)
  sum(component.dist(cs, connected="weak")$csize^2)/network.size(cs)
}

obrisk<-sapply(0:24,meanobsize)
plot(obrisk,xlab="Time (Months)", ylab="Mean Outbreak Size", type ="b")


## ----emon-init, tidy=TRUE, eval=TRUE, cache=TRUE------------------------------
data(emon)
enet<-network(symmetrize(emon[[7]]),directed=FALSE)
crs<-emon[[7]]%v%"Command.Rank.Score"
crs[is.na(crs)]<-0
enet%v%"CRS"<-crs
enet%v%"Loc"<-emon[[7]]%v%"Location"
enet%v%"Type"<-emon[[7]]%v%"Sponsorship"


## ----emon-ergm, tidy=TRUE, eval=TRUE, cache=TRUE------------------------------
set.seed(1331)
emfit<-ergm(enet~edges + nodecov("CRS") + nodefactor("Type") +nodematch("Loc")
    + gwesp(0.75,fixed=TRUE))
summary(emfit)


## ----emon-ergm-gof, tidy=TRUE, eval=TRUE, cache=TRUE--------------------------
set.seed(1331)
emgof<-gof(emfit)
par(mfrow=c(2,2))
plot(emgof)


## ----emon-sim, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="300px"-----------
# Create an empty copy of the EMON object
enull<-enet
enull[,]<-0

# Simulate multiple trajectories, starting with the empty graph
set.seed(1331)
etraj<-simEGPTraj(enull~edges + nodecov("CRS") + nodefactor("Type") +nodematch("Loc")
    + gwesp(0.75,fixed=TRUE), coef=coef(emfit), process="LERGM", events=150, 
    checkpoints= 25, trajectories=25)


## ----emon-movie, tidy=TRUE, eval=FALSE----------------------------------------
# # Animate the first trajectory, scaling vertices by CRS and coloring by location
# render.d3movie(networkDynamic(network.list=etraj[[1]]), vertex.cex=0.25+2*crs/max(crs),
#     vertex.col="Loc")


## ----emon-vis, tidy=TRUE, eval=TRUE, cache=TRUE, max.height="300px"-----------
par(mfrow=c(2,2))
boxplot(t(sapply(etraj,function(z){attr(z,"stats")[,"edges"]})), xlab="Events",
    ylab="Edges", main="Edges")
boxplot(t(sapply(etraj,function(z){attr(z,"stats")[,"nodecov.CRS"]})), xlab="Events", 
    ylab="CRS-weighted Edges", main="CRS-weighted Edges")
boxplot(t(sapply(etraj,function(z){attr(z,"stats")[,"nodefactor.Type.State"]})),
    xlab="Events", ylab="State Edges", main="State Edges")
boxplot(t(sapply(etraj,function(z){attr(z,"stats")[,"gwesp.fixed.0.75"]})),
    xlab="Events", ylab="GWESP", main="GWESP")