library(igraph)
library(network)
library(tidygraph)
library(ggraph)
library(tidyverse)
library(sna)
library(patchwork)
6 Analysing Networks in R
From Networks to Insights: centrality, clustering, community detection, and network-level measures
Now that you can build basic networks, it’s time to analyse them. This chapter introduces the fundamental descriptive tools: centrality measures that identify important nodes, clustering coefficients and transitivity that capture local structure, community detection algorithms that partition networks, and global indices like density. We’ll also touch on random graph generation as a baseline for comparison.
Setup and Essentials
Loading packages
Importing and Creating Networks
Built-in network data
data(flo) ## Medici family network
flo Acciaiuoli Albizzi Barbadori Bischeri Castellani
Acciaiuoli 0 0 0 0 0
Albizzi 0 0 0 0 0
Barbadori 0 0 0 0 1
Bischeri 0 0 0 0 0
Castellani 0 0 1 0 0
Ginori 0 1 0 0 0
Guadagni 0 1 0 1 0
Lamberteschi 0 0 0 0 0
Medici 1 1 1 0 0
Pazzi 0 0 0 0 0
Peruzzi 0 0 0 1 1
Pucci 0 0 0 0 0
Ridolfi 0 0 0 0 0
Salviati 0 0 0 0 0
Strozzi 0 0 0 1 1
Tornabuoni 0 0 0 0 0
Ginori Guadagni Lamberteschi Medici Pazzi Peruzzi
Acciaiuoli 0 0 0 1 0 0
Albizzi 1 1 0 1 0 0
Barbadori 0 0 0 1 0 0
Bischeri 0 1 0 0 0 1
Castellani 0 0 0 0 0 1
Ginori 0 0 0 0 0 0
Guadagni 0 0 1 0 0 0
Lamberteschi 0 1 0 0 0 0
Medici 0 0 0 0 0 0
Pazzi 0 0 0 0 0 0
Peruzzi 0 0 0 0 0 0
Pucci 0 0 0 0 0 0
Ridolfi 0 0 0 1 0 0
Salviati 0 0 0 1 1 0
Strozzi 0 0 0 0 0 1
Tornabuoni 0 1 0 1 0 0
Pucci Ridolfi Salviati Strozzi Tornabuoni
Acciaiuoli 0 0 0 0 0
Albizzi 0 0 0 0 0
Barbadori 0 0 0 0 0
Bischeri 0 0 0 1 0
Castellani 0 0 0 1 0
Ginori 0 0 0 0 0
Guadagni 0 0 0 0 1
Lamberteschi 0 0 0 0 0
Medici 0 1 1 0 1
Pazzi 0 0 1 0 0
Peruzzi 0 0 0 1 0
Pucci 0 0 0 0 0
Ridolfi 0 0 0 1 1
Salviati 0 0 0 0 0
Strozzi 0 1 0 0 0
Tornabuoni 0 1 0 0 0
Creating network objects with igraph
The igraph package is now the standard for network analysis in R. It’s faster, more flexible, and better maintained than older packages.
# From adjacency matrix
g_flo <- graph_from_adjacency_matrix(flo, mode = "undirected", weighted = NULL)
# Quick inspection
g_floIGRAPH 7e3456f UN-- 16 20 --
+ attr: name (v/c)
+ edges from 7e3456f (vertex names):
[1] Acciaiuoli--Medici Albizzi --Ginori
[3] Albizzi --Guadagni Albizzi --Medici
[5] Barbadori --Castellani Barbadori --Medici
[7] Bischeri --Guadagni Bischeri --Peruzzi
[9] Bischeri --Strozzi Castellani--Peruzzi
[11] Castellani--Strozzi Guadagni --Lamberteschi
[13] Guadagni --Tornabuoni Medici --Ridolfi
[15] Medici --Salviati Medici --Tornabuoni
+ ... omitted several edges
# Create from edge list
# Toy edge list written row-wise: each row is one tie between two vertex ids.
edges <- tribble(
  ~from, ~to,
  1, 2,
  1, 3,
  2, 3,
  2, 4,
  3, 4
)
edges# A tibble: 5 × 2
from to
<dbl> <dbl>
1 1 2
2 1 3
3 2 3
4 2 4
5 3 4
g <- graph_from_data_frame(edges, directed = FALSE)
gIGRAPH 264c02d UN-- 4 5 --
+ attr: name (v/c)
+ edges from 264c02d (vertex names):
[1] 1--2 1--3 2--3 2--4 3--4
Inspecting igraph objects
Once we have an igraph object (i.e. a network) we can inspect it.
# Printing the igraph object name returns descriptive information about the network
class(g_flo)[1] "igraph"
g_floIGRAPH 7e3456f UN-- 16 20 --
+ attr: name (v/c)
+ edges from 7e3456f (vertex names):
[1] Acciaiuoli--Medici Albizzi --Ginori
[3] Albizzi --Guadagni Albizzi --Medici
[5] Barbadori --Castellani Barbadori --Medici
[7] Bischeri --Guadagni Bischeri --Peruzzi
[9] Bischeri --Strozzi Castellani--Peruzzi
[11] Castellani--Strozzi Guadagni --Lamberteschi
[13] Guadagni --Tornabuoni Medici --Ridolfi
[15] Medici --Salviati Medici --Tornabuoni
+ ... omitted several edges
To examine vertex attributes:
vertex_attr(g_flo)$name
[1] "Acciaiuoli" "Albizzi" "Barbadori" "Bischeri"
[5] "Castellani" "Ginori" "Guadagni" "Lamberteschi"
[9] "Medici" "Pazzi" "Peruzzi" "Pucci"
[13] "Ridolfi" "Salviati" "Strozzi" "Tornabuoni"
igraph::degree(g_flo) Acciaiuoli Albizzi Barbadori Bischeri Castellani
1 3 2 3 3
Ginori Guadagni Lamberteschi Medici Pazzi
1 4 1 6 1
Peruzzi Pucci Ridolfi Salviati Strozzi
3 0 3 2 4
Tornabuoni
3
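So far only $name is stored as a vertex attribute; the degrees shown above are computed by igraph::degree() and can be saved as a $degree attribute. Access vertex attributes using igraph syntax: V(igraph_name)$attribute_name.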
# Name attribute
V(g_flo)$name [1] "Acciaiuoli" "Albizzi" "Barbadori" "Bischeri"
[5] "Castellani" "Ginori" "Guadagni" "Lamberteschi"
[9] "Medici" "Pazzi" "Peruzzi" "Pucci"
[13] "Ridolfi" "Salviati" "Strozzi" "Tornabuoni"
# Degree attribute
V(g_flo)$degree <- igraph::degree(g_flo)
V(g_flo)$degree [1] 1 3 2 3 3 1 4 1 6 1 3 0 3 2 4 3
g_floIGRAPH 7e3456f UN-- 16 20 --
+ attr: name (v/c), degree (v/n)
+ edges from 7e3456f (vertex names):
[1] Acciaiuoli--Medici Albizzi --Ginori
[3] Albizzi --Guadagni Albizzi --Medici
[5] Barbadori --Castellani Barbadori --Medici
[7] Bischeri --Guadagni Bischeri --Peruzzi
[9] Bischeri --Strozzi Castellani--Peruzzi
[11] Castellani--Strozzi Guadagni --Lamberteschi
[13] Guadagni --Tornabuoni Medici --Ridolfi
[15] Medici --Salviati Medici --Tornabuoni
+ ... omitted several edges
We can add new attributes to vertices in the same way:
V(g_flo)+ 16/16 vertices, named, from 7e3456f:
[1] Acciaiuoli Albizzi Barbadori Bischeri
[5] Castellani Ginori Guadagni Lamberteschi
[9] Medici Pazzi Peruzzi Pucci
[13] Ridolfi Salviati Strozzi Tornabuoni
V(g_flo)$fav_pizza <- rep(c("Margherita", "Pepperoni", "Vegetarian", "Hawaiian", "Quattro Formaggi"), times = 4)[1:16]
g_floIGRAPH 7e3456f UN-- 16 20 --
+ attr: name (v/c), degree (v/n), fav_pizza (v/c)
+ edges from 7e3456f (vertex names):
[1] Acciaiuoli--Medici Albizzi --Ginori
[3] Albizzi --Guadagni Albizzi --Medici
[5] Barbadori --Castellani Barbadori --Medici
[7] Bischeri --Guadagni Bischeri --Peruzzi
[9] Bischeri --Strozzi Castellani--Peruzzi
[11] Castellani--Strozzi Guadagni --Lamberteschi
[13] Guadagni --Tornabuoni Medici --Ridolfi
[15] Medici --Salviati Medici --Tornabuoni
+ ... omitted several edges
E(g_flo)+ 20/20 edges from 7e3456f (vertex names):
[1] Acciaiuoli--Medici Albizzi --Ginori
[3] Albizzi --Guadagni Albizzi --Medici
[5] Barbadori --Castellani Barbadori --Medici
[7] Bischeri --Guadagni Bischeri --Peruzzi
[9] Bischeri --Strozzi Castellani--Peruzzi
[11] Castellani--Strozzi Guadagni --Lamberteschi
[13] Guadagni --Tornabuoni Medici --Ridolfi
[15] Medici --Salviati Medici --Tornabuoni
[17] Pazzi --Salviati Peruzzi --Strozzi
[19] Ridolfi --Strozzi Ridolfi --Tornabuoni
Edge attributes work identically. Access them with E(igraph_name)$attribute_name:
# Add a new edge attribute
E(g_flo)$trust_level <- rep(c("high", "low", "medium", "high", "low"), times = 4)[1:20]
# View existing edge attributes
edge_attr(g_flo)$trust_level
[1] "high" "low" "medium" "high" "low" "high" "low"
[8] "medium" "high" "low" "high" "low" "medium" "high"
[15] "low" "high" "low" "medium" "high" "low"
# Access a specific edge attribute
E(g_flo)$trust_level [1] "high" "low" "medium" "high" "low" "high" "low"
[8] "medium" "high" "low" "high" "low" "medium" "high"
[15] "low" "high" "low" "medium" "high" "low"
You can also query attributes conditionally. For example, to find vertices with degree greater than 2:
# 1. Create the attribute
V(g_flo)$degree <- igraph::degree(g_flo)
# 2. Now you can filter using the attribute
V(g_flo)[degree > 2]+ 9/16 vertices, named, from 7e3456f:
[1] Albizzi Bischeri Castellani Guadagni Medici
[6] Peruzzi Ridolfi Strozzi Tornabuoni
Network Description and Visualization
Basic network properties
vcount(g_flo) # number of vertices[1] 16
ecount(g_flo) # number of edges[1] 20
is_directed(g_flo)[1] FALSE
is_weighted(g_flo)[1] FALSE
Static visualisation
Base igraph plotting
plot(g_flo,
vertex.size = 8,
vertex.label.cex = 0.8,
edge.width = 1.5)ggraph for publication-quality plots
ggraph(g_flo, layout = 'fr') +
geom_edge_link(edge_width = 0.5, edge_colour = 'grey70') +
geom_node_point(size = 10, colour = 'steelblue') +
geom_node_text(aes(label = name), repel = F, size = 3) +
theme_graph()Working with Edges and Vertices
Adding and removing edges
g <- make_empty_graph(n = 5)
# Add edges by indexing
g <- g + edge(c(1, 2, 2, 3))
gIGRAPH a3d83d9 D--- 5 2 --
+ edges from a3d83d9:
[1] 1->2 2->3
Vertex and edge attributes
# Add vertex attributes
V(g_flo)$degree <- igraph::degree(g_flo)
V(g_flo)+ 16/16 vertices, named, from 7e3456f:
[1] Acciaiuoli Albizzi Barbadori Bischeri
[5] Castellani Ginori Guadagni Lamberteschi
[9] Medici Pazzi Peruzzi Pucci
[13] Ridolfi Salviati Strozzi Tornabuoni
# Add edge attributes (on weighted example)
g_weighted <- g_flo
E(g_weighted)$weight <- runif(ecount(g_flo))Centrality Measures
Degree centrality
Indicators of centrality assign numbers or rankings to nodes within a graph corresponding to their network position. Degree centrality is defined as the number of links incident upon a node (i.e., the number of ties that a node has).
deg <- igraph::degree(g_flo)
deg Acciaiuoli Albizzi Barbadori Bischeri Castellani
1 3 2 3 3
Ginori Guadagni Lamberteschi Medici Pazzi
1 4 1 6 1
Peruzzi Pucci Ridolfi Salviati Strozzi
3 0 3 2 4
Tornabuoni
3
Betweenness centrality
Betweenness centrality is a measure based on shortest paths in a graph. It measures how frequently a node appears on the shortest path between other nodes in the graph.
bet <- igraph::betweenness(g_flo)
bet Acciaiuoli Albizzi Barbadori Bischeri Castellani
0.000000 19.333333 8.500000 9.500000 5.000000
Ginori Guadagni Lamberteschi Medici Pazzi
0.000000 23.166667 0.000000 47.500000 0.000000
Peruzzi Pucci Ridolfi Salviati Strozzi
2.000000 0.000000 10.333333 13.000000 9.333333
Tornabuoni
8.333333
Closeness centrality
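Closeness centrality of a node is the reciprocal of the total length of the shortest paths between that node and all other nodes it can reach in the graph. Thus the more central a node is, the closer it is to all other nodes and the higher its score. Pucci is isolated and cannot reach any other node, so its closeness is undefined (NaN).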
clo <- igraph::closeness(g_flo)
clo Acciaiuoli Albizzi Barbadori Bischeri Castellani
0.02631579 0.03448276 0.03125000 0.02857143 0.02777778
Ginori Guadagni Lamberteschi Medici Pazzi
0.02380952 0.03333333 0.02325581 0.04000000 0.02040816
Peruzzi Pucci Ridolfi Salviati Strozzi
0.02631579 NaN 0.03571429 0.02777778 0.03125000
Tornabuoni
0.03448276
Eigenvector centrality
Eigenvector centrality is a measure of the influence of a node in a network. It assigns relative scores to all nodes in the network based on the concept that connections to high-scoring nodes contribute more to the score of the node in question than equal connections to low-scoring nodes.
eig <- igraph::eigen_centrality(g_flo)$vector
eig Acciaiuoli Albizzi Barbadori Bischeri Castellani
0.3071155 0.5669336 0.4919853 0.6572037 0.6019551
Ginori Guadagni Lamberteschi Medici Pazzi
0.1741141 0.6718805 0.2063449 1.0000000 0.1041427
Peruzzi Pucci Ridolfi Salviati Strozzi
0.6407743 0.0000000 0.7937398 0.3390994 0.8272688
Tornabuoni
0.7572302
Visualising centrality
ggraph(g_flo, layout = 'fr') +
geom_edge_link(edge_width = 0.5, edge_colour = 'grey70') +
geom_node_point(aes(size = deg), colour = 'steelblue', alpha = 0.8) +
geom_node_text(aes(label = name), repel = F, size = 3) +
scale_size(range = c(2, 10)) +
theme_graph() +
labs(size = "Degree")Visualising centrality
# Calculate centrality measures
V(g_flo)$degree <- igraph::degree(g_flo)
V(g_flo)$betweenness <- igraph::betweenness(g_flo)
V(g_flo)$eigenvector <- igraph::eigen_centrality(g_flo)$vector
# Create a common layout for consistency across plots
layout_fr <- layout_with_fr(g_flo)
# Shared scaffolding for the three centrality panels: the same fixed layout and
# edge layer underneath, and the same labels, size scale and theme on top.
centrality_base <- ggraph(g_flo, layout = layout_fr) +
  geom_edge_link(edge_width = 0.5, edge_colour = "grey70")
centrality_finish <- list(
  geom_node_text(aes(label = name), repel = TRUE, size = 3),
  scale_size(range = c(2, 10)),
  theme_graph()
)

# Node size encodes the `degree` vertex attribute.
p1 <- centrality_base +
  geom_node_point(aes(size = degree), colour = "steelblue", alpha = 0.8) +
  centrality_finish +
  labs(title = "Degree Centrality", size = "Degree")

# Node size encodes the `betweenness` vertex attribute.
p2 <- centrality_base +
  geom_node_point(aes(size = betweenness), colour = "coral", alpha = 0.8) +
  centrality_finish +
  labs(title = "Betweenness Centrality", size = "Betweenness")

# Node size encodes the `eigenvector` vertex attribute.
p3 <- centrality_base +
  geom_node_point(aes(size = eigenvector), colour = "seagreen", alpha = 0.8) +
  centrality_finish +
  labs(title = "Eigenvector Centrality", size = "Eigenvector")
(p1 | p2 | p3)Network-Level Indices
Density and clustering
Edge density measures what proportion of all possible connections actually exist in the network. A value of 1 means the network is complete (everyone connected to everyone); 0 means no edges at all. For the Florentine families, this tells us how tightly knit the merchant elite were.
Transitivity (the global clustering coefficient) captures the tendency for triangles to form: if A is connected to B and B to C, how likely is it that A connects directly to C? Values range from 0 to 1, where higher values indicate more clustering. This is often interpreted as evidence of in-group cohesion or information flow redundancy in the network.
edge_density(g_flo)[1] 0.1666667
transitivity(g_flo) # global clustering coefficient[1] 0.1914894
In this example, an edge density of 0.167 means only about one in six possible connections exist—the merchant elite were far from fully interconnected. In social networks, resource constraints (time, trust, capital) limit how many relationships any actor can maintain.
The transitivity of 0.191 is notably similar to the edge density, which suggests the network lacks strong clustering. If the families formed tight cliques—say, through repeated intermarriage within regional or trade blocs—we’d expect transitivity to substantially exceed density.
Reciprocity
# Create a directed example
# Four arcs forming two mutual pairs: 1 <-> 2 and 2 <-> 3.
dir_edges <- tibble(
  from = c(1, 2, 2, 3),
  to = c(2, 1, 3, 2)
)
g_dir <- graph_from_data_frame(dir_edges, directed = TRUE)
reciprocity(g_dir)[1] 1
Components and connectivity
igraph::components(g_flo)$membership
Acciaiuoli Albizzi Barbadori Bischeri Castellani
1 1 1 1 1
Ginori Guadagni Lamberteschi Medici Pazzi
1 1 1 1 1
Peruzzi Pucci Ridolfi Salviati Strozzi
1 2 1 1 1
Tornabuoni
1
$csize
[1] 15 1
$no
[1] 2
is_connected(g_flo)[1] FALSE
A component is a maximal subset of vertices where every node can reach every other node through some path. components() returns the number of components and their sizes. If is_connected() returns TRUE, the network has a single component; otherwise, it’s fragmented into disconnected subgroups. For the Florentine families, this reveals whether the entire merchant elite formed one integrated social system or split into separate factions with no bridges between them. Here the output shows two components: 15 families form one connected component, while Pucci has no ties and forms a component of its own.
## Geodesic distances
distances(g_flo)[1:3, 1:3] Acciaiuoli Albizzi Barbadori
Acciaiuoli 0 2 2
Albizzi 2 0 2
Barbadori 2 2 0
diameter(g_flo)[1] 5
Geodesic distance is the shortest path length between two nodes. The distance matrix shows pairwise distances; the diameter is the longest shortest path in the network—the maximum “steps” needed to reach one node from any other. A small diameter indicates efficient information or resource flow across the network; a large diameter suggests bottlenecks or structural holes. For these families, diameter tells us whether influence could spread quickly through intermarriage chains or whether some families were isolated from broader networks.
Subgroups and Cohesion
k-cores
kc <- coreness(g_flo)
kc Acciaiuoli Albizzi Barbadori Bischeri Castellani
1 2 2 2 2
Ginori Guadagni Lamberteschi Medici Pazzi
1 2 1 2 1
Peruzzi Pucci Ridolfi Salviati Strozzi
2 0 2 1 2
Tornabuoni
2
A k-core is a maximal subgraph where every node has degree at least k within that subgraph. coreness() assigns each vertex its core number—the highest k for which it belongs to a k-core. Core numbers reveal the “nested” structure of a network: high-coreness nodes form a densely connected inner circle, while low-coreness nodes sit on the periphery. For the Florentine families, this identifies the most influential merchant families (high coreness) who maintained tight reciprocal ties, versus those with fewer connections.
Cliques
cliques(g_flo, min = 3)[[1]]
+ 3/16 vertices, named, from 7e3456f:
[1] Medici Ridolfi Tornabuoni
[[2]]
+ 3/16 vertices, named, from 7e3456f:
[1] Castellani Peruzzi Strozzi
[[3]]
+ 3/16 vertices, named, from 7e3456f:
[1] Bischeri Peruzzi Strozzi
A clique is a subset of vertices where every pair is connected; i.e. a complete subgraph. cliques(g_flo, min = 3) finds all groups of three or more families where all members are mutually connected. Cliques represent the tightest cohesive structures in the network and often indicate shared interests or formal alliances. However, cliques can overlap substantially, so they capture local cohesion rather than global community structure.
Communities
comm <- cluster_louvain(g_flo) # modularity-based communities
membership(comm) Acciaiuoli Albizzi Barbadori Bischeri Castellani
1 2 3 3 3
Ginori Guadagni Lamberteschi Medici Pazzi
2 2 2 1 1
Peruzzi Pucci Ridolfi Salviati Strozzi
3 4 1 1 3
Tornabuoni
1
modularity(comm)[1] 0.39875
?cluster_louvain()
Communities are groups of vertices that are more densely connected internally than to the rest of the network. The Louvain algorithm optimises modularity—a measure of how many more edges fall within groups than would be expected by chance. membership(comm) assigns each vertex to a community; modularity(comm) returns a score of at most 1, where higher values indicate stronger community structure and values near or below 0 indicate a partition no better than chance. Unlike cliques, communities partition the entire network into non-overlapping groups, offering a global view of the network’s factional organisation.
ggraph(g_flo, layout = 'fr') +
geom_edge_link(edge_width = 0.5, edge_colour = 'grey70') +
geom_node_point(aes(colour = factor(membership(comm))), size = 5) +
geom_node_text(aes(label = name), repel = TRUE, size = 3) +
theme_graph() +
theme(legend.position = 'bottom')Community detection
Community detection (or cluster analysis) seeks to find groups of nodes that are internally densely connected and externally sparsely connected—that is, communities or clusters. The distinction between a network with clear cluster structure and one without becomes apparent visually. Below are two examples.
A network with visible and intuitive cluster structure:
set.seed(42)
# All pairs (i, j) with i < j drawn from `ids`, i.e. one edge for every pair of
# distinct vertices, as a two-column matrix suitable for graph_from_edgelist().
clique_edges <- function(ids) {
  grid <- expand.grid(ids, ids)
  as.matrix(grid[grid[, 1] < grid[, 2], ])
}
# Three blocks of 15 mutually linked vertices (ids 1-15, 16-30 and 31-45)
clique1 <- graph_from_edgelist(clique_edges(1:15))
clique2 <- graph_from_edgelist(clique_edges(16:30))
clique3 <- graph_from_edgelist(clique_edges(31:45))
# Join the three graphs into one; the sparse random between-block edges are
# added in a later step.
# NOTE(review): the printed summary reports 90 vertices rather than 45, which
# suggests `+` acts as a disjoint union here - confirm this is intended.
g_clustered <- clique1 + clique2 + clique3
g_clusteredIGRAPH 81af053 D--- 90 315 --
+ edges from 81af053:
[1] 1-> 2 1-> 3 2-> 3 1-> 4 2-> 4 3-> 4 1-> 5 2-> 5 3-> 5
[10] 4-> 5 1-> 6 2-> 6 3-> 6 4-> 6 5-> 6 1-> 7 2-> 7 3-> 7
[19] 4-> 7 5-> 7 6-> 7 1-> 8 2-> 8 3-> 8 4-> 8 5-> 8 6-> 8
[28] 7-> 8 1-> 9 2-> 9 3-> 9 4-> 9 5-> 9 6-> 9 7-> 9 8-> 9
[37] 1->10 2->10 3->10 4->10 5->10 6->10 7->10 8->10 9->10
[46] 1->11 2->11 3->11 4->11 5->11 6->11 7->11 8->11 9->11
[55] 10->11 1->12 2->12 3->12 4->12 5->12 6->12 7->12 8->12
[64] 9->12 10->12 11->12 1->13 2->13 3->13 4->13 5->13 6->13
[73] 7->13 8->13 9->13 10->13 11->13 12->13 1->14 2->14 3->14
+ ... omitted several edges
set.seed(12)
# Number of extra edges: 30% of the current edge count, rounded up.
n_edges_add <- ceiling(ecount(g_clustered) * 0.3)
# Draw two endpoints per new edge, independently and with replacement, so
# self-loops and repeated pairs are possible. seq_len() is used instead of
# 1:n so the id range is never built backwards.
random_edges <- sample(
  seq_len(vcount(g_clustered)),
  2 * n_edges_add,
  replace = TRUE
)
# Consecutive pairs in `random_edges` become the new edges.
g_clustered <- add_edges(g_clustered, random_edges)
ggraph(g_clustered, layout = 'fr') +
geom_edge_link(edge_width = 0.2, edge_colour = 'grey70') +
geom_node_point(colour = 'steelblue', size = 3, alpha = 0.8) +
theme_graph() g_clusteredIGRAPH 066e3a3 D--- 90 410 --
+ edges from 066e3a3:
[1] 1-> 2 1-> 3 2-> 3 1-> 4 2-> 4 3-> 4 1-> 5 2-> 5 3-> 5
[10] 4-> 5 1-> 6 2-> 6 3-> 6 4-> 6 5-> 6 1-> 7 2-> 7 3-> 7
[19] 4-> 7 5-> 7 6-> 7 1-> 8 2-> 8 3-> 8 4-> 8 5-> 8 6-> 8
[28] 7-> 8 1-> 9 2-> 9 3-> 9 4-> 9 5-> 9 6-> 9 7-> 9 8-> 9
[37] 1->10 2->10 3->10 4->10 5->10 6->10 7->10 8->10 9->10
[46] 1->11 2->11 3->11 4->11 5->11 6->11 7->11 8->11 9->11
[55] 10->11 1->12 2->12 3->12 4->12 5->12 6->12 7->12 8->12
[64] 9->12 10->12 11->12 1->13 2->13 3->13 4->13 5->13 6->13
[73] 7->13 8->13 9->13 10->13 11->13 12->13 1->14 2->14 3->14
+ ... omitted several edges
# Detect communities
g_clustered <- as.undirected(g_clustered)
comm_clustered <- cluster_louvain(g_clustered)
V(g_clustered)$community <- igraph::membership(comm_clustered)
g_clusteredIGRAPH 91db7ae U--- 90 393 --
+ attr: community (v/n)
+ edges from 91db7ae:
[1] 1-- 2 1-- 3 2-- 3 1-- 4 2-- 4 3-- 4 1-- 5 2-- 5 3-- 5
[10] 4-- 5 1-- 6 2-- 6 3-- 6 4-- 6 5-- 6 1-- 7 2-- 7 3-- 7
[19] 4-- 7 5-- 7 6-- 7 1-- 8 2-- 8 3-- 8 4-- 8 5-- 8 6-- 8
[28] 7-- 8 1-- 9 2-- 9 3-- 9 4-- 9 5-- 9 6-- 9 7-- 9 8-- 9
[37] 1--10 2--10 3--10 4--10 5--10 6--10 7--10 8--10 9--10
[46] 1--11 2--11 3--11 4--11 5--11 6--11 7--11 8--11 9--11
[55] 10--11 1--12 2--12 3--12 4--12 5--12 6--12 7--12 8--12
[64] 9--12 10--12 11--12 1--13 2--13 3--13 4--13 5--13 6--13
+ ... omitted several edges
ggraph(g_clustered, layout = 'fr') +
geom_edge_link(edge_width = 0.2, edge_colour = 'grey70') +
geom_node_point(aes(colour = factor(community)), size = 3, alpha = 0.8) +
theme_graph() +
labs(title = "", colour = "Community")
A network with weak or absent cluster structure:
set.seed(42)
# Create a random Erdős–Rényi network
g_random <- sample_gnp(90, 0.1)
ggraph(g_random, layout = 'fr') +
geom_edge_link(edge_width = 0.2, edge_colour = 'grey70') +
geom_node_point(colour = 'black', size = 3, alpha = 0.8) +
theme_graph()
The clustering algorithms implemented in igraph are:
[1] "cluster_edge_betweenness" "cluster_fast_greedy"
[3] "cluster_fluid_communities" "cluster_infomap"
[5] "cluster_label_prop" "cluster_leading_eigen"
[7] "cluster_leiden" "cluster_louvain"
[9] "cluster_optimal" "cluster_spinglass"
[11] "cluster_walktrap"
Most of these optimise modularity—the fraction of edges within groups minus the expected fraction under random assignment. These range from greedy optimisation (fast but approximate) to spectral methods and information-theoretic approaches, each with different computational costs and sensitivity to network structure. cluster_louvain() offers a good balance of speed and quality for most applications, though cluster_leiden() addresses known limitations with smaller clusters.
Structural Equivalence
Two people are structurally equivalent if they have the exact same connections to the same people, even if they don’t know each other directly. Put simply, it is trying to identify “who plays the same role” in a network.
We use method = "invlogweight", which weights each shared connection by the inverse logarithm of that connection’s degree. The idea is that sharing a connection with a “popular” person (a hub) is less telling than sharing a connection with a “lonely” person. If you and I both know the same person who has 100 friends, it might be a coincidence; if we both know the same person who has only two friends, we are likely very similar in the social structure.
## Inverse log-weighted similarity between every pair of vertices
## (despite its name, `se_dist` holds similarities, not distances)
se_dist <- similarity(g_flo, method = "invlogweight")
## Hierarchical clustering
## These similarities are not capped at 1, so `1 - se_dist` can be negative.
## Subtracting from the largest off-diagonal similarity keeps the ordering of
## pairs unchanged while making every dissimilarity non-negative.
se_max <- max(se_dist[lower.tri(se_dist)])
hc <- hclust(as.dist(se_max - se_dist))
plot(hc)
Nodes that are joined together at the very bottom of the tree are the most “structurally equivalent.” As you move up the tree, the groups become broader and less similar. The leaves are numbered by vertex index; the table below maps each index to a family name.
# Create Index-Name data frame
# Lookup table pairing each vertex index with its family name.
# seq_len() is used instead of 1:n so an empty graph yields zero rows rather
# than the backwards sequence c(1, 0).
flo_families <- data.frame(
  Index = seq_len(vcount(g_flo)),
  Name = V(g_flo)$name
)
# View the table
print(flo_families) Index Name
1 1 Acciaiuoli
2 2 Albizzi
3 3 Barbadori
4 4 Bischeri
5 5 Castellani
6 6 Ginori
7 7 Guadagni
8 8 Lamberteschi
9 9 Medici
10 10 Pazzi
11 11 Peruzzi
12 12 Pucci
13 13 Ridolfi
14 14 Salviati
15 15 Strozzi
16 16 Tornabuoni
Random Graph Models
Erdős–Rényi
Each possible edge is included independently with probability p. Produces a network with roughly uniform randomness and low clustering.
g_er <- sample_gnp(n = 20, p = 0.1)
plot(g_er, main = "Erdős–Rényi (Gnp)", vertex.size = 20, vertex.label = NA, edge.color = "gray")Configuration model (fixed degree sequence)
Generates a random graph that preserves a specified degree sequence. Useful to test network properties while controlling for node degrees.
g_conf <- igraph::sample_degseq(c(3,3,2,2,2))
plot(g_conf, main = "Configuration Model", vertex.size = 20, vertex.label = NA, edge.color = "steelblue")Small-world (Watts–Strogatz)
Starts with a regular lattice and randomly rewires edges with probability p. Produces networks with high clustering and short path lengths.
g_ws <- sample_smallworld(dim = 1, size = 50, nei = 2, p = 0.1)
plot(g_ws, main = "Small-World (Watts–Strogatz)", vertex.size = 5, vertex.label = NA, edge.color = "orange")Scale-free (preferential attachment)
Nodes are added one by one, preferentially attaching to high-degree nodes. Produces networks with hubs and a power-law degree distribution.
g_sf <- sample_pa(n = 50, power = 1, directed = FALSE)
plot(g_sf, main = "Scale-Free (Preferential Attachment)", vertex.size = 6, vertex.label = NA, edge.color = "purple")ggraph(g_flo, layout = 'fr') +
geom_edge_link(edge_width = 0.2, edge_colour = 'grey70') +
geom_node_point(aes(colour = factor(fav_pizza)), size = 3, alpha = 0.8) +
theme_graph() +
labs(title = "", colour = "Community")# Assortativity test for favourite pizza
assortativity_nominal(g_flo, types = as.numeric(factor(V(g_flo)$fav_pizza)))[1] -0.07424961