Skip to content

Commit

Permalink
progress subgraphs vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
rsbivand committed Sep 6, 2024
1 parent 01e5973 commit 8a95820
Show file tree
Hide file tree
Showing 20 changed files with 199 additions and 6 deletions.
2 changes: 1 addition & 1 deletion R/summary.nb.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ print.nb <- function(x, ...) {
cat("Average number of links:", mean(c.nb), "\n")
if(any(c.nb == 0)) cat(length(c.nb[c.nb == 0]), " region",
ifelse(length(c.nb[c.nb == 0]) < 2L, "", "s"), " with no links:\n",
paste(strwrap(paste(regids[which(c.nb == 0)], collapse=" ")),
paste(strwrap(paste(regids[which(c.nb == 0)], collapse=", ")),
collapse="\n"), "\n", sep="")
nc <- 0
if (!is.null(attr(x, "ncomp"))) {
Expand Down
Binary file added inst/etc/shapes/GB_2024_Wales_50m.gpkg.zip
Binary file not shown.
Binary file added inst/etc/shapes/GB_2024_southcoast_50m.gpkg.zip
Binary file not shown.
Binary file removed inst/etc/shapes/bhicv.gpkg
Binary file not shown.
Binary file added inst/etc/shapes/bhicv.gpkg.zip
Binary file not shown.
Binary file removed inst/etc/shapes/california.gpkg
Binary file not shown.
Binary file added inst/etc/shapes/california.gpkg.zip
Binary file not shown.
4 changes: 3 additions & 1 deletion man/bhicv.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
%%\format{}
%%\details{}
\examples{
bh <- st_read(system.file("etc/shapes/bhicv.gpkg",
if (as.numeric_version(unname(sf_extSoftVersion()["GDAL"])) >= "3.7.0") {
bh <- st_read(system.file("etc/shapes/bhicv.gpkg.zip",
package="spdep")[1])
}
}
\keyword{data}% at least one, from doc/KEYWORDS
4 changes: 3 additions & 1 deletion man/mstree.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ mstree(nbw, ini = NULL)
%%\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }
\examples{
### loading data
bh <- st_read(system.file("etc/shapes/bhicv.gpkg",
if (as.numeric_version(unname(sf_extSoftVersion()["GDAL"])) >= "3.7.0") {
bh <- st_read(system.file("etc/shapes/bhicv.gpkg.zip",
package="spdep")[1], quiet=TRUE)
### data padronized
dpad <- data.frame(scale(as.data.frame(bh)[,5:8]))
Expand All @@ -70,6 +71,7 @@ plot(st_geometry(bh), border=gray(.5))
plot(mst.bh, st_coordinates(st_centroid(bh)), col=2,
cex.lab=.6, cex.circles=0.035, fg="blue", add=TRUE)
}
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{graphs}
Expand Down
4 changes: 3 additions & 1 deletion man/read.gwt2nb.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ nc1ia <- read.swmdbf2listw(fn, region.id=as.character(nc_sf$UniqueID),
style="B", zero.policy=TRUE)
nc1ia
all.equal(nc1i, nc1ia)
cal <- st_read(system.file("etc/shapes/california.gpkg", package="spdep")[1])
if (as.numeric_version(unname(sf_extSoftVersion()["GDAL"])) >= "3.7.0") {
cal <- st_read(system.file("etc/shapes/california.gpkg.zip", package="spdep")[1])
fn <- system.file("etc/misc/contiguity_myid.dbf", package="spdep")[1]
cal1 <- read.swmdbf2listw(fn, style="B")
cal1a <- read.swmdbf2listw(fn, region.id=as.character(cal$MYID), style="B")
Expand Down Expand Up @@ -115,4 +116,5 @@ all(isTRUE(all.equal(cal1a_1n$neighbours, cal1_1n_rt$neighbours)))
all(isTRUE(all.equal(cal1a_1n$weights, cal1_1n_rt$weights)))
}
}
}
\keyword{spatial}
4 changes: 3 additions & 1 deletion man/skater.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ skater(edges, data, ncuts, crit, vec.crit, method = c("euclidean",
\seealso{See Also as \code{\link{mstree}}}
\examples{
### loading data
bh <- st_read(system.file("etc/shapes/bhicv.gpkg",
if (as.numeric_version(unname(sf_extSoftVersion()["GDAL"])) >= "3.7.0") {
bh <- st_read(system.file("etc/shapes/bhicv.gpkg.zip",
package="spdep")[1], quiet=TRUE)
### data standardized
dpad <- data.frame(scale(as.data.frame(bh)[,5:8]))
Expand Down Expand Up @@ -198,6 +199,7 @@ all.equal(res1, pres1, check.attributes=FALSE)
invisible(set.coresOption(coresOpt))
}
}
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{cluster}
Expand Down
1 change: 1 addition & 0 deletions src/gearyw.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SEXP gearyw(SEXP nb, SEXP weights, SEXP x, SEXP card, SEXP zeropolicy,
PROTECT(ans = NEW_NUMERIC(n)); pc++;

for (i=0; i < n; i++) {
R_CheckUserInterrupt();
if (INTEGER_POINTER(card)[i] == 0) {
if (LOGICAL_POINTER(zeropolicy)[0] == TRUE)
NUMERIC_POINTER(ans)[i] = 0;
Expand Down
1 change: 1 addition & 0 deletions src/gsymtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ SEXP gsymtest(SEXP nb, SEXP glist, SEXP card)
SET_VECTOR_ELT(ans, 1, NEW_NUMERIC(1));

for (i=0; i < n; i++) {
R_CheckUserInterrupt();
icard = INTEGER_POINTER(card)[i];
for (j=0; j<icard; j++) {
k = INTEGER_POINTER(VECTOR_ELT(nb, i))[j];
Expand Down
1 change: 1 addition & 0 deletions src/jc.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SEXP jcintern(SEXP nb, SEXP weights, SEXP dum, SEXP card) {

sum1 = 0.0;
for (i=0; i < n; i++) {
R_CheckUserInterrupt();
sum = 0.0;
if (INTEGER_POINTER(card)[i] > 0) {
for (j=0; j<INTEGER_POINTER(card)[i]; j++) {
Expand Down
1 change: 1 addition & 0 deletions src/lagw.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ SEXP lagw(SEXP nb, SEXP weights, SEXP x, SEXP card, SEXP zeropolicy,
}

for (i=0; i < n; i++) {
R_CheckUserInterrupt();
if (INTEGER_POINTER(card)[i] == 0) {
if (LOGICAL_POINTER(zeropolicy)[0] == TRUE)
NUMERIC_POINTER(ans)[i] = 0;
Expand Down
1 change: 1 addition & 0 deletions src/listw2sn.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ SEXP listw2sn(SEXP nbs, SEXP wts, SEXP card, SEXP ncard)
SET_VECTOR_ELT(ans, 2, NEW_NUMERIC(INTEGER_POINTER(ncard)[0]));

for (i=0, ii=0; i < n; i++) {
R_CheckUserInterrupt();
for (j=0; j < INTEGER_POINTER(card)[i]; j++) {
INTEGER_POINTER(VECTOR_ELT(ans, 0))[ii] = i+ROFFSET;
INTEGER_POINTER(VECTOR_ELT(ans, 1))[ii] =
Expand Down
9 changes: 9 additions & 0 deletions src/polypoly.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ SEXP polypoly(SEXP p1, SEXP n01, SEXP p2, SEXP n02, SEXP snap)
PROTECT(ans = NEW_INTEGER(1)); pc++;

for (i=0; (i < n1) && (k < 2); i++) {
R_CheckUserInterrupt();
x1 = NUMERIC_POINTER(p1)[i];
y1 = NUMERIC_POINTER(p1)[n1 + i];
for (j=0; (j < n2) && (k < 2); j++) {
Expand Down Expand Up @@ -191,6 +192,7 @@ int polypolyC(double *px1, double *py1, int n1, double *px2, double *py2,
double x1, x2, y1, y2, xd, yd;

for (i=0; (i < n1) && (k < crit); i++) {
R_CheckUserInterrupt();
x1 = px1[i];
y1 = py1[i];
for (j=0; (j < n2) && (k < crit); j++) {
Expand Down Expand Up @@ -244,6 +246,7 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,
cNRS = (int *) R_alloc((size_t) nn, sizeof(int));

for (i=0, li=0; i<nn; i++) {
R_CheckUserInterrupt();
card[i] = 0;
icard[i] = 0;
bb1[i] = NUMERIC_POINTER(bb)[i];
Expand All @@ -255,11 +258,13 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,
}

for (i=0; i<nn; i++) {
R_CheckUserInterrupt();
if (i == 0) cNRS[i] = 0;
else cNRS[i] = NRS[i-1] + cNRS[i-1];
}

for (i=0; i<uBound; i++) {
R_CheckUserInterrupt();
is[i] = 0;
jjs[i] = 0;
}
Expand All @@ -269,6 +274,7 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,

for (i=0, jj=0; i<nn; i++) {
nrsi = NRS[i];
R_CheckUserInterrupt();
for (j=0; j<nrsi; j++) {
plx[jj] = NUMERIC_POINTER(VECTOR_ELT(pl, i))[j];
ply[jj] = NUMERIC_POINTER(VECTOR_ELT(pl, i))[j+nrsi];
Expand All @@ -278,6 +284,7 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,
}

for (i=0; i<(nn-1); i++) {
R_CheckUserInterrupt();
li = length(VECTOR_ELT(i_findInBox, i));
nrsi = NRS[i];
for (j=0; j<li; j++) {
Expand Down Expand Up @@ -306,6 +313,7 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,
PROTECT(ans = NEW_LIST(nn)); pc++;

for (i=0; i<nn; i++) {
R_CheckUserInterrupt();
if (card[i] == 0) {
SET_VECTOR_ELT(ans, i, NEW_INTEGER(1));
INTEGER_POINTER(VECTOR_ELT(ans, i))[0] = 0;
Expand All @@ -328,6 +336,7 @@ SEXP poly_loop2(SEXP n, SEXP i_findInBox, SEXP bb, SEXP pl, SEXP nrs,
}

for (i=0; i<nn; i++) {
R_CheckUserInterrupt();
if ((li = length(VECTOR_ELT(ans, i))) > 1) {
for (j=0; j<li; j++)
icard[j] = INTEGER_POINTER(VECTOR_ELT(ans, i))[j];
Expand Down
1 change: 1 addition & 0 deletions src/symtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SEXP symtest(SEXP nb, SEXP card, SEXP verbose)

fstop = 0;
for (i=0; i < n; i++) {
R_CheckUserInterrupt();
icard = INTEGER_POINTER(card)[i];
for (j=0; j<icard; j++) {
flag = 0;
Expand Down
56 changes: 55 additions & 1 deletion vignettes/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,58 @@ @article{JSSv091i01
pages = {1--30},
doi = {10.18637/jss.v091.i01},
url = {https://www.jstatsoft.org/v091/i01}
}
}

@incollection{bivand+portnov:04,
author = {Bivand, Roger and Portnov, B.A.},
editor = {Luc Anselin and Raymond J. G. M. Florax and S. J. Rey},
title = {Exploring spatial data analysis techniques using {R}: the case of observations with no neighbours},
booktitle = {Advances in Spatial Econometrics: Methodology, Tools, Applications},
year = {2004},
publisher = {Springer},
address = {Berlin},
doi = {10.1007/978-3-662-05617-2_6},
pages = {121--142}
}
@article{FRENISTERRANTINO201825,
title = {A note on intrinsic conditional autoregressive models for disconnected graphs},
journal = {Spatial and Spatio-temporal Epidemiology},
volume = {26},
pages = {25-34},
year = {2018},
issn = {1877-5845},
doi = {10.1016/j.sste.2018.04.002},
author = {Anna Freni-Sterrantino and Massimo Ventrucci and Håvard Rue}
}

@Article{Bivand2018,
author="Bivand, Roger and Wong, David W. S.",
title="Comparing implementations of global and local indicators of spatial association",
journal="TEST",
year="2018",
volume="27",
number="3",
pages="716--748",
doi="10.1007/s11749-018-0599-x"
}
@article{bivandetal13,
author = {Bivand, Roger and Hauke, Jan and Kossowski, Tomasz},
title = {Computing the {J}acobian in {G}aussian Spatial Autoregressive Models: An Illustrated Comparison of Available Methods},
journal = {Geographical Analysis},
volume = {45},
number = {2},
pages = {150--179},
year = {2013},
doi = {10.1111/gean.12008}
}

@article{smirnov+anselin:09,
title = "An {O(N)} parallel method of computing the {L}og-{J}acobian of the variable transformation for models with spatial interaction on a lattice",
journal = "Computational Statistics & Data Analysis",
volume = "53",
number = "8",
pages = "2980 - 2988",
year = "2009",
author = "O. Smirnov and L. Anselin",
doi = "10.1016/j.csda.2008.10.010"
}
116 changes: 116 additions & 0 deletions vignettes/subgraphs.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
---
title: "No-neighbour observation and subgraph handling"
author: "Roger Bivand"
output:
html_document:
toc: true
toc_float:
collapsed: false
smooth_scroll: false
toc_depth: 2
bibliography: refs.bib
vignette: >
%\VignetteIndexEntry{No-neighbour observation and subgraph handling}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Introduction

The `spdep` package has always been careful about disconnected graphs, especially where the disconnected observations are graph nodes with no neighbours, that is no incoming or outgoing edges. In `nb` neighbour objects, they are encoded as integer vectors of length 1 containing integer `0`, which is an invalid index on $[1, N]$, where $N$ is the observation count. Functions taking neighbour objects as arguments use the `zero.policy` argument to guide how to handle no-neighbour observations.

`spdep` has also had `n.comp.nb` to find the number of disjoint connected subgraphs in an `nb` object, contributed by Nicholas Lewin-Koh in 2001, showing in addition which observations belong to which subgraph. Obviously, no-neighbour observations are singleton graph nodes, but subgraphs are also troubling for spatial analysis, because there is no connection between the spatial processes in those subgraphs. The ripples in one pond cannot cross into a separate pond if they are not connected.

From `spdep` 1.3-1, steps began to raise awareness of the possibility that neighbour objects might be created that are disconnected in some way, mostly through warnings, and through the computation of subgraph measures by default. This vignette is intended to provide some background to these steps.


## No-neighbour observations

From the start, `nb` objects have recorded no-neighbour observations as an integer vector of unit length and value `0`, where neighbours are recorded as ID values between `1` and `N`, where `N` is the observation count. `print` and `summary` methods have always reported the presence of no-neighbour observations, and listed their IDs (or `region.id` values). If an `nb` object contains no-neighbour observations, the user has to decide whether to drop those observations, or if retained, what value to give its weights. The `zero.policy` argument uses zero as the value if TRUE, but if FALSE causes `nb2listw` to fail. The value of `zero.policy` in a call to functions like `nb2listw`, `subset.listw` or `mat2listw` creating `listw` objects representing sparse spatial weights matrices is added to the created object as an attribute, and used subsequently to pass through that choice to other functions. For example, `moran.test` takes the value of this attribute as default for its `zero.policy` argument:

```{r}
library(spdep)
args(moran.test)
```

If observation $i$ has no neighbours, its weights sum $\sum_{j=1}^N w_{ij} = 0$, as $w_{ij} = 0, \forall j$ (see discussion in @bivand+portnov:04). Its eigenvalue will also be zero, with consequences for analytical inference:

```{r}
eigen(0)$values
```
The `adjust.n` argument to measures of spatial autocorrelation is by default TRUE, and subtracts the count of singleton nodes from $N$ in an attempt to acknowledge the reduction in information available.

One way in which no-neighbour observations may occur is when they are islands, and neighbours are defined as polygon features with contiguous boundaries. This is clearly the case in @FRENISTERRANTINO201825, where Capraia and Giglio Isles are singleton nodes. Here we take Westminster constituencies for Wales used in the July 2024 UK general election.

```{r}
run <- as.numeric_version(unname(sf_extSoftVersion()["GDAL"])) >= "3.7.0"
```

The boundaries are taken from the Ordnance Survey Boundary-Line site, https://osdatahub.os.uk/downloads/open/BoundaryLine, choosing the 2024 Westminster constituencies (https://www.os.uk/opendata/licence), simplified using a tolerance of 50m to reduce object size, and merged with selected voting outcomes for constituencies in Great Britain https://electionresults.parliament.uk/countries/1, (https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/). Here, the subset for Wales is useful as we will see:

```{r, eval=run}
w50m <- st_read(system.file("etc/shapes/GB_2024_Wales_50m.gpkg.zip", package="spdep"))
```



```{r, eval=run}
(w50m |> poly2nb(row.names=as.character(w50m$Constituency)) -> nb_W_50m)
```
The two subgraphs are the singleton Ynys Môn and all the other 31 constituencies:

```{r, eval=run}
attr(nb_W_50m, "ncomp")$comp.id |>table() |> table()
```
The left map shows that Ynys Môn can be shown selecting by name, as a black border, and by the zero cardinality of its neighbour set, using `card`, filling the polygon. The right map shows the location of the island, known in English as Anglesey, north-west of the Welsh mainland, and with no neighbour links:

```{r, eval=run}
ynys_mon <- w50m$Constituency == "Ynys Môn"
pts <- st_point_on_surface(st_geometry(w50m))
opar <- par(mfrow=c(1, 2))
plot(st_geometry(w50m), border="grey75")
plot(st_geometry(w50m)[ynys_mon], add=TRUE)
plot(st_geometry(w50m)[card(nb_W_50m) == 0L], add=TRUE, border="transparent", col="wheat1")
plot(st_geometry(w50m), border="grey75")
plot(nb_W_50m, pts, add=TRUE)
par(opar)
```
From the maps, we can see that the island is close to two constituencies across the Afon Menai (Menai Strait in English), the three simplified polygons being less than 280m apart, measured between polygon boundaries:

```{r, eval=run}
dists <- st_distance(w50m[ynys_mon,], w50m[!ynys_mon,])
sort(dists)
```
Using a `snap` distance of 280m, we can join the island to its two obvious proximate neighbours:

```{r, eval=run}
(nb_W_50m_snap <- poly2nb(w50m, row.names=as.character(w50m$Constituency), snap=280))
```
```{r, eval=run}
plot(st_geometry(w50m), border="grey75")
plot(nb_W_50m_snap, pts, add=TRUE)
```
In this case, increasing `snap` from its default of 10mm (or close equivalents for geometries with known metrics; previously `sqrt(.Machine$double.eps)` `r print(sqrt(.Machine$double.eps))` in all cases) helps. This is not always going to be the case, but here the strait is narrow. If islands are much further offshore, other steps may be required, because a large `snap` distance will draw in extra neighbours for already connected observations. It is also possible that increasing the `snap` distance may fail to link islands if they are not considered candidate neighbours, that is if their extents (bounding boxes), buffered out by the `snap` value, do not intersect.

```{r, eval=run}
k2 <- knearneigh(pts, k=2)
k2$nn[which(ynys_mon),]
```


## Subgraphs

```{r, eval=run}
sc50m <- st_read(system.file("etc/shapes/GB_2024_southcoast_50m.gpkg.zip", package="spdep"))
```



## Unintentional disconnected graphs


## References

0 comments on commit 8a95820

Please sign in to comment.