Skip to content

Commit

Permalink
First commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown authored and unknown committed Jan 18, 2016
0 parents commit b572d1e
Show file tree
Hide file tree
Showing 24 changed files with 1,384 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
.\temp
.\test
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
12 changes: 12 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Package: spcreml
Type: Package
Title: Spatial Modelling using Composite REML
Version: 0.1
Date: 2016-01-08
Authors@R: person("Kohleth, Chia", email = "[email protected]", role = c("aut", "cre"))
Description: Spatial Modelling using Composite REML.
License: GPL-3
LazyData: TRUE
Imports:
geoR (>= 1.7-5.1),foreach (>= 1.4.3)
RoxygenNote: 5.0.1
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated by roxygen2: do not edit by hand

38 changes: 38 additions & 0 deletions R/calcABJB.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' calcAB, calcJB
#'

calcAB=function(pointsj,Y,coordsMat,theta,Xmat,cov.model,heter,...){
out=foreach(pointsj=pointsj,.packages="geoR",.export=c("genSigma"))%dopar%{
A=B=0
Sigmaj=genSigma(distM=dist(coordsMat[pointsj,]),theta = theta,cov.model = cov.model,addNugget=TRUE)
Yj=Y[pointsj]
Xj=Xmat[pointsj,]
A=A+crossprod(Xj,solve(Sigmaj,Xj))
B=B+crossprod(Xj,solve(Sigmaj,Yj))
list(A=A,B=B)
}
A=Reduce("+",lapply(out,function(x)x$A))
B=Reduce("+",lapply(out,function(x)x$B))
return(list(A=A,B=B))
}


calcJB=function(g2list,pointsj,Y,coordsMat,theta,Xmat,cov.model,heter,...){
out=foreach(g2=g2list,.packages="geoR",.export=c("genSigma"),.combine="+")%dopar%{
# g2=g2list[[1]]
j=pointsj[[g2[[1]]]]
j2=pointsj[[g2[[2]]]]
Xj=Xmat[j,]
Xj2=Xmat[j2,]
Sj=genSigma(distM = dist(coordsMat[j,]),theta = theta,cov.model = cov.model,addNugget = TRUE)
Sj2=genSigma(distM = dist(coordsMat[j2,]),theta = theta,cov.model = cov.model,addNugget = TRUE)
QXj=solve(Sj,Xj)
QXj2=solve(Sj2,Xj2)
# dist2set=function(jj,jj2)dist(coordsMat[c(jj,jj2),])
# Djj2=outer(j,j2,FUN = Vectorize(dist2set))
D0=as.matrix(dist(coordsMat[c(j,j2),]))
Djj2=D0[1:length(j),length(j)+(1:length(j2))]
covYjYj2=theta[1]*exp(-Djj2/theta[2])
crossprod(QXj,covYjYj2)%*%QXj2
}
}
47 changes: 47 additions & 0 deletions R/fitCReml.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Fit a spatial model by Composite REML
#'
#' @param form a formula that specifies the response and the fixed effect variables.
#' @param data a data.frame that contains the response variable and the fixed effect variables.
#' @param dS a list of functions to compute the first derivative of Sigma (dSigma/dtheta).
#' @param coordsMat a N by D coordinate matrix. D being the spatial dimension, typically 2 or 3.
#' @param init named vector of initial value for optimiation of \code{recl}. Must (at least) contain named elements \emph{sigma2} and \emph{phi}. Can also contain \emph{nugget} and \emph{kappa}.
#' @param poinstj A list of row numbers that indicates which row belongs to block j (block j is itself a union of block k and block l). See Details.
#' @param pointsjpair A list of pair integers from \code{1} to \code{length(pointsj)}. Each pair indicates which pairs of \code{pointjs} components should be bundled together in the variance calculation. See Details.
#' @param pointsk
#' @param cov.model character string that specifies the spatial covariance model. See \code{?geoR::cov.spatial}
#' @param lower Numeric vector of lower bound for the estimating parameters. Default to NULL (no lower bound).
#'


fitCReml=function(form,data,dS,coordsMat,init,pointsj,pointsjpair,pointsk,cov.model=cov.model,lower=NULL){

trend=lm(form,data=data,y=TRUE,x=TRUE)
Xmat=trend$x
Y=trend$y

t1=proc.time()
opt=optim(par = init,fn = recl,gr = gr,method = "L-BFGS-B",lower = lower,
control = list(fnscale=-1,trace=3),coordsMat=coordsMat,Y=Y,Xmat=Xmat,pointsj=pointsj,
cov.model=cov.model,dS=dS)
t2=proc.time()
AB=calcAB(pointsj = pointsj,Y = Y,coordsMat = coordsMat,theta = opt$par,Xmat = Xmat,cov.model = cov.model,heter = heter)
t3=proc.time()
betahat=with(AB,solve(A,B))
t4=proc.time()

list(opt$par,betahat)
JB=calcJB(g2list = pointsjpair,pointsj = pointsj,Y = Y,coordsMat = coordsMat,theta = opt$par,Xmat = Xmat,cov.model = cov.model)
t5=proc.time()
covbeta=solve(with(AB,A%*%solve(JB,A)))
t6=proc.time()
structure(c(opt,list(betahat=betahat,varbetahat=diag(covbeta),
form=form,time=t6-t1,
cov.model=cov.model,
pointsk=pointsk,
coordsMat=coordsMat,
Xmat=Xmat,
Y=Y,
optTime=t2-t1,ABtime=t3-t2,
JBtime=t5-t4)),class="spCReml")

}
44 changes: 44 additions & 0 deletions R/genSigma.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#' Generate the Covariance matrix Sigma.
#'
#' \code{genSigma} generates the covariance matrix.
#'
#' @param distM Either a distance matrix (result from \code{dist}), or a matrix of distances.
#' @param theta A vector of named covariance paramter. Must (at least) contain named element \emph{sigma2} and \emph{phi}. Other possible elements include \emph{nugget} and \emph{kappa}.
#' @param cov.model Argument that is passed to \code{geoR::cov.spatial}.
#' @param addNugget TRUE/FALSE. If true nugget is added to the covariance matrix.
#'

genSigma=function(distM,theta,cov.model,addNugget=TRUE){

if(!all(c("sigma2","phi")%in%names(theta)))stop("argument theta must contain named element 'sigma2' and 'phi'.")
if(inherits(distM,"matrix")){
if(addNugget&(nrow(distM)!=ncol(distM))){
stop("argument addNugget should only be TRUE if a symmetric distM is supplied.")
}
}
if(addNugget&is.na(theta["nugget"]))stop("theta does not contain the named element 'nugget', but addNugget=TRUE.")

# get kappa
if(cov.model%in%c("matern","powered.exponential","cauchy")){
kappa=theta["kappa"]
}else{
kappa=NA
}

Sigma=cov.spatial(obj = distM,
cov.model = cov.model,
cov.pars=theta[c("sigma2","phi")],
kappa=kappa)
Sigma=as.matrix(Sigma)

## add nugget only if computing covariance within 1 set of points (instead of covariance between 2 sets of points)
if(isSymmetric(Sigma)){
if(inherits(distM,"dist")){
Sigma=Sigma+diag(x=theta["nugget"]+theta["sigma2"],nrow=nrow(Sigma))
}else{
Sigma=Sigma+diag(x=theta["nugget"],nrow=nrow(Sigma))
}
}

return(Sigma)
}
65 changes: 65 additions & 0 deletions R/grad.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#' Computes the grad and hessian of \code{recl}.
#'
#' @param theta named vector of covariance parameter. Must (at least) contain named elements \emph{sigma2} and \emph{phi}. Can also contain \emph{nugget} and \emph{kappa}.
#' @param dS a list of functions to compute the first derivative of Sigma (dSigma/dtheta).
#' @param coordsMat a N by D coordinate matrix. D being the spatial dimension, typically 2 or 3.
#' @param Y the response vector
#' @param Xmat the design matrix (not the data.frame)
#' @param poinstj A list of row numbers that indicates which row belongs to block j (block j is itself a union of block k and block l). See Details.
#' @param cov.model character string that specifies the spatial covariance model. See \code{?geoR::cov.spatial}
#' @param gr TRUE/FALSE. Whether the grad vector should be returned.
#' @param hs TRUE/FALSE. Whether the Hessian matrix should be returned.

grHs=function(theta,dS,coordsMat,Y,Xmat,pointsj,cov.model="exp",gr=TRUE,hs=TRUE,...){
R=length(dS)
out=foreach(pointsj=pointsj,.packages=c("geoR"),.export=c("dist","traceWW","genSigma"))%dopar%{
u=rep(0,R)
H=matrix(0,nrow=R,ncol=R)
coordsMatj=coordsMat[pointsj,]
Dj=as.matrix(dist(coordsMatj))

Sigmaj=genSigma(distM = Dj,theta=theta,cov.model=cov.model,addNugget = TRUE)
Yj=Y[pointsj]
Xj=Xmat[pointsj,]
SinvX=solve(Sigmaj,Xj)
XSinvX=crossprod(Xj,solve(Sigmaj,Xj))

if(gr){
qj=solve(Sigmaj,Yj)-SinvX%*%solve(XSinvX,crossprod(SinvX,Yj))
}
for(r in 1:R){
dSdthetar=dS[[r]](Dj,theta,coordsM=coordsMatj)
Wjr=solve(Sigmaj,dSdthetar)-SinvX%*%solve(XSinvX,crossprod(SinvX,dSdthetar))
if(gr){
u[r]=u[r]-0.5*sum(diag(Wjr))+0.5*crossprod(qj,dSdthetar)%*%qj

}
if(hs){
for(s in r:R){
Wjs=solve(Sigmaj,dS[[s]](Dj,theta,coordsM=coordsMatj))-SinvX%*%solve(XSinvX,crossprod(SinvX,dS[[s]](Dj,theta,coordsM=coordsMatj)))
H[r,s]=H[s,r]=H[r,s]-0.5*traceWW(Wjr,Wjs)
}
}
}
list(H=H,u=u)
}
if(gr&hs)return(list(gr=Reduce("+",lapply(out,function(x)x$u)),
hs=Reduce("+",lapply(out,function(x)x$H))))
if(gr)return(Reduce("+",lapply(out,function(x)x$u)))
if(hs)return(Reduce("+",lapply(out,function(x)x$H)))

}

gr=function(theta,coordsMat,Y,Xmat,dS,pointsj,cov.model,...){
grHs(theta = theta,dS = dS,coordsMat = coordsMat,Y = Y,Xmat = Xmat,pointsj = pointsj,cov.model = cov.model,gr=TRUE,hs=FALSE)
}


Hs=function(theta,coordsMat,Y,Xmat,dS,pointsj,cov.model,...){
grHs(theta = theta,dS = dS,coordsMat = coordsMat,Y = Y,Xmat = Xmat,pointsj = pointsj,cov.model = cov.model,gr=FALSE,hs=TRUE)
}

traceWW=function(A,B){
stopifnot(ncol(A)==nrow(B))
sum(sapply(1:nrow(A),function(ii)A[ii,]%*%B[,ii]))
}
18 changes: 18 additions & 0 deletions R/hello.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Hello, world!
#
# This is an example function named 'hello'
# which prints 'Hello, world!'.
#
# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Build and Reload Package: 'Ctrl + Shift + B'
# Check Package: 'Ctrl + Shift + E'
# Test Package: 'Ctrl + Shift + T'

hello <- function() {
print("Hello, world!")
}
33 changes: 33 additions & 0 deletions R/recl.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Calculate the Residual Composite log-Likelihood.
#'
#' \code{recl} calculates the residual composite log-likelihood.
#'
#' @param theta A named numeric vector containing the covariance parameters. It should (at least) have element \emph{sigma2} and \emph{phi}. Other possible elements are \emph{nugget} and \emph{kappa}.
#' @param coordsMat A N by D matrix of coordinates. D is the spatial dimension, typically 2 or 3.
#' @param Y the response vector
#' @param Xmat the design matrix (not the data.frame)
#' @param pointsj A list. Each list contains the row number of the data matrix (Y or Xmat or coordsMat) that correspond to the (k,l) block.
#' @param cov.model This is passed to \code{geoR::cov.spatial}

recl=function(theta,coordsMat,Y,Xmat,pointsj,cov.model="exp",...){
if(length(unique(c(nrow(coordsMat),length(Y),nrow(Xmat))))>1)stop("coordsMat, Y, and X must be of the same length (or have the same number of rows).")


recloglik=foreach(pointsj=pointsj,.packages=c("geoR","Matrix"),.combine="+",.export=c("dist","genSigma"))%dopar%{

coordsMatj=coordsMat[pointsj,]
Sigmaj=genSigma(distM = dist(coordsMatj),theta=theta,cov.model=cov.model,addNugget=TRUE)

Yj=Y[pointsj]
Xj=Xmat[pointsj,]

logdetS=determinant(Sigmaj,log=TRUE)$modulus
SinvX=solve(Sigmaj,Xj)
XSinvX=crossprod(Xj,solve(Sigmaj,Xj))
logdetXSinvX=determinant(XSinvX,logarithm = TRUE)$modulus
PYj=solve(Sigmaj,Yj)-SinvX%*%solve(XSinvX,crossprod(SinvX,Yj))

as.numeric(-(logdetS+logdetXSinvX+crossprod(Yj,PYj)))/2
}
return(recloglik)
}
13 changes: 13 additions & 0 deletions R/summary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#' Summary for spCReml object

summary.spCReml=function(obj){
b=obj$betahat
seb=sqrt(obj$varbetahat)
z=b/seb
p=pnorm(abs(z),0,1,lower.tail = FALSE)*2
out=data.frame(betahat=b,"se(betahat)"=seb,Z=z,"Pr(Z>|z|)"=p)
colnames(out)=c("betahat","se(betahat)","Z","Pr(|Z|>=z)")
out$stars=cut(out[,4],breaks = c(-1e-5,0.001,0.01,0.05,0.1,1),labels = c("***","**","*",".",""))
print(out,digits=3)
print("0 *** 0.001 ** 0.01 * 0.05 . 0.1 1")
}
97 changes: 97 additions & 0 deletions R/temp/checkRECL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
library(geoR)

N=500

sim=grf(N,cov.pars=c(0.5,3000),mean=5,nugget=0.3,xlim=c(0,100000),ylim=c(0,100000))
sim$borders=NULL
reml <- likfit(sim, ini=c(0.5, 4000), fix.nug = FALSE, lik.met = "REML")
summary(reml)

ml <- likfit(sim, ini=c(0.5, 4000), fix.nug = FALSE, lik.met = "ML")

plot(variog(sim))
lines(reml)

Ngrp=4
grp=kmeans(sim[[1]],Ngrp,nstart=100)
plot(sim[[1]],col=grp$cluster)
grplist=combn(1:Ngrp,2,simplify=FALSE)
pointsk=lapply(1:Ngrp,function(x)which(grp$cluster%in%x))
pointsj=lapply(grplist,function(x)which(grp$cluster%in%x))
g2list=list()
for(ii in 1:(Ngrp-1)){
for(jj in (ii+1):Ngrp){
if(length(intersect(grplist[[ii]],grplist[[jj]]))>0){
g2list=c(g2list,list(c(ii,jj)))
}
}
}
g2list=c(combn(1:length(grplist),2,simplify = FALSE),combn(1:length(grplist),3,simplify = FALSE))
simfit1=fitCLSM(form = data~1,data=list(data=sim$data),dS = dSexp,coordsMat = sim[[1]],
init = c(0.5,4000,0.1),pointsj=pointsj,pointsjpair=g2list,pointsk=pointsk,cov.model="exp",heter=FALSE,lower=c(1e-5,1e-5,1e-5))

format(data.frame("ml"=c(ml$cov.pars,ml$nugget,ml$beta,ml$beta.var),
"reml"=c(reml$cov.pars,reml$nugget,reml$beta,reml$beta.var),
"recl"=c(simfit1$par,simfit1$beta,simfit1$varbetahat),
"true"=c(0.5,3000,0.3,5,NA),row.names=c("s2","phi","tau2","beta","var(beta)")),scientific=FALSE)


#### kriging
bbox=apply(sim[[1]],2,range)
predgr=expand.grid(seq(bbox[1,1],bbox[2,1],l=50),seq(bbox[1,2],bbox[2,2],l=50))
Nk=list(c(2,3,4),c(1,3,4),c(1,2,4),c(1,2,3))
newcluster=kmeans(predgr,4,nstart = 100)
newpointsk=lapply(1:Ngrp,function(x)which(newcluster$cluster==x))

mlkgcl=krige.control(obj.model=ml)
mlkg=krige.conv(geodata=sim,locations=predgr,krige = mlkgcl)
remlkgcl=krige.control(obj.model=reml)
remlkg=krige.conv(geodata=sim,locations=predgr,krige = remlkgcl)

colnames(predgr)=colnames(simfit1$coordsMat)
newdf=data.frame(predgr,data=1)

simpred=predict.reclsm(fit = simfit1,newdf = newdf,newpointsk = newpointsk,pointsk = pointsk,
Nk =Nk,newcoordsMat = predgr)

plot(simpred,remlkg$predict)

plot(predgr,col=newcluster$cluster)
points(predgr[tempid,],col=6,pch=2)
points(sim[[1]],pch=3)

## try to use gstat
library(gstat)
data(meuse)
locs=meuse[rep(1,nrow(sim[[1]])),]
locs[,c(1:2)]=sim[[1]]
locs$z=sim[[2]]
coordinates(locs)=~x+y
data(meuse.grid)
newdat=meuse.grid[rep(1,nrow(predgr)),]
newdat[,c(1:2)]=predgr
gridded(newdat) = ~x+y
gstatkg=gstat::krige(z~1,locs,newdat,beta=simfit1$betahat[1],
model=vgm(simfit1$par[1],model="Exp",range=simfit1$par[2],nugget=simfit1$par[3]))

### now try large dataset
data(meuse)
locs=meuse[rep(1,nrow(sim[[1]])),]
locs[,c(1:2)]=sim[[1]]
locs$z=sim[[2]]
coordinates(locs)=~x+y
data(meuse.grid)
Bpredgr=expand.grid(seq(bbox[1,1],bbox[2,1],l=115),seq(bbox[1,2],bbox[2,2],l=115))
Bnewdat=meuse.grid[rep(1,nrow(Bpredgr)),]
Bnewdat[,c(1:2)]=Bpredgr
gridded(Bnewdat) = ~x+y
Bgstatkg=gstat::krige(z~1,locs,Bnewdat,
model=vgm(simfit1$par[1],model="Exp",range=simfit1$par[2],nugget=simfit1$par[3]))

reclkgcl=krige.control(obj.model=reml)
reclkgcl$beta=simfit1$betahat
reclkgcl$cov.pars=simfit1$par[1:2]
reclkgcl$nugget=simfit1$par[3]
geoRreclpred=krige.conv(geodata=sim,locations=predgr,krige = remlkgcl)

plot(remlkg$pred,geoRreclpred$pred)
Loading

0 comments on commit b572d1e

Please sign in to comment.