improve numeric stability

zhanxw · Feb 8, 2015 · fe1c16a · fe1c16a
1 parent e95d725
commit fe1c16a
Show file tree

Hide file tree

Showing 7 changed files with 47 additions and 13 deletions.
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,7 @@
+2015-02-08  Xiaowei Zhan  <[email protected]>
+
+	* Improve numeric stability for linear mixed models
+
 2015-01-04  Xiaowei Zhan  <[email protected]>
 
 	* Improve speed and outputs of family-based models

diff --git a/Makefile b/Makefile
@@ -20,8 +20,10 @@ release: lib
 	$(MAKE) -C $(ROOT)/src release
 	$(MAKE) -C $(ROOT)/vcfUtils release
 
-debug: lib-dbg
+debug: debug.rvt debug.vcfUtil
+debug.rvt: lib-dbg
 	$(MAKE) -C $(ROOT)/src debug
+debug.vcfUtil: lib-dbg
 	$(MAKE) -C $(ROOT)/vcfUtils debug
 
 profile: lib-dbg

diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ Rvtests, which stands for Rare Variant tests, is a flexible software package for
 # Download
 
 Source files can be downloaded from [github](https://github.com/zhanxw/rvtests/archive/master.zip) or [github page](https://github.com/zhanxw/rvtests).
-Executable binary files (for Linux 64bit) can be downloaded from [here](https://github.com/zhanxw/rvtests/releases/download/v1.8.6/rvtests-20150104.tar.gz).
+Executable binary files (for Linux 64bit) can be downloaded from [here](https://github.com/zhanxw/rvtests/releases/download/v1.8.6/rvtests-20150208.tar.gz).
 
 
 # Quick Tutorial

diff --git a/regression/FamSkat.cpp b/regression/FamSkat.cpp
@@ -87,8 +87,6 @@ class FamSkat::FamSkatImpl{
         U.transpose();
 
     kin.close();
-
-
 #endif
     // std::ofstream p("P0");
     // p << P0;

diff --git a/regression/FastLMM.cpp b/regression/FastLMM.cpp
@@ -1,6 +1,7 @@
 #include "FastLMM.h"
 #include "EigenMatrix.h"
 #include "EigenMatrixInterface.h"
+
 #include "Eigen/Dense"
 #include "GSLMinimizer.h"
 #include "gsl/gsl_cdf.h" // use gsl_cdf_chisq_Q
@@ -38,6 +39,12 @@ class FastLMM::Impl{
     G_to_Eigen(mat_Xnull, &this->ux);
     G_to_Eigen(mat_y, &this->uy);
     this->lambda = kinshipS.mat;
+    // take absoluate value
+    // otherwise, small negative lambda may be instable
+    // e.g. lambd[i] = -0.00013, and delta = 8e-5, then (1/(lambda[i] + delta))
+    //      becomes extremely large negative values, and it affects results
+    this->lambda = this->lambda.cwiseAbs();
+
     const Eigen::MatrixXf& U = kinshipU.mat;
 
     // rotate
@@ -153,7 +160,7 @@ class FastLMM::Impl{
         altSigma2 = altSumResidual2 / (x.rows() - x.cols());
       }
       // 3. get new likelhood
-      // See 1.4 
+      // See 1.4
       double ret = 0;
       if (this->model == FastLMM::MLE) {
         ret = 1.0 * altUx.rows() * log( 2.0 * PI);
@@ -177,11 +184,19 @@ class FastLMM::Impl{
       // just return score test statistics
       Eigen::ArrayXf u_g_center;
       u_g_center = (U.transpose() *  (g.rowwise() - g.colwise().mean())).eval().array();
+#ifdef DEBUG
+      dumpToFile(U, "U");
+      dumpToFile(lambda, "lambda");
+      dumpToFile(g, "g");
+      dumpToFile(uResid, "uResid");
+      dumpToFile(u_g_center, "u_g_center");
+#endif
+
       this->Ustat = (  ( (u_g_center) *
                          (this->uResid).array()) /
                        (lambda.array() + delta)
                        ).sum() / this->sigma2;
-      // this->Vstat = (u_g_center.square() / (lambda.array() + delta)).sum() / this->sigma2 ;
+      // when there is no covariate: this->Vstat = (u_g_center.square() / (lambda.array() + delta)).sum() / this->sigma2 ;
       this->Vstat = ((u_g_center).matrix().transpose() * this->scaledK * (u_g_center.matrix()))(0, 0) / this->sigma2;
       if (this->Vstat > 0.0) {
         this->stat = this->Ustat * this->Ustat / this->Vstat;
@@ -221,7 +236,7 @@ class FastLMM::Impl{
     dumpToFile(u_g_center, "u_g_center");
     dumpToFile(u, "u");
     dumpToFile(v, "v");
-    
+
     {
       g = g.col(0);
       Eigen::ArrayXf u_g_center;
@@ -235,7 +250,7 @@ class FastLMM::Impl{
       fprintf(stderr, "Ustat = %g, Vstat = %g\n", Ustat, Vstat);
       fprintf(stderr, "U[0] = %g, V[0][0] = %g\n", u(0, 0), v(0, 0));
     }
-#endif    
+#endif
     return 0;
   }
   double getSumResidual2(double delta) {
@@ -245,8 +260,12 @@ class FastLMM::Impl{
     // Eigen::MatrixXf x = (this->lambda.array() + delta).sqrt().matrix().asDiagonal() * this->ux;
     // Eigen::MatrixXf y = (this->lambda.array() + delta).sqrt().matrix().asDiagonal() * this->uy;
     // this->beta = (x.transpose() * x).eval().ldlt().solve(x.transpose() * y);
-    this->beta = (ux.transpose() * (this->lambda.array() + delta).inverse().matrix().asDiagonal() * ux)
-                 .ldlt().solve(ux.transpose() * (this->lambda.array() + delta).inverse().matrix().asDiagonal() * uy);
+#ifdef DEBUG
+    dumpToFile(ux, "ux");
+    dumpToFile(uy, "uy");
+#endif
+    this->beta = (ux.transpose() * (this->lambda.array() + delta).abs().inverse().matrix().asDiagonal() * ux)
+                 .ldlt().solve(ux.transpose() * (this->lambda.array() + delta).abs().inverse().matrix().asDiagonal() * uy);
 
     double sumResidual2 = getSumResidual2(delta);
     if ( model == FastLMM::MLE) {
@@ -400,6 +419,17 @@ class FastLMM::Impl{
     EigenMatrix& b = *beta;
     b.mat = this->beta;
   }
+ private:
+  // define function to be applied coefficient-wise
+  template<typename Scalar>
+  struct  Ramp{
+    const Scalar operator()(const Scalar& x) const{
+      if (x > 0.)
+        return x;
+      else
+        return 0.;
+    }
+  };
  private:
   // Eigen::MatrixXf S;
   float sigma2;     // sigma2_g

diff --git a/src/Main.cpp b/src/Main.cpp
@@ -29,7 +29,7 @@
 
 Logger* logger = NULL;
 
-const char* VERSION = "20150104";
+const char* VERSION = "20150208";
 
 void banner(FILE* fp) {
   const char* string =
@@ -39,7 +39,7 @@ void banner(FILE* fp) {
       "   ...      Bingshan Li, Dajiang Liu          ...      \n"
       "    ...      Goncalo Abecasis                  ...     \n"
       "     ...      [email protected]                  ...    \n"
-      "      ...      January 2015                      ...   \n"
+      "      ...      February 2015                     ...   \n"
       "       ...      zhanxw.github.io/rvtests          ...  \n"
       "        .............................................. \n"
       "                                                       \n"

diff --git a/vcfUtils/vcf2kinship.cpp b/vcfUtils/vcf2kinship.cpp
@@ -896,7 +896,7 @@ int main(int argc, char** argv){
     fprintf(stdout, "Total [ %d ] variants are used to calculate chromosome X kinship matrix.\n",
             variantXUsed);
     if (numMaleHemiWrongCoding > 0.5 * numMaleHemiMissing ) {
-      fprintf(stdout, "NOTE: In hemizygous region, males have [ %d ] missing genotypes and [ %d ] of them are due to wrong coding. Please confirm male genotypes are coded corrected, in particular, do not code as as 0/1.\n", numMaleHemiMissing, numMaleHemiWrongCoding);
+      fprintf(stdout, "NOTE: In hemizygous region, males have [ %d ] missing genotypes and [ %d ] of them are due to wrong coding. Please confirm male genotypes are coded correctly, in particular, do not code them as '0/1'.\n", numMaleHemiMissing, numMaleHemiWrongCoding);
     }
   }