From 8e72fb522a00f1faf7b489ed1411cb1520e930f2 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sat, 12 Sep 2015 18:05:40 +0200
Subject: [PATCH 01/23] An optional MeatAxe package

---
 build/pkgs/meataxe/SPKG.txt               | 27 +++++++
 build/pkgs/meataxe/checksums.ini          |  4 +
 build/pkgs/meataxe/package-version.txt    |  1 +
 build/pkgs/meataxe/patches/IO_fixes.patch | 78 +++++++++++++++++++
 build/pkgs/meataxe/spkg-install           | 95 +++++++++++++++++++++++
 build/pkgs/meataxe/type                   |  1 +
 6 files changed, 206 insertions(+)
 create mode 100644 build/pkgs/meataxe/SPKG.txt
 create mode 100644 build/pkgs/meataxe/checksums.ini
 create mode 100644 build/pkgs/meataxe/package-version.txt
 create mode 100644 build/pkgs/meataxe/patches/IO_fixes.patch
 create mode 100755 build/pkgs/meataxe/spkg-install
 create mode 100644 build/pkgs/meataxe/type

diff --git a/build/pkgs/meataxe/SPKG.txt b/build/pkgs/meataxe/SPKG.txt
new file mode 100644
index 00000000000..9892003f3e2
--- /dev/null
+++ b/build/pkgs/meataxe/SPKG.txt
@@ -0,0 +1,27 @@
+= MeatAxe =
+
+== Description ==
+
+The MeatAxe is a set of programs for working with matrix representations
+over finite fields. Permutation representations are supported to some
+extent, too.
+
+The MeatAxe is developed for the UNIX operating system. Supported platforms
+include Linux (x86), SunOS/Solaris (Sparc), HP/UX, DEC OSF/1 (Alpha), and
+Windows NT 4.0 (x86, Alpha, PPC).
+
+== License ==
+
+The C Meat-Axe is free software: you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation, either version 2 of the License, or (at your option) any later
+version.
+
+== Upstream Contact ==
+
+Michael Ringe (mringe@math.rwth-aachen.de)
+
+== Special Update/Build Instructions ==
+
+The original upstream tarball was re-packaged, so that it unpacks into a single
+folder called meataxe-2.4.24
diff --git a/build/pkgs/meataxe/checksums.ini b/build/pkgs/meataxe/checksums.ini
new file mode 100644
index 00000000000..b4d4356ad4e
--- /dev/null
+++ b/build/pkgs/meataxe/checksums.ini
@@ -0,0 +1,4 @@
+tarball=meataxe-VERSION.tar.gz
+sha1=0aa4313cc430c78e058068feba805428ef2324aa
+md5=e0f384e37a69671c73c2904e4e69dc01
+cksum=3083268116
diff --git a/build/pkgs/meataxe/package-version.txt b/build/pkgs/meataxe/package-version.txt
new file mode 100644
index 00000000000..208b2a0070d
--- /dev/null
+++ b/build/pkgs/meataxe/package-version.txt
@@ -0,0 +1 @@
+2.4.24.p1
diff --git a/build/pkgs/meataxe/patches/IO_fixes.patch b/build/pkgs/meataxe/patches/IO_fixes.patch
new file mode 100644
index 00000000000..933908be6be
--- /dev/null
+++ b/build/pkgs/meataxe/patches/IO_fixes.patch
@@ -0,0 +1,78 @@
+Read and create library files in the directory given by MtxLibDir.
+
+The patch keeps a promise given by upstream.
+
+AUTHOR: Simon King 2015-09-18, simon.king@uni-jena.de
+
+diff --git a/src/maketabF.c b/src/maketabF.c
+index fa03eda..d7af83e 100644
+--- a/src/maketabF.c
++++ b/src/maketabF.c
+@@ -319,7 +319,7 @@ static void writeheader()
+     int i, j;
+ 
+     sprintf(filename,"p%3.3ld.zzz",Q);
+-    fd = SysFopen(filename,FM_CREATE);
++    fd = SysFopen(filename,FM_CREATE|FM_LIB);
+     if (fd == NULL)
+     {
+ 	perror(filename);
+diff --git a/src/os.c b/src/os.c
+index a7f4271..b07b971 100644
+--- a/src/os.c
++++ b/src/os.c
+@@ -227,25 +227,31 @@ FILE *SysFopen(const char *name, int mode)
+ 	MTX_ERROR1("Invalid file mode %d",mode);
+ 	return NULL;
+     }
+-    f = fopen(name,fmodes[m]);
+-    if (f != NULL) 
+-	return f;
+ 
+     /* Search library directory
+        ------------------------ */
+     if ((mode & FM_LIB) != 0) 
+     {
+-	strcpy(buf,MtxLibDir);
+-	strcat(buf,"/");
+-	strcat(buf,name);
+-	f = fopen(buf,fmodes[m]);
++        if (*MtxLibDir != 0)
++            {
++            strcpy(buf,MtxLibDir);
++            strcat(buf,"/");
++            strcat(buf,name);
++            f = fopen(buf,fmodes[m]);
++            }
++        else
++            f = fopen(name,fmodes[m]);
+     }
+-
++    else
++    {
++        f = fopen(name,fmodes[m]);
++    }
++    if (f != NULL)
++	return f;
+     /* Error handling
+        -------------- */
+     if (f == NULL && (mode & FM_NOERROR) == 0)
+-	MTX_ERROR1("%s: %S",name);
+-
++        MTX_ERROR1("%s: %S",name);  /* body of the if above: only reported unless FM_NOERROR is set */
+     return f;
+ }
+ 
+diff --git a/src/zcv.c b/src/zcv.c
+index a9ad7a3..763c9fb 100644
+--- a/src/zcv.c
++++ b/src/zcv.c
+@@ -584,7 +584,7 @@ static int Init(int argc, const char **argv)
+     inpname = App->ArgV[0];
+     if (strcmp(inpname,"-"))
+     {
+-	src = SysFopen(inpname,FM_READ|FM_TEXT|FM_LIB);
++	src = SysFopen(inpname,FM_READ|FM_TEXT);
+ 	if (src == NULL)
+ 	{
+ 	    MTX_ERROR1("Cannot open %s",inpname);
diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
new file mode 100755
index 00000000000..7733e9e44cb
--- /dev/null
+++ b/build/pkgs/meataxe/spkg-install
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+if [ -z "$SAGE_LOCAL" ]; then
+    echo >&2 "SAGE_LOCAL undefined ... exiting"
+    echo >&2 "Maybe run 'sage --sh'?"
+    exit 1
+fi
+
+cd src || exit 1
+
+for patch in ../patches/*.patch; do
+    [ -r "$patch" ] || continue  # Skip non-existing or non-readable patches
+    echo "Applying $patch"
+    patch -p1 <"$patch"
+    if [ $? -ne 0 ]; then
+        echo >&2 "Error applying '$patch'"
+        exit 1  # not 'return': that is only valid in a function or a sourced script
+    fi
+done
+
+## The following *could* be put into Makefile.conf
+
+# This is the place where arithmetic tables and some other input files are
+# searched by default.
+export MTXLIB="$DOT_SAGE/meataxe"
+# Directory where executables are installed.
+export MTXBIN="$SAGE_LOCAL/bin"
+# Default compiler flags
+export CFLAGS1="-std=gnu99 -O -Wall -fPIC"
+# Field size up to GF(256)
+export ZZZ=0
+
+# In principle, one should uncomment for field sizes up to GF(2^16).
+# But upstream doesn't provide the required sources.
+#export ZZZ=1
+
+# The following is just to make MeatAxe's Makefile happy
+touch Makefile.conf
+
+# Create the directory for the multiplication tables (quoted: the path may contain spaces)
+mkdir -p "$MTXLIB"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating directory for multiplication tables."
+    exit 1
+fi
+
+## Install! Apparently MeatAxe would rebuild everything when
+## testing, and "make check" also installs. So, if a test
+## is requested then we do it in one go.
+
+if [ "x$SAGE_CHECK" = xyes ]; then
+    $MAKE check
+else
+    $MAKE
+fi
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error installing MeatAxe."
+    exit 1
+fi
+
+## Surprisingly, MeatAxe's Makefile does NOT install the binaries
+## in MTXBIN. Hence, we do it manually.
+
+mv bin/* "$MTXBIN"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe executables."
+    exit 1
+fi
+
+# We move the meataxe library to a permanent location
+mv tmp/libmtx.a "$SAGE_LOCAL/lib"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe library."
+    exit 1
+fi
+
+cp src/meataxe.h "$SAGE_LOCAL/include/"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe header."
+    exit 1
+fi
+
+# Optionally install the documentation below $SAGE_LOCAL, like the binaries, library and header.
+if [ "x$SAGE_SPKG_INSTALL_DOCS" = xyes ] ; then
+    mkdir -p "$SAGE_LOCAL/share/doc/meataxe/" &&
+    cp -r doc/* "$SAGE_LOCAL/share/doc/meataxe/"
+    if [ $? -ne 0 ]; then
+        echo >&2 "Error copying documentation."
+        exit 1
+    else
+        echo "The documentation can be found in $SAGE_LOCAL/share/doc/meataxe/"
+    fi
+fi
diff --git a/build/pkgs/meataxe/type b/build/pkgs/meataxe/type
new file mode 100644
index 00000000000..134d9bc32d5
--- /dev/null
+++ b/build/pkgs/meataxe/type
@@ -0,0 +1 @@
+optional

From c7d75fbf5f72e435dab9af5f5fc2e7cb253b9ba3 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Fri, 18 Sep 2015 09:37:27 +0200
Subject: [PATCH 02/23] Implement and use Strassen-Winograd matrix
 multiplication in MeatAxe

---
 .../StrassenWinogradImplementation.patch      | 1246 +++++++++++++++++
 .../patches/StrassenWinogradUsage.patch       |  359 +++++
 2 files changed, 1605 insertions(+)
 create mode 100644 build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
 create mode 100644 build/pkgs/meataxe/patches/StrassenWinogradUsage.patch

diff --git a/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch b/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
new file mode 100644
index 00000000000..43664cf6560
--- /dev/null
+++ b/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
@@ -0,0 +1,1246 @@
+Implement Strassen-Winograd multiplication in MeatAxe.
+
+We use the schedule from Douglas-Heroux-Slishman-Smith;
+see also Boyer-Pernet-Zhou, "Memory efficient scheduling of
+Strassen-Winograd's matrix multiplication algorithm",
+Table 1 (ISSAC 2009).
+
+AUTHOR: Simon King 2015-09-19, simon.king@uni-jena.de
+
+diff --git a/Makefile b/Makefile
+index b78e244..2ada31e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -88,6 +88,7 @@ LIB_OBJS=\
+ 	temap \
+ 	tkinfo vec2mat \
+ 	wgen \
++	window \
+ 	zcleanrow zcmprow zgap zpermrow \
+ 	zzz2 \
+ 	version
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 4f0a973..178b6cb 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -24,8 +24,8 @@
+ MTX_DEFINE_FILE_INFO
+ 
+ typedef unsigned char BYTE;
+-static int MPB = 0;		/* No. of marks per byte */
+-static int LPR = 0;		/* Long ints per row */
++int MPB = 0;		/* No. of marks per byte */
++int LPR = 0;		/* Long ints per row */
+ 
+ 
+ 
+@@ -646,7 +646,7 @@ PTR FfAddRow(PTR dest, PTR src)
+ 
+ 
+ /**
+- ** Add a part two rows.
++ ** Add a part of two rows.
+  ** This works like FfAddRow(), but the operation is performed only on a given range of
+  ** columns. Note that the working range is not specified as column indexes but in units of
+  ** long integers!
+@@ -707,7 +707,217 @@ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len)
+     return dest;
+ }
+ 
++/**
++ ** Subtract two rows.
++ ** This function subtracts src from dest. Field order and row size must have been set before.
++ ** @param dest The row to subtract from.
++ ** @param src The row to subtract.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRow(PTR dest, PTR src)
++{
++    register int i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++    {	
++#ifdef ASM_MMX
++    /* This assumes Intel with 4 bytes per long, but MMX implies Intel anyway.*/
++	__asm__(
++	"    pushl %ebx\n"
++	"    pushl %ecx\n"
++	"    pushl %edx\n"
+ 
++	"    movl 8(%ebp),%ecx\n"
++        "    movl 12(%ebp),%ebx\n"
++        "    movl LPR,%edx\n"
++        "    sarl $1,%edx\n"
++        "    je .SUBROW2\n"
++        "    .align 16\n"
++	".SUBROW1:\n"
++        "    movq (%ebx),%mm0\n"
++        "    addl $8,%ebx\n"
++        "    pxor (%ecx),%mm0\n"
++        "    movq %mm0,(%ecx)\n"
++        "    addl $8,%ecx\n"
++        "    decl %edx\n"
++        "    jne .SUBROW1\n"
++	".SUBROW2:\n"
++	"    popl %edx\n"
++	"    popl %ecx\n"
++	"    popl %ebx\n"
++	);
++#else
++	register long *l1 = (long *) dest;
++	register long *l2 = (long *) src;
++	for (i = LPR; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    if (x != 0) *l1 ^= x;
++	    l1++;
++	}
++#endif
++    }
++    else		/* any other characteristic */
++    {
++        FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];  /* row of the multiplication table for -1 */
++#ifdef ASM_MMX
++        register BYTE *p1 = dest;
++        register unsigned long *p2 = (unsigned long *) src;
++        for (i = LPR; i != 0; --i)
++        {
++            register unsigned long a;  /* one long of src; handled as 4 bytes (see p1 += 4 below) */
++            if ((a = *p2++) != 0) {
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;  /* increment kept separate: no unsequenced access to p1 */
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++            } else
++              p1 += 4;
++        }
++#else
++        register FEL *p1 = dest;
++        register FEL *p2 = src;
++        for (i = FfTrueRowSize(FfNoc); i != 0; --i)
++        {
++            register int x = *p2++;
++            if (x != 0) *p1 = mtx_tadd[*p1][table_inv[x]];  /* dest := dest + (-src) */
++            p1++;
++        }
++#endif
++    }
++    return dest;
++}
++
++
++/**
++ ** Subtract a part of two rows.
++ ** This works like FfSubRow(), but the operation is performed only on a given range of
++ ** columns. Note that the working range is not specified as column indexes but in units of
++ ** long integers!
++ ** @param dest The row to subtract from.
++ ** @param src The row to subtract.
++ ** @param first Number of long integers to skip.
++ ** @param len Number of long integers to subtract.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRowPartial(PTR dest, PTR src, int first, int len)
++{
++    register long i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++#ifdef ASM_MMX
++	__asm__("\n	movl 8(%ebp),%ecx\n"
++		"	movl 12(%ebp),%ebx\n"
++		"	movl 16(%ebp),%edx\n"
++		"       sall $2,%edx\n"
++		"       addl %edx,%ecx\n"
++		"       addl %edx,%ebx\n"
++		"       movl 20(%ebp),%edx\n"
++		"	sarl $1,%edx\n"
++		"	je .SUBROWPART_1\n"
++		"	.align 16\n"
++		".SUBROWPART_2:\n"
++		"	movq (%ebx),%mm0\n"
++		"	addl $8,%ebx\n"
++		"	pxor (%ecx),%mm0\n"
++		"	movq %mm0,(%ecx)\n"
++		"	addl $8,%ecx\n"
++		"	decl %edx\n"
++		"	jne .SUBROWPART_2\n"
++		".SUBROWPART_1:\n"
++	       );
++#else
++    {	register long *l1 = (long *) dest + first;
++	register long *l2 = (long *) src + first;
++	for (i = len; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    *l1 ^= x;
++	    l1++;
++	}
++    }
++#endif
++    else		/* any other characteristic */
++    {   FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];
++        register BYTE *p1 = dest + first * sizeof(long);
++        register BYTE *p2 = src + first * sizeof(long);
++        for (i = len*sizeof(long); i != 0; --i)
++        {
++            register int x = *p2++;
++            *p1 = mtx_tadd[*p1][table_inv[x]];
++            p1++;
++        }
++    }
++    return dest;
++}
++
++
++/**
++ ** Subtract a part of two rows.
++ ** The difference to FfSubRowPartial is that dest is replaced
++ ** by src-dest, not by dest-src.
++ ** @param dest The row to subtract.
++ ** @param src The row to subtract from.
++ ** @param first Number of long integers to skip.
++ ** @param len Number of long integers to subtract.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len)
++{
++    register long i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++#ifdef ASM_MMX
++	__asm__("\n	movl 8(%ebp),%ecx\n"
++		"	movl 12(%ebp),%ebx\n"
++		"	movl 16(%ebp),%edx\n"
++		"       sall $2,%edx\n"
++		"       addl %edx,%ecx\n"
++		"       addl %edx,%ebx\n"
++		"       movl 20(%ebp),%edx\n"
++		"	sarl $1,%edx\n"
++		"	je .SUBROWPARTREV_1\n"
++		"	.align 16\n"
++		".SUBROWPARTREV_2:\n"
++		"	movq (%ebx),%mm0\n"
++		"	addl $8,%ebx\n"
++		"	pxor (%ecx),%mm0\n"
++		"	movq %mm0,(%ecx)\n"
++		"	addl $8,%ecx\n"
++		"	decl %edx\n"
++		"	jne .SUBROWPARTREV_2\n"
++		".SUBROWPARTREV_1:\n"
++	       );  /* labels differ from FfSubRowPartial's: asm labels must be unique within the file */
++#else
++    {	register long *l1 = (long *) dest + first;
++	register long *l2 = (long *) src + first;
++	for (i = len; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    *l1 ^= x;
++	    l1++;
++	}
++    }
++#endif
++    else		/* any other characteristic */
++    {   FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];
++        register BYTE *p1 = dest + first * sizeof(long);
++        register BYTE *p2 = src + first * sizeof(long);
++        for (i = len*sizeof(long); i != 0; --i)
++        {
++            register int x = *p2++;
++            *p1 = mtx_tadd[table_inv[*p1]][x];
++            p1++;
++        }
++    }
++    return dest;
++}
+ 
+ 
+ /**
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 819e88e..e2f5a84 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -107,6 +107,9 @@ extern int FfChar;		/**< Current characteristic */
+ extern FEL FfGen;		/**< Generator */
+ extern int FfNoc;		/**< Number of columns for row ops */
+ extern size_t FfCurrentRowSize;
++extern int FfCurrentRowSizeIo;
++extern int MPB;         /**< No. of marks per byte */
++extern int LPR;         /**< Long ints per row */
+ 
+ 
+ /* Arithmetic */
+@@ -125,6 +128,9 @@ int FfSetNoc(int noc);
+ void FfAddMulRow(PTR dest, PTR src, FEL f);
+ PTR FfAddRow(PTR dest, PTR src);
+ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len);
++PTR FfSubRow(PTR dest, PTR src);
++PTR FfSubRowPartial(PTR dest, PTR src, int first, int len);
++PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len);
+ PTR FfAlloc(int nor);
+ int FfCmpRows(PTR p1, PTR p2);
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv);
+@@ -519,6 +525,8 @@ int MatIsValid(const Matrix_t *m);
+ Matrix_t *MatLoad(const char *fn);
+ Matrix_t *MatMul(Matrix_t *dest, const Matrix_t *src);
+ Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff);
++Matrix_t *MatMulStrassen(Matrix_t *dest, const Matrix_t *A, const Matrix_t *B);
++void StrassenSetCutoff(size_t size);
+ long MatNullity(const Matrix_t *mat);
+ long MatNullity__(Matrix_t *mat);
+ Matrix_t *MatNullSpace(const Matrix_t *mat);
+diff --git a/src/window.c b/src/window.c
+new file mode 100644
+index 0000000..f374028
+--- /dev/null
++++ b/src/window.c
+@@ -0,0 +1,944 @@
++/* ========================== C MeatAxe =============================
++   window.c -  Matrix window operations and Strassen-Winograd multiplication
++
++   (C) Copyright 2015 Simon King, Institut fuer Mathematik,
++   FSU Jena, Germany  <simon.king@uni-jena.de>
++   This program is free software; see the file COPYING for details.
++   ================================================================== */
++
++#include <string.h>
++#include <stdlib.h>
++#include <inttypes.h>
++#include "meataxe.h"
++
++/* --------------------------------------------------------------------------
++   Local data
++   -------------------------------------------------------------------------- */
++
++MTX_DEFINE_FILE_INFO
++
++typedef unsigned char BYTE;
++
++typedef struct
++{
++  int Nor;                      /* #rows of the window */
++  size_t RowSize;               /* size of window rows in long integers */
++  Matrix_t *Matrix;             /* ambient matrix containing the window */
++  PTR ULCorner;                 /* Pointer to the upper left window corner */
++}
++    MatrixWindow_t;
++
++size_t cutoff = sizeof(long)/2;
++
++/** The divide-and-conquer approach is only done for
++ * matrices with at least "cutoff*MPB*sizeof(long)" rows which
++ * are formed by at least "cutoff" longs.
++ *
++ * The above rule means that the "critical matrices" are square.
++ **/
++void StrassenSetCutoff(size_t size)
++{   if (size)
++        cutoff = size;
++    else
++        cutoff = sizeof(long)/2;
++}
++
++/* ------------------------------------------------------------------
++
++   Allocation and deallocation of a matrix window
++
++   ------------------------------------------------------------------ */
++/**
++ * Note that the rowsize is given in long, not in byte. The reason is
++ * functions such as FfAddRowPartial or FfAddMapRowWindow internally
++ * operating on longs. By consequence, in the Strassen-Winograd
++ * multiplication algorithm, we have to divide our matrix rows
++ * into longs, not into bytes.
++ **/
++
++/* Allocation with initialisation */
++/* Create an empty matrix that is identical with the window. */
++/* fl is the field size, nor is the number of rows. rowsize is */
++/* the size of a row in longs. */
++MatrixWindow_t *WindowAlloc(int fl, int nor, size_t rowsize)
++{
++    MatrixWindow_t *out;
++    out = ALLOC(MatrixWindow_t);
++    if (out == NULL)
++    {
++        MTX_ERROR1("%E",MTX_ERR_NOMEM);
++        return NULL;
++    }
++    FfSetField(fl);
++    out->Matrix = MatAlloc(fl, nor, rowsize*sizeof(long)*MPB);
++    if (out->Matrix == NULL)
++    {
++        free(out);
++        MTX_ERROR1("%E",MTX_ERR_NOMEM);
++        return NULL;
++    }
++    out->ULCorner = out->Matrix->Data;
++    out->Nor = nor;
++    out->RowSize = rowsize;
++    return out;
++}
++
++/** WARNING: Only to be used if the surrounding matrix can be destroyed
++    Otherwise, just do free(m)! **/
++void WindowFree(MatrixWindow_t *m)
++{
++    if (m->Matrix != NULL)
++      {
++    MatFree(m->Matrix);
++      }
++    free(m);
++}
++
++/* ------------------------------------------------------------------
++ * Auxiliary / Debugging
++ ----------------------------------------------------------------- */
++
++void WindowShow(MatrixWindow_t *A)
++{
++long i,j;
++PTR p = A->ULCorner;
++FfSetNoc(A->Matrix->Noc);
++for (i=A->Nor; i>0; i--, FfStepPtr(&p))
++  {
++  for (j=0; j< (A->RowSize)*sizeof(long); j++)
++    printf("%3.3d ", (unsigned char)p[j]);
++  printf("\n");
++  }
++}
++
++/**
++ ** Overwrite the window with zeroes, but leave the
++ ** rest of the ambient matrix untouched.
++ **/
++
++void WindowClear(MatrixWindow_t *A)
++{
++register long i;
++register size_t rowsize = A->RowSize*sizeof(long);
++PTR p = A->ULCorner;
++FfSetNoc(A->Matrix->Noc);
++for (i=A->Nor; i>0; i--, FfStepPtr(&p))
++{ memset(p, FF_ZERO, rowsize); }
++}
++
++/**
++ ** Multiply a vector by a matrix window.
++ ** This function multiplies the vector @em row from the right by the matrix window
++ ** @em mat and adds the result into @em result.
++ ** The number of columns in both @em mat and @em result is determined by @em rowsize.
++ ** @attention @em result and @em row must not overlap. Otherwise the result is
++ ** undefined.
++ ** @param row The source vector (nor columns).
++ ** @param matrix A matrix window (nor by (rowsize*sizeof(long)*MPB)) of a matrix whose rowsize is FfCurrentRowSize.
++ ** @param nor number of rows in the matrix window.
++ ** @param[out] result The resulting vector ((rowsize*sizeof(long)*MPB) columns).
++ ** @param rowsize number of longs forming a row of @em mat.
++ **/
++
++void FfAddMapRowWindow(PTR row, PTR matrix, int nor, PTR result, size_t rowsize)
++
++{
++    register int i;
++    register FEL f;
++    BYTE *m = (BYTE *) matrix;
++
++#ifdef DEBUG
++    if (result >= row && result < row + FfRowSize(nor))
++    MTX_ERROR("row and result overlap: undefined result!");
++    if (row >= result && row < result + (rowsize*sizeof(long)))
++    MTX_ERROR("row and result overlap: undefined result!");
++#endif
++
++    if (FfOrder == 2)       /* GF(2) is a special case */
++    {
++        register long *x1 = (long *) matrix;
++        register BYTE *r = (BYTE *) row;
++
++        for (i = nor; i > 0; ++r)
++        {
++            register BYTE mask;
++            if (*r == 0)
++            {
++                i -= 8;
++                x1 += 8 * LPR;  /* Skip 8 rows of the matrix window in the ambient matrix*/
++                continue;
++            }
++            for (mask = 0x80; mask != 0 && i > 0; mask >>= 1, --i)
++            {
++                if ((mask & *r) == 0)
++                {
++                    x1 += LPR;  /* Skip a single row */
++                    continue;
++                }
++
++#ifdef ASM_MMX
++__asm__("    pushl %ebx\n");
++__asm__("    movl %0,%%ebx" : : "g" (x1) );
++__asm__("    pushl %ecx\n"
++    "    pushl %edx\n"
++    "    movl 20(%ebp),%ecx\n"  /* result */
++    );
++__asm__ (
++        "    movl 24(%ebp),%edx\n"   /* this time, it is rowsize, not LPR */
++        "    sarl $1,%edx\n"
++        "    je .FASTXOR_1\n"
++        "    .align 16\n"
++    ".FASTXOR_2:\n"
++        "    movq (%ebx),%mm0\n"
++        "    addl $8,%ebx\n"
++        "    pxor (%ecx),%mm0\n"
++        "    movq %mm0,(%ecx)\n"
++        "    addl $8,%ecx\n"
++        "    decl %edx\n"
++        "    jne .FASTXOR_2\n"
++    ".FASTXOR_1:\n"
++    "    popl %edx\n"
++    "    popl %ecx\n");
++__asm__("    movl %%ebx,%0" : : "g" (x1) );
++__asm__("    popl %ebx\n"
++    );
++#else
++                {
++                    register long *x2 = (long *)result;
++                    register int k;
++                    for (k = rowsize; k; --k)
++                        *x2++ ^= *x1++;
++                    /* Now, x1 points to the first item
++                     * after the current line of the window.
++                     * We need to move it to the first position
++                     * of the next line of the window.
++                     */
++                    x1 += (LPR-rowsize);
++                }
++#endif
++            }
++        }
++    }
++    else                /* Any other field */
++    {
++        register BYTE *brow = (BYTE *) row;
++        register int pos = 0;
++        size_t l_rowsize = rowsize*sizeof(long);
++        for (i = nor; i > 0; --i)
++        {
++            f = mtx_textract[pos][*brow];
++            if (++pos == (int) MPB)
++            {
++                pos = 0;
++                ++brow;
++            }
++            if (f != FF_ZERO)
++            {
++                register BYTE *v = m;
++                register BYTE *r = result;
++                if (f == FF_ONE)
++                {
++                    register size_t k = l_rowsize;
++                    for (; k != 0; --k)
++                    {
++                        *r = mtx_tadd[*r][*v++];
++                        ++r;
++                    }
++                }
++                else
++                {
++                    register BYTE *multab = mtx_tmult[f];
++                    register size_t k = l_rowsize;
++                    for (; k != 0; --k)
++                    {
++                        if (*v != 0)
++                            *r = mtx_tadd[multab[*v]][*r];
++                        ++v;
++                        ++r;
++                    }
++                }
++            }
++            m += FfCurrentRowSize;  /* next row of window in the ambient matrix */
++        }
++    }
++}
++
++/** dest := left+right
++   left and right must be distinct, but one of them may coincide with dest -- under the assumption
++   that, in that case, the ambient matrices coincide as well. **/
++MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++  PTR x, result, tmp;
++  int i;
++
++  int lnoc, rnoc, dnoc;
++
++  FfSetField(left->Matrix->Field);
++  if (left->Matrix->Field != right->Matrix->Field || (left->Nor != right->Nor) || (left->RowSize != right->RowSize))
++    {
++      MTX_ERROR1("Windows cannot be added: %E", MTX_ERR_INCOMPAT);
++      return NULL;
++    }
++  size_t rowsize = left->RowSize;
++
++  lnoc = left->Matrix->Noc;
++  rnoc = right->Matrix->Noc;
++  dnoc = dest->Matrix->Noc;
++  /* We have to distinguish cases as to whether dest
++     is equal to either left or right */
++  result = dest->ULCorner;
++  if (left->ULCorner == dest->ULCorner)
++    {   /* we write into left */
++        x = right->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else if (right->ULCorner == dest->ULCorner)
++    {   /* we write into right */
++        x = left->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(lnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else
++    {   /* we need to copy left into dest first */
++        x = right->ULCorner;
++        tmp = left->ULCorner;
++        size_t l_rowsize = rowsize * sizeof(long);
++        for (i = left->Nor; i != 0; --i)
++        {
++            memcpy(result, tmp, l_rowsize);
++            FfSetNoc(lnoc);
++            FfStepPtr(&tmp);
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  return dest;
++}
++
++/** dest := left-right
++   left and right must be distinct, but one of them may coincide with dest -- under the assumption
++   that, in that case, the ambient matrices coincide as well.
++**/
++MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++  PTR x, result, tmp;
++  int i;
++  int lnoc, rnoc, dnoc;
++
++  FfSetField(left->Matrix->Field);
++  if (left->Matrix->Field != right->Matrix->Field || (left->Nor != right->Nor) || (left->RowSize != right->RowSize))
++    {
++      MTX_ERROR1("Windows cannot be subtracted: %E", MTX_ERR_INCOMPAT);
++      return NULL;
++    }
++  size_t rowsize = left->RowSize;
++
++  lnoc = left->Matrix->Noc;
++  rnoc = right->Matrix->Noc;
++  dnoc = dest->Matrix->Noc;
++  /* We have to distinguish cases as to whether dest
++     is equal to either left or right */
++  result = dest->ULCorner;
++  if (left->ULCorner == dest->ULCorner)
++    {   /* we write into left */
++        x = right->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfSubRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else if (right->ULCorner == dest->ULCorner)
++    {   /* we write into right */
++        x = left->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfSubRowPartialReverse(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(lnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else
++    {   /* we need to copy left into dest first */
++        x = right->ULCorner;
++        tmp = left->ULCorner;
++        size_t l_rowsize = rowsize * sizeof(long);
++        for (i = left->Nor; i != 0; --i)
++        {
++            memcpy(result, tmp, l_rowsize);
++            FfSetNoc(lnoc);
++            FfStepPtr(&tmp);
++            FfSubRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  return dest;
++}
++
++/**
++   Add left*right to dest.
++
++   It is assumed that "dest->Matrix" is allocated (with the correct field and dimensions as well), so that we
++   can write the result into it. Moreover, the chunk of memory pointed at by dest MUST be disjoint
++   from the chunks for left and right!
++
++   Dimensions are not tested!
++**/
++MatrixWindow_t *WindowAddMul(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++    PTR x, y, result;
++    long i;
++
++    FfSetField(left->Matrix->Field);
++    x = left->ULCorner;
++    y = right->ULCorner;
++    result = dest->ULCorner;
++
++    for (i = dest->Nor; i != 0; --i)
++    {
++      /* Set the noc of the surrounding matrix of the right factor,
++     which is assumed by FfAddMapRowWindow */
++      FfSetNoc(right->Matrix->Noc);
++      FfAddMapRowWindow(x, y, right->Nor, result, right->RowSize);
++      /* We want to step to the next line of the left factor */
++      FfSetNoc(left->Matrix->Noc);
++      FfStepPtr(&x);
++      /* We want to step to the next line of the result */
++      FfSetNoc(dest->Matrix->Noc);
++      FfStepPtr(&result);
++    }
++    /*
++      dest->RowSize = right->RowSize;
++      dest->Nor = left->Nor;
++    */
++    return dest;
++}
++
++static inline void MatrixToWindow (MatrixWindow_t *out, const Matrix_t *M, long nor, long rowsize, PTR p)
++/* Fill "out" so that it describes the window of nor rows and rowsize longs of M whose upper left
++   corner is p. M is only referenced and presumably freed separately: release "out" with free(...).
++*/
++{
++  out->Matrix = (Matrix_t *) M;  /* the struct field is non-const, so the qualifier is dropped explicitly */
++  out->Nor = nor;
++  out->RowSize = rowsize;
++  out->ULCorner = p;
++}
++
++/**
++ ** Multiply matrix windows
++ ** This function multiplies @em A_win from the right by @em B_win and writes
++ ** the result into @em dest_win.
++ ** The matrix windows must be compatible for multiplication, i.e. they must be over
++ ** the same field, and the number of columns of @em A_win must be equal to the
++ ** number of rows of @em B_win.
++ ** Moreover, it is assumed that @em dest_win is allocated in the right dimensions.
++ ** Since parts of @em dest_win are used to store temporary results, it is essential
++ ** that @em dest_win initially is zero!
++ ** @param[out] dest_win Result.
++ ** @param A_win Left factor.
++ ** @param B_win Right factor
++ ** @return The function returns 0 on success and a nonzero value on error.
++ **/
++
++int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t *B_win)
++{
++  FfSetField(A_win->Matrix->Field);
++  int MPL = MPB*sizeof(long);
++  int full_nrow_cutoff = cutoff*MPL;
++  /* Determine the size of submatrices in divide-and-conquer */
++  /**
++   * Note that the rowsize is given in the unit "long".
++   * Generally we have trailing padding empty bytes. We have to cut
++   * so that two full blocks fit into the non-padded area. This is what we do:
++   * - We halve the number of rows of A (rounded down).
++   * - We halve the rowsize of B (rounded down) , since padding doesn't matter here.
++   * - We determine how many FULL longs fit into a *row* (of A) of B->Nor items.
++   *   Half of it (rounded down) gives the rowsize of A's submatrices.
++   * - From that rowsize, we obtain the corresponding number of rows of
++   *   B's submatrices.
++   **/
++  /*
++  printf("we start with A_win\n");
++  WindowShow(A_win);
++  */
++  int A_sub_nrows = A_win->Nor/2;
++  size_t B_sub_rowsize = B_win->RowSize/2;
++  size_t A_sub_rowsize = (B_win->Nor/MPL)/2;
++  int B_sub_nrows = A_sub_rowsize*MPL;
++  /*printf("A_sub_nrows %d\nA_subrowsize %d\nB_sub_nrows %d\nB_sub_rowsize %d\n", A_sub_nrows,A_sub_rowsize,B_sub_nrows,B_sub_rowsize);*/
++
++  /* If the submatrices were too small, we use school book multiplication */
++  if ((A_sub_nrows < full_nrow_cutoff) ||
++      (B_sub_nrows < full_nrow_cutoff) ||
++      (A_sub_rowsize < cutoff) ||
++      (B_sub_rowsize < cutoff))
++    {
++      /* The ambient matrix of dest_win is supposed to be empty. Thus, we add rather than overwrite */
++      /* printf("Classical for %d x %d and %d x %d\n", A_win->Nor, A_win->RowSize*MPB*sizeof(long), B_win->Nor, B_win->RowSize*MPB*sizeof(long));*/
++      WindowAddMul(dest_win, A_win, B_win);
++      return 0;
++    }
++  /* printf("Strassen step for %d x %d and %d x %d\n", A_win->Nor, A_win->RowSize*MPB*sizeof(long), B_win->Nor, B_win->RowSize*MPB*sizeof(long));*/
++  size_t B_sub_rowsize2 = B_sub_rowsize + B_sub_rowsize;
++  size_t A_sub_rowsize2 = A_sub_rowsize + A_sub_rowsize;
++  size_t B_sub_rowsize2b = B_sub_rowsize2*sizeof(long); /* size in byte */
++  size_t A_sub_rowsize2b = A_sub_rowsize2*sizeof(long);
++  int B_sub_nrows2 = B_sub_nrows + B_sub_nrows;
++  int A_sub_nrows2 = A_sub_nrows + A_sub_nrows;
++
++  Matrix_t *A, *B, *dest;
++  A = A_win->Matrix;
++  B = B_win->Matrix;
++  dest = dest_win->Matrix;
++
++  /* Because of rounding, there are stripes on the right
++   * and the lower boundary that are not part of the
++   * clean divide-and-conquer algorithm.
++   * */
++  int A_nrows_rem = A_win->Nor - A_sub_nrows2;
++  size_t A_rowsize_rem = A_win->RowSize - A_sub_rowsize2;
++
++  int B_nrows_rem = B_win->Nor - B_sub_nrows2;
++  size_t B_rowsize_rem = B_win->RowSize - B_sub_rowsize2;
++
++  /* ----------------------------------------------------
++   * Allocate temporary space (X and Y; both are freed on every exit path below).
++   * We use a schedule introduced by Douglas-Heroux-Slishman-Smith
++   * (see also Boyer-Pernet-Zhou, "Memory efficient scheduling of
++   * Strassen-Winograd's matrix multiplication algorithm", Table 1).
++     ---------------------------------------------------- */
++
++  MatrixWindow_t *X, *Y;
++  if (A_sub_rowsize>B_sub_rowsize)
++    {
++      X = WindowAlloc(A->Field, A_sub_nrows, A_sub_rowsize); }
++  else
++    {
++      X = WindowAlloc(A->Field, A_sub_nrows, B_sub_rowsize); }
++  if (X == NULL)
++  {  MTX_ERROR1("Error allocating a temporary window: %E",MTX_ERR_NOMEM);
++     return 1;
++  }
++  Y = WindowAlloc(A->Field, B_sub_nrows, B_sub_rowsize);
++  if (Y == NULL)
++    {
++      WindowFree(X);
++      MTX_ERROR1("Error allocating a temporary window: %E",MTX_ERR_NOMEM);
++      return 1;
++    }
++
++  /* Define the sub-windows of A, B and dest */
++  /*
++  printf("original windows\n");
++  printf("A\n");
++  WindowShow(A_win);
++  printf("B\n");
++  WindowShow(B_win);
++  printf("dest\n");
++  WindowShow(dest_win);
++  printf("scratch X\n");
++  WindowShow(X);
++  printf("scratch Y\n");
++  WindowShow(Y);
++  */
++  FfSetNoc(A->Noc);
++  MatrixWindow_t A00[1], A01[1], A10[1], A11[1], B00[1], B01[1], B10[1], B11[1];
++  MatrixWindow_t A_last_col[1], A_last_row[1];
++  MatrixWindow_t B_last_col[1], B_last_row[1], B_bulk[1];
++  MatrixWindow_t dest_last_col[1], dest_last_row[1], dest_bulk[1];
++  MatrixToWindow(A00, A, A_sub_nrows, A_sub_rowsize, A_win->ULCorner);
++  MatrixToWindow(A01, A, A_sub_nrows, A_sub_rowsize, (PTR)((char*)(A_win->ULCorner)+A_sub_rowsize*sizeof(long)));
++  MatrixToWindow(A10, A, A_sub_nrows, A_sub_rowsize, FfGetPtr(A_win->ULCorner, A_sub_nrows));
++  MatrixToWindow(A11, A, A_sub_nrows, A_sub_rowsize,
++          (PTR)((char*)(A_win->ULCorner)+(A_sub_nrows*FfCurrentRowSize+A_sub_rowsize*sizeof(long))));
++  /*
++  printf("A00\n");
++  WindowShow(A00);
++  printf("A01\n");
++  WindowShow(A01);
++  printf("A10\n");
++  WindowShow(A10);
++  printf("A11\n");
++  WindowShow(A11);
++  */
++  FfSetNoc(B->Noc);
++  MatrixToWindow(B00, B, B_sub_nrows, B_sub_rowsize, B_win->ULCorner);
++  MatrixToWindow(B01, B, B_sub_nrows, B_sub_rowsize, (PTR)((char*)(B_win->ULCorner)+B_sub_rowsize*sizeof(long)));
++  MatrixToWindow(B10, B, B_sub_nrows, B_sub_rowsize, FfGetPtr(B_win->ULCorner, B_sub_nrows));
++  MatrixToWindow(B11, B, B_sub_nrows, B_sub_rowsize,
++          (PTR)((char*)(B_win->ULCorner)+(B_sub_nrows*FfCurrentRowSize+B_sub_rowsize*sizeof(long))));
++  /*
++  printf("B00\n");
++  WindowShow(B00);
++  printf("B01\n");
++  WindowShow(B01);
++  printf("B10\n");
++  WindowShow(B10);
++  printf("B11\n");
++  WindowShow(B11);
++  */
++  FfSetNoc(dest->Noc);  // since we may multiply into X, the size is not necessarily the same as for B.
++  PTR dest00 = dest_win->ULCorner;
++  PTR dest01 = (PTR)((char*)(dest_win->ULCorner)+B_sub_rowsize*sizeof(long));
++  PTR dest10 = FfGetPtr(dest_win->ULCorner,A_sub_nrows);
++  PTR dest11 = (PTR)((char*)(dest_win->ULCorner)+(A_sub_nrows*FfCurrentRowSize)+B_sub_rowsize*sizeof(long));
++
++  /* Matrix windows containing temporary results */
++  MatrixWindow_t S0[1], S1[1], S2[1], S3[1], T0[1], T1[1], T2[1], T3[1], P0[1], P1[1], P2[1], P3[1], P4[1], P5[1], P6[1], U0[1], U1[1], U2[1], U3[1], U4[1], U5[1], U6[1];
++
++  /* 1.  S2 = A00-A10 in X */
++  S2->Nor = A_sub_nrows;
++  S2->RowSize = A_sub_rowsize;
++  S2->Matrix = X->Matrix;
++  S2->ULCorner = X->ULCorner;
++  WindowDif(S2, A00, A10);
++  /*
++  printf("1.  S2 = A00-A10 in X\n");
++  WindowShow(X);
++  printf("resp.\n");
++  WindowShow(S2);
++  */
++
++  /* 2.  T2 = B11-B01 in Y */
++  T2->Nor = B_sub_nrows;
++  T2->RowSize = B_sub_rowsize;
++  T2->Matrix = Y->Matrix;
++  T2->ULCorner = Y->ULCorner;
++  WindowDif(T2, B11, B01);
++  /*
++  printf("2.  T2 = B11-B01 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 3.  P6 = S2*T2 in dest10 */
++  P6->Nor = A_sub_nrows;
++  P6->RowSize = B_sub_rowsize;
++  P6->Matrix = dest;
++  P6->ULCorner = dest10;
++  /* dest is supposed to be empty */
++  if (StrassenStep(P6, S2, T2)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("3.  P6 = S2*T2 in dest10\n");
++  WindowShow(dest_win);
++  */
++
++  /* 4.  S0 = A10+A11 in X */
++  S0->Nor = A_sub_nrows;
++  S0->RowSize = A_sub_rowsize;
++  S0->Matrix = X->Matrix;
++  S0->ULCorner = X->ULCorner;
++  WindowSum(S0, A10, A11);
++  /*
++  printf("4.  S0 = A10+A11 in X\n");
++  WindowShow(X);
++  */
++
++  /* 5.  T0 = B01-B00 in Y */
++  T0->Nor = B_sub_nrows;
++  T0->RowSize = B_sub_rowsize;
++  T0->Matrix = Y->Matrix;
++  T0->ULCorner = Y->ULCorner;
++  WindowDif(T0, B01, B00);
++  /*
++  printf("5.  T0 = B01-B00 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 6.  P4 = S0*T0 in dest11 */
++  P4->Nor = A_sub_nrows;
++  P4->RowSize = B_sub_rowsize;
++  P4->Matrix = dest;
++  P4->ULCorner = dest11;
++  /* dest is supposed to be empty */
++  if (StrassenStep(P4, S0, T0)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("6.  P4 = S0*T0 in dest11\n");
++  WindowShow(dest_win);
++  */
++
++  /* 7.  S1 = S0-A00 in X */
++  S1->Nor = A_sub_nrows;
++  S1->RowSize = A_sub_rowsize;
++  S1->Matrix = X->Matrix;
++  S1->ULCorner = X->ULCorner;
++  WindowDif(S1, S0, A00);
++  /*
++  printf("7.  S1 = S0-A00 in X\n");
++  WindowShow(X);
++  */
++
++  /* 8.  T1 = B11-T0 in Y */
++  T1->Nor = B_sub_nrows;
++  T1->RowSize = B_sub_rowsize;
++  T1->Matrix = Y->Matrix;
++  T1->ULCorner = Y->ULCorner;
++  WindowDif(T1, B11, T0);
++  /*
++  printf("8.  T1 = B11-T0 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 9.  P5 = S1*T1 in dest01 */
++  P5->Nor = A_sub_nrows;
++  P5->RowSize = B_sub_rowsize;
++  P5->Matrix = dest;
++  P5->ULCorner = dest01;
++  /* dest is supposed to be empty */
++  if (StrassenStep(P5, S1, T1)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("9.  P5 = S1*T1 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*10.  S3 = A01-S1 in X */
++  S3->Nor = A_sub_nrows;
++  S3->RowSize = A_sub_rowsize;
++  S3->Matrix = X->Matrix;
++  S3->ULCorner = X->ULCorner;
++  WindowDif(S3, A01, S1);
++  /*
++  printf("10.  S3 = A01-S1 in X\n");
++  WindowShow(X);
++  */
++
++  /*11.  P2 = S3*B11 in dest00 */
++  P2->Nor = A_sub_nrows;
++  P2->RowSize = B_sub_rowsize;
++  P2->Matrix = dest;
++  P2->ULCorner = dest00;
++  /* That part of dest is still supposed to be empty */
++  if (StrassenStep(P2, S3, B11)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("11.  P2 = S3*B11 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*12.  P0 = A00*B00 in X */
++  P0->Nor = A_sub_nrows;
++  P0->RowSize = B_sub_rowsize;
++  P0->Matrix = X->Matrix;
++  P0->ULCorner = X->ULCorner;
++  /*
++     This time, the matrix we write our product to may be non-empty.
++     Hence, we clear the destination first.
++  */
++  WindowClear(P0);
++  if (StrassenStep(P0, A00, B00)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("12. P0 = A00*B00 in X\n");
++  WindowShow(X);
++  */
++
++  /*13.  U1 = P0+P5 in dest01 */
++  U1->Nor = A_sub_nrows;
++  U1->RowSize = B_sub_rowsize;
++  U1->Matrix = dest;
++  U1->ULCorner = dest01;
++  WindowSum(U1, P0, P5);
++  /*
++  printf("13.  U1 = P0+P5 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*14.  U2 = U1+P6 in dest10 */
++  U2->Nor = A_sub_nrows;
++  U2->RowSize = B_sub_rowsize;
++  U2->Matrix = dest;
++  U2->ULCorner = dest10;
++  WindowSum(U2, U1, P6);
++  /*
++  printf("14.  U2 = U1+P6 in dest10\n");
++  WindowShow(dest_win);
++  */
++
++  /*15.  U3 = U1+P4 in dest01 */
++  U3->Nor = A_sub_nrows;
++  U3->RowSize = B_sub_rowsize;
++  U3->Matrix = dest;
++  U3->ULCorner = dest01;
++  WindowSum(U3, U1, P4);
++  /*
++  printf("15.  U3 = U1+P4 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*16.  U6 = U2+P4 in dest11 (final) */
++  U6->Nor = A_sub_nrows;
++  U6->RowSize = B_sub_rowsize;
++  U6->Matrix = dest;
++  U6->ULCorner = dest11;
++  WindowSum(U6, U2, P4);
++  /*
++  printf("16.  U6 = U2+P4 in dest11 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*17.  U4 = U3+P2 in dest01 (final) */
++  U4->Nor = A_sub_nrows;
++  U4->RowSize = B_sub_rowsize;
++  U4->Matrix = dest;
++  U4->ULCorner = dest01;
++  WindowSum(U4, U3, P2);
++  /*
++  printf("17.  U4 = U3+P2 in dest01 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*18.  T3 = T1-B10 in Y */
++  T3->Nor = B_sub_nrows;
++  T3->RowSize = B_sub_rowsize;
++  T3->Matrix = Y->Matrix;
++  T3->ULCorner = Y->ULCorner;
++  WindowDif(T3, T1, B10);
++  /*
++  printf("18.  T3 = T1-B10 in Y\n");
++  WindowShow(Y);
++  */
++
++  /*19.  P3 = A11*T3 in dest00 */
++  P3->Nor = A_sub_nrows;
++  P3->RowSize = B_sub_rowsize;
++  P3->Matrix = dest;
++  P3->ULCorner = dest00;
++  /* Meanwhile dest00 is non-empty. Hence, overwrite */
++  WindowClear(P3);
++  if (StrassenStep(P3, A11, T3)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("19.  P3 = A11*T3 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*20.  U5 = U2-P3 in dest10 (final) */
++  U5->Nor = A_sub_nrows;
++  U5->RowSize = B_sub_rowsize;
++  U5->Matrix = dest;
++  U5->ULCorner = dest10;
++  WindowDif(U5, U2, P3);
++  /*
++  printf("20.  U5 = U2-P3 in dest10 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*21.  P1 = A01*B10 in dest00 */
++  P1->Nor = A_sub_nrows;
++  P1->RowSize = B_sub_rowsize;
++  P1->Matrix = dest;
++  P1->ULCorner = dest00;
++  /* Again, we need to overwrite */
++  WindowClear(P1);
++  if (StrassenStep(P1, A01, B10)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("21.  P1 = A01*B10 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*22.  U0 = P0+P1 in dest00 (final) */
++  U0->Nor = A_sub_nrows;
++  U0->RowSize = B_sub_rowsize;
++  U0->Matrix = dest;
++  U0->ULCorner = dest00;
++  WindowSum(U0, P0, P1);
++  /*
++  printf("22.  U0 = P0+P1 in dest00 (final)\n");
++  WindowShow(dest_win);
++  */
++  WindowFree(X);
++  WindowFree(Y);
++
++  /* ---------------------------------------------------------
++     Deal with the leftovers on the bottom and the right wing
++     --------------------------------------------------------- */
++
++  if (B_rowsize_rem)
++    {
++      MatrixToWindow(B_last_col, B, B_win->Nor, B_rowsize_rem, (PTR)((char*)(B_win->ULCorner) + B_sub_rowsize2b));
++      MatrixToWindow(dest_last_col, dest, A_win->Nor, B_rowsize_rem, (PTR)((char*)(dest_win->ULCorner) + B_sub_rowsize2b));
++      /* that part of dest is still supposed to be empty, so we can add the product */
++      WindowAddMul(dest_last_col, A_win, B_last_col);
++    }
++  if (A_nrows_rem)
++    {
++      FfSetNoc(A->Noc);
++      MatrixToWindow(A_last_row, A, A_nrows_rem, A_win->RowSize, (PTR)((char*)(A_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++      if (B_rowsize_rem) /* We have already considered the lower right corner in the previous if-clause */
++      {
++          MatrixToWindow(B_bulk, B, B_win->Nor, B_sub_rowsize2, B_win->ULCorner);
++          FfSetNoc(dest->Noc);
++          MatrixToWindow(dest_last_row, dest, A_nrows_rem, B_sub_rowsize2, (PTR)((char*)(dest_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++          /* that part of dest is still supposed to be empty, so we can add the product */
++          WindowAddMul(dest_last_row, A_last_row, B_bulk);
++      }
++      else
++      {
++          FfSetNoc(dest->Noc);
++          MatrixToWindow(dest_last_row, dest, A_nrows_rem, B_win->RowSize, (PTR)((char*)(dest_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++          /* that part of dest is still supposed to be empty, so we can add the product */
++          WindowAddMul(dest_last_row, A_last_row, B_win);
++      }
++    }
++  if (A_rowsize_rem)
++    { /* By the above operations, we don't need to consider the lower right corner of either A or B. */
++      MatrixToWindow(A_last_col, A, A_sub_nrows2, A_rowsize_rem, (PTR)((char*)(A_win->ULCorner) + A_sub_rowsize2b));
++      FfSetNoc(B->Noc);
++      MatrixToWindow(B_last_row, B, B_nrows_rem, B_sub_rowsize2, (PTR)((char*)(B_win->ULCorner) + (B_sub_nrows2*FfCurrentRowSize)));
++      FfSetNoc(dest->Noc);
++      MatrixToWindow(dest_bulk, dest, A_sub_nrows2, B_sub_rowsize2, dest_win->ULCorner);
++      /* now we are supposed to add the product to the result obtained so far */
++      WindowAddMul(dest_bulk, A_last_col, B_last_row);
++    }
++  return 0;
++}
++
++/**
++ ** Multiply matrices
++ ** This function multiplies @em A from the right by @em B and writes
++ ** the result into @em dest (via full-size matrix windows and StrassenStep()).
++ ** The matrices must be compatible for multiplication, i.e. they must be over
++ ** the same field, and the number of columns of @em A must be equal to the
++ ** number of rows of @em B. These conditions are not checked here.
++ ** Moreover, it is assumed that @em dest is allocated in the right dimensions.
++ ** Since parts of @em dest are used to store temporary results, it is essential
++ ** that @em dest initially is zero!
++ ** @param[out] dest Result.
++ ** @param A Left factor.
++ ** @param B Right factor
++ ** @return The function returns @em dest, or NULL on error.
++ **/
++Matrix_t *MatMulStrassen(Matrix_t *dest, const Matrix_t *A, const Matrix_t *B)
++{
++  FfSetField(A->Field); /* only the field of A is consulted */
++  MatrixWindow_t A_win[1], B_win[1], dest_win[1];
++  /* FfSetNoc is called before each use of LPR; presumably LPR depends on the current noc (confirm) */
++  FfSetNoc(A->Noc);
++  MatrixToWindow(A_win, A, A->Nor, LPR, A->Data);
++  FfSetNoc(B->Noc);
++  MatrixToWindow(B_win, B, B->Nor, LPR, B->Data);
++  FfSetNoc(dest->Noc);
++  MatrixToWindow(dest_win, dest, A->Nor, LPR, dest->Data); /* dest window gets as many rows as A */
++  if (StrassenStep(dest_win, A_win, B_win)) return NULL; /* nonzero means failure */
++  return dest;
++}
diff --git a/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch b/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch
new file mode 100644
index 00000000000..1959336c3fe
--- /dev/null
+++ b/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch
@@ -0,0 +1,359 @@
+Use Strassen-Winograd multiplication in some MeatAxe functions.
+
+AUTHOR: Simon King 2015-09-18, simon.king@uni-jena.de 
+
+diff --git a/src/chbasis.c b/src/chbasis.c
+index 1ea9c9f..34cf886 100644
+--- a/src/chbasis.c
++++ b/src/chbasis.c
+@@ -9,6 +9,8 @@
+ 
+ 
+ #include "meataxe.h"
++#include <stdlib.h>
++#include <string.h>
+ 
+ MTX_DEFINE_FILE_INFO
+ 
+@@ -36,9 +38,6 @@ MTX_DEFINE_FILE_INFO
+ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 
+ {
+-    Matrix_t *bi;
+-    int i;
+-
+     /* Check arguments
+        --------------- */
+     if (!MrIsValid(rep))
+@@ -46,11 +45,6 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 	MTX_ERROR1("rep: %E",MTX_ERR_BADARG);
+ 	return -1;
+     }
+-    if (!MatIsValid(trans))
+-    {
+-	MTX_ERROR1("trans: %E",MTX_ERR_BADARG);
+-	return -1;
+-    }
+     if (rep->NGen <= 0)
+ 	return 0;
+     if (trans->Field != rep->Gen[0]->Field || 
+@@ -60,54 +54,50 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 	MTX_ERROR1("%E",MTX_ERR_INCOMPAT);
+ 	return -1;
+     }
+-
+-
+-    /* Basis transformation
+-       -------------------- */
+-    if ((bi = MatInverse(trans)) == NULL) 
+-    {
+-	MTX_ERROR("Basis transformation is singular");
+-	return -1;
+-    }
+-    for (i = 0; i < rep->NGen; ++i)
+-    {
+-	Matrix_t *tmp = MatDup(trans);
+-	MatMul(tmp,rep->Gen[i]);
+-	MatMul(tmp,bi);
+-        MatFree(rep->Gen[i]);
+-	rep->Gen[i] = tmp;
+-    }
+-    MatFree(bi);
+-    return 0;
++    return ChangeBasis(trans, rep->NGen, (const Matrix_t **)(rep->Gen), rep->Gen);
+ }
+ 
+ 
+-
+-int ChangeBasisOLD(const Matrix_t *M, int ngen, const Matrix_t *gen[],
++/** Conjugate a list @em gen of @em ngen square matrices over the same
++ *  field and of the same dimensions by a matrix @em trans
++ *  and write the result into @em newgen. If @em gen == @em newgen, then
++ *  the previous content of @em newgen will be overwritten. **/
++int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[])
+ 
+ {
+-    Matrix_t *bi, *tmp;
++    Matrix_t *bi;
+     int i;
+ 
+     MTX_VERIFY(ngen >= 0);
+-    if (!MatIsValid(M))
++    if (!MatIsValid(trans))
++    {
++	MTX_ERROR1("trans: %E",MTX_ERR_BADARG);
+ 	return -1;
+-    if ((bi = MatInverse(M)) == NULL) 
++    }
++
++    if ((bi = MatInverse(trans)) == NULL)
+     {
+-	MTX_ERROR("Matrix is singular");
++	MTX_ERROR("Basis transformation is singular");
+ 	return -1;
+     }
++
++    Matrix_t *tmp = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++    size_t tmpsize = FfCurrentRowSize*trans->Nor;
+     for (i = 0; i < ngen; ++i)
+     {
+-	tmp = MatDup(M);
+-	MatMul(tmp,gen[i]);
+-	MatMul(tmp,bi);
+-	if ((const Matrix_t **)newgen == gen)
+-	    MatFree(newgen[i]);
+-	newgen[i] = tmp;
++        MTX_VERIFY(gen[i]->Nor==trans->Nor);
++        MTX_VERIFY(gen[i]->Noc==trans->Noc);
++        memset(tmp->Data, FF_ZERO, tmpsize);
++        MatMulStrassen(tmp, trans, gen[i]);
++        if ((const Matrix_t **)newgen == gen)
++            memset(newgen[i]->Data, FF_ZERO, tmpsize);
++        else
++            newgen[i] = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++        MatMulStrassen(newgen[i], tmp, bi);
+     }
+     MatFree(bi);
++    MatFree(tmp);
+     return 0;
+ }
+ 
+diff --git a/src/chop.c b/src/chop.c
+index 65a2a98..0f3f38f 100644
+--- a/src/chop.c
++++ b/src/chop.c
+@@ -538,7 +538,7 @@ static int checkspl(const MatRep_t *rep, Matrix_t *nsp)
+        ------------------------------------------------------------ */
+     sb1 = SpinUp(nsp,rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+     MTX_VERIFY(sb1 != NULL && sb1->Nor == sb1->Noc);
+-    ChangeBasisOLD(sb1,LI.NGen,(const Matrix_t **)rep->Gen,g1);
++    ChangeBasis(sb1,LI.NGen,(const Matrix_t **)rep->Gen,g1);
+     endo = MrAlloc(0,NULL,0);
+ 
+     sb2 = NULL;	/* Mark as unused */
+@@ -576,7 +576,7 @@ static int checkspl(const MatRep_t *rep, Matrix_t *nsp)
+ 	sb2 = SpinUp(v2,rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+ 	MTX_VERIFY(sb2 != NULL && sb2->Nor == sb2->Noc);
+ 	MatFree(v2);
+-	ChangeBasisOLD(sb2,rep->NGen,(const Matrix_t **)rep->Gen,g2);
++	ChangeBasis(sb2,rep->NGen,(const Matrix_t **)rep->Gen,g2);
+ 
+ 	/* Compare the two representations. If they are different,
+ 	   we know that the splitting field degree must be smaller
+@@ -762,7 +762,7 @@ static void newirred(node_t *n)
+     LI.Cf[i].spl = n->spl = n->nsp->Nor;
+     b = SpinUp(n->nsp,n->Rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+     MTX_VERIFY(b != NULL && b->Nor == b->Noc);
+-    ChangeBasisOLD(b,LI.NGen,(const Matrix_t **)n->Rep->Gen,n->Rep->Gen);
++    ChangeBasis(b,LI.NGen,(const Matrix_t **)n->Rep->Gen,n->Rep->Gen);
+     MatFree(b);
+ 
+     /* Write out the generators
+diff --git a/src/homcomp.c b/src/homcomp.c
+index a808089..351af2b 100644
+--- a/src/homcomp.c
++++ b/src/homcomp.c
+@@ -112,10 +112,10 @@ Matrix_t *HomogeneousPart(MatRep_t *m, MatRep_t *s, Matrix_t *npw,
+ 	{
+ 	    PTR matptr = MatGetPtr(A,j);
+ 	    int u;
+-	    a = MatDup(V[j]);
+-	    b = MatDup(s->Gen[i]);
+-	    MatMul(a,m->Gen[i]);		/* the equations that describe  */
+-	    MatMul(b,V[j]);			/* that a vector in the null-   */
++	    a = MatAlloc(V[j]->Field, V[j]->Nor, m->Gen[i]->Noc);
++	    b = MatAlloc(s->Gen[i]->Field, s->Gen[i]->Nor, V[j]->Noc);
++	    MatMulStrassen(a, V[j], m->Gen[i]);		/* the equations that describe  */
++	    MatMulStrassen(b,s->Gen[i], V[j]);			/* that a vector in the null-   */
+ 	    MatMulScalar(b,FfNeg(FF_ONE));	/* space is the first element   */
+ 	    MatAdd(a, b);			/* of a standard basis of a     */ 
+ 					/* module isomorphic to S       */
+diff --git a/src/isisom.c b/src/isisom.c
+index 790d2b0..e2b7f07 100644
+--- a/src/isisom.c
++++ b/src/isisom.c
+@@ -9,7 +9,7 @@
+ 
+ 
+ #include "meataxe.h"
+-
++#include <string.h>
+ 
+ MTX_DEFINE_FILE_INFO
+ 
+@@ -114,7 +114,7 @@ int IsIsomorphic(const MatRep_t *rep1, const CfInfo *info1,
+ {
+     int j;
+     WgData_t *wg;
+-    Matrix_t  *word, *m, *seed, *b, *bi;
++    Matrix_t  *word, *m, *seed, *b, *g1, *g2;
+     int result;
+ 
+     if (CheckArgs(rep1->NGen,rep1->Gen,info1,rep2->Gen,use_pw) != 0)
+@@ -148,27 +148,35 @@ int IsIsomorphic(const MatRep_t *rep1, const CfInfo *info1,
+ 	MatFree(b);
+ 	return 0;
+     }
+-    bi = MatInverse(b);
+ 
+     /* Compare generators
+        ------------------ */
++    /**
++     * We test whether b*rep2_j*b^-1 == rep1_j
++     * by testing whether b*rep2_j == rep1_j*b
++     * */
++    g1 = MatAlloc(b->Field, b->Nor, b->Noc);
++    g2 = MatAlloc(b->Field, b->Nor, b->Noc);
++    size_t memsize = FfCurrentRowSize*b->Nor;
+     for (j = 0, result = 0; result == 0 && j < rep2->NGen; ++j)
+     {
+-	Matrix_t *g = MatDup(b);
+-	MatMul(g,rep2->Gen[j]);
+-	MatMul(g,bi);
+-	if (MatCompare(g,rep1->Gen[j]) != 0)
+-	    result = 1;
+-	MatFree(g);
++	MatMulStrassen(g2, b, rep2->Gen[j]);
++	MatMulStrassen(g1, rep1->Gen[j], b);
++	if (MatCompare(g1, g2) != 0)
++	    {   result = 1;
++            break;
++        }
++	memset(g1->Data, FF_ZERO, memsize);
++    memset(g2->Data, FF_ZERO, memsize);
+     }
+ 
+     /* Clean up 
+        -------- */
+     if (trans != NULL && result == 0)
+-	*trans = b;
++        *trans = b;
+     else
+-	MatFree(b);
+-    MatFree(bi);
+-
++        MatFree(b);
++    MatFree(g1);
++    MatFree(g2);
+     return (result == 0);
+ }
+diff --git a/src/meataxe.h b/src/meataxe.h
+index e2f5a84..5123f1c 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -1096,11 +1096,7 @@ int LdFree(LdLattice_t *l);
+ int LdAddIncidence(LdLattice_t *lat, int sub, int sup);
+ int LdSetPositions(LdLattice_t *l);
+ 
+-
+-
+-
+-/* OLD STUFF */
+-int ChangeBasisOLD(const Matrix_t *M, int ngen, const Matrix_t *gen[],
++int ChangeBasis(const Matrix_t *M, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[]);
+ 
+ 
+diff --git a/src/mktree.c b/src/mktree.c
+index ede7881..3e99489 100644
+--- a/src/mktree.c
++++ b/src/mktree.c
+@@ -213,8 +213,8 @@ static int MakeTree()
+ 	{
+ 	    /* Calculate next element
+ 	       ---------------------- */
+-	    Matrix_t *newelem = MatDup(Elms[src].Matrix);
+-	    MatMul(newelem,Rep->Gen[g]);
++	    Matrix_t *newelem = MatAlloc(Elms[src].Matrix->Field, Elms[src].Matrix->Nor, Rep->Gen[g]->Noc);
++	    MatMulStrassen(newelem, Elms[src].Matrix, Rep->Gen[g]);
+ 
+ 	    /* If it is new, add to tree, else discard
+ 	       --------------------------------------- */
+diff --git a/src/precond.c b/src/precond.c
+index f144716..efc2f3d 100644
+--- a/src/precond.c
++++ b/src/precond.c
+@@ -391,8 +391,8 @@ static void MakePQ(int n, int mj, int nj)
+ 	for (k = 0; k < spl; ++k)
+ 	{
+ 	    FEL f;
+-	    Matrix_t *x = MatDup(endo[i]);  
+-	    MatMul(x,endo[k]);
++	    Matrix_t *x = MatAlloc(endo[i]->Field, endo[i]->Nor, endo[k]->Noc);  
++	    MatMulStrassen(x,endo[i],endo[k]);
+ 	    f = MatTrace(x);
+ 	    FfInsert(pptr,k,f);
+ 	    MatFree(x);
+diff --git a/src/pseudochop.c b/src/pseudochop.c
+index 68cadae..3f1fa97 100644
+--- a/src/pseudochop.c
++++ b/src/pseudochop.c
+@@ -105,8 +105,8 @@ int main(int argc, const char *argv[])
+         {
+ 	    Matrix_t *newmat;
+             oldnul = newnul;
+-            newmat = MatDup(old);
+-            MatMul(newmat,old);
++            newmat = MatAlloc(old->Field, old->Nor, old->Noc);
++            MatMulStrassen(newmat, old, old);
+             MatFree(old);
+             MatFree(nulsp);
+             old = MatDup(newmat);
+diff --git a/src/pwkond.c b/src/pwkond.c
+index c14c20e..5eaa5de 100644
+--- a/src/pwkond.c
++++ b/src/pwkond.c
+@@ -309,8 +309,8 @@ static void gkond(const Lat_Info *li, int i, Matrix_t *b, Matrix_t *k,
+     char fn[LAT_MAXBASENAME+10];
+     Matrix_t *x1, *x2;
+ 
+-    x1 = MatDup(k);
+-    MatMul(x1,w);
++    x1 = MatAlloc(k->Field, k->Nor, w->Noc);
++    MatMulStrassen(x1, k, w);
+     x2 = QProjection(b,x1);
+     sprintf(fn,"%s%s.%s",li->BaseName,Lat_CfName(li,i),name);
+     MatSave(x2,fn);
+@@ -340,7 +340,7 @@ static void Standardize(int cf)
+     MESSAGE(0,("  Transforming to standard basis\n"));
+     sb = SpinUp(CfList[cf].PWNullSpace,CfList[cf].Gen,
+ 	SF_FIRST|SF_CYCLIC|SF_STD,&script,NULL);
+-    ChangeBasisOLD(sb,CfList[cf].Gen->NGen,
++    ChangeBasis(sb,CfList[cf].Gen->NGen,
+ 	(const Matrix_t **)CfList[cf].Gen->Gen,std);
+     MatFree(sb);
+ 
+@@ -782,7 +782,7 @@ static int try2(long w, FEL f)
+ 	    	MESSAGE(3,("failed\n"));
+ 		return -1;  /* Nullity should be 0 */
+ 	    }
+-	    nul = MatNullity__(MatMul(MatDup(word),word));
++	    nul = MatNullity__(MatMulStrassen(MatAlloc(word->Field, word->Nor, word->Noc), word, word));
+ 	    if (nul != CfList[i].Info->spl)
+ 	    {
+ 		MatFree(word);
+@@ -915,7 +915,7 @@ static int try_p(long w)
+ 	       /* Check if the nullity is stable
+ 	          ------------------------------ */
+ 	       wp = MatInsert(word,mp->Factor[k]);
+-	       wp2 = MatMul(MatDup(wp),wp);
++	       wp2 = MatMulStrassen(MatAlloc(wp->Field, wp->Nor, wp->Noc), wp, wp);
+ 	       MatFree(wp);
+ 	       nul = MatNullity__(wp2);
+ 	       if (nul != CfList[i].Info->spl) 
+diff --git a/src/soc.c b/src/soc.c
+index 789a02b..199a2e0 100644
+--- a/src/soc.c
++++ b/src/soc.c
+@@ -294,8 +294,8 @@ static int NextLayer()
+ 	Matrix_t *mat, *stgen;
+ 
+ 	mat = MatCutRows(basis,basis->Nor - Dimension,Dimension);
+-	stgen = MatDup(bas);
+-	MatMul(stgen, mat);
++	stgen = MatAlloc(bas->Field, bas->Nor, mat->Noc);
++	MatMulStrassen(stgen, bas, mat);
+ 	MatCopyRegion(basis,basis->Nor - Dimension,0,stgen,0,0,Dimension,-1);
+ 	MatFree(mat);
+ 	MatFree(stgen);

From 9e2a8c6027a1034ec21ea52dd2881965c05292d3 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 13 Sep 2015 10:56:13 +0200
Subject: [PATCH 03/23] A very basic MeatAxe Cython wrapper

---
 src/module_list.py                    |    5 +
 src/sage/libs/meataxe.pxd             |  128 +++
 src/sage/matrix/matrix_gfpn_dense.pxd |   31 +
 src/sage/matrix/matrix_gfpn_dense.pyx | 1101 +++++++++++++++++++++++++
 src/sage/matrix/matrix_space.py       |   30 +-
 5 files changed, 1286 insertions(+), 9 deletions(-)
 create mode 100644 src/sage/libs/meataxe.pxd
 create mode 100644 src/sage/matrix/matrix_gfpn_dense.pxd
 create mode 100644 src/sage/matrix/matrix_gfpn_dense.pyx

diff --git a/src/module_list.py b/src/module_list.py
index ed27e14a07b..d616a08c162 100644
--- a/src/module_list.py
+++ b/src/module_list.py
@@ -949,6 +949,11 @@ def uname_specific(name, value, alternative):
     Extension('sage.matrix.matrix_window',
               sources = ['sage/matrix/matrix_window.pyx']),
 
+    OptionalExtension("sage.matrix.matrix_gfpn_dense",
+              sources = ['sage/matrix/matrix_gfpn_dense.pyx'],
+              libraries = ['mtx'],
+              package = 'meataxe'),
+
     Extension('sage.matrix.misc',
               sources = ['sage/matrix/misc.pyx'],
               libraries=['mpfr']),
diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
new file mode 100644
index 00000000000..2b413533c4a
--- /dev/null
+++ b/src/sage/libs/meataxe.pxd
@@ -0,0 +1,128 @@
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+#
+# Import SOME features from meataxe.h
+# (most types are not needed, but listed here
+# in the comments, for completeness)
+#
+cdef extern from "meataxe.h":
+    # general ctype emulations
+    # ctypedef int size_t   # size_t should be a standard type!
+    ctypedef unsigned long Ulong
+    ctypedef unsigned short Ushort
+    ctypedef unsigned char Uchar
+    ctypedef unsigned char FEL
+    ctypedef FEL *PTR
+
+    # global constants
+    cdef extern int FfOrder             # Current field order
+    cdef extern int FfChar              # Current characteristic
+    cdef extern FEL FfGen               # Generator
+    cdef extern int FfNoc               # Number of columns for row ops
+    cdef extern size_t FfCurrentRowSize # The byte size of a single row in memory,
+                                        # always a multiple of sizeof(long)
+    cdef extern size_t FfCurrentRowSizeIo # The number of bytes actually used in a row.
+    cdef extern char MtxLibDir[250]     # Where to search/create multiplication tables
+
+    # we only wrap MeatAxe for small fields (size < 255)
+    cdef extern FEL mtx_tmult[256][256]
+    cdef extern FEL mtx_tadd[256][256]
+    cdef extern FEL mtx_taddinv[256]
+    cdef extern FEL mtx_tmultinv[256]
+    cdef extern FEL mtx_tinsert[8][256]
+    cdef extern FEL mtx_textract[8][256]
+    cdef extern FEL FF_ONE, FF_ZERO
+
+#########################################
+# function prototypes
+    ## global parameters
+    size_t FfRowSize(int noc)
+    size_t FfTrueRowSize(int noc) # Difference to FfRowSize: Doesn't count padding bytes
+    int FfSetField(int field)
+    int FfSetNoc(int ncols)
+
+    ## Finite Fields
+    # FEL FfAdd(FEL a,FEL b)
+    # FEL FfSub(FEL a, FEL b)
+    # FEL FfNeg(FEL a)
+    # FEL FfMul(FEL a, FEL b)
+    # FEL FfDiv(FEL a, FEL b)
+    # FEL FfInv(FEL a)
+    # FEL FfEmbed(FEL a, int subfield)
+    # FEL FfRestrict(FEL a, int subfield)
+    FEL FfFromInt(int l)
+    int FfToInt(FEL f)
+
+    ## Rows
+    void FfMulRow(PTR row, FEL mark)
+    # void FfAddMulRow(PTR dest, PTR src, FEL f)
+    PTR FfAddRow(PTR dest, PTR src)
+    FEL FfExtract(PTR row, int col)
+    void FfInsert(PTR row, int col, FEL mark)
+    int FfFindPivot(PTR row, FEL *mark)
+    # FEL FfScalarProduct(PTR a, PTR b)
+    # void FfSwapRows(PTR dest, PTR src)
+    # void FfPermRow(PTR row, long *perm, PTR result)
+    # int FfCmpRows(PTR p1, PTR p2)
+
+    ## multiple rows
+    PTR FfAlloc(int nor)
+    void FfExtractColumn(PTR mat,int nor,int col,PTR result)
+    int FfStepPtr(PTR *x)  # Advance to next row
+    PTR FfGetPtr(PTR base, int row)  # Advance to "row" rows after base
+    void FfInsert(PTR row, int col, FEL mark)
+    void FfMapRow(PTR row, PTR matrix, int nor, PTR result)
+
+    ############
+    ## Skip: Application, error handling, i/o
+
+    ############
+    ## Matrices
+    ############
+    ctypedef struct Matrix_t:
+        unsigned long Magic         #/* Used internally */
+        int Field, Nor, Noc     #/* Field, #rows, #columns */
+        PTR Data            #/* Pointer to data area */
+        int RowSize                     # Size (in bytes) of one row
+        int *PivotTable                 # Pivot table (if matrix is in echelon form)
+    ## Basic memory operations
+    Matrix_t *MatAlloc(int field, int nor, int noc)
+    int MatFree(Matrix_t *mat)
+    PTR MatGetPtr(Matrix_t *mat, int row)
+    int MatCompare(Matrix_t *a, Matrix_t *b)
+    # int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols)
+    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols)
+    # Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows)
+    Matrix_t *MatDup(Matrix_t *src)
+    Matrix_t *MatId(int fl, int nor)
+    Matrix_t *MatLoad(char *fn)
+    int MatSave(Matrix_t *mat, char *fn)
+
+
+    ## Basic Arithmetic  ## general rule: dest is changed, src/mat are unchanged!
+    Matrix_t *MatTransposed(Matrix_t *src)
+    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src)
+    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff)
+    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src)
+    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
+    Matrix_t *MatPower(Matrix_t *mat, long n)
+    FEL MatTrace(Matrix_t *mat)
+    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B)
+    void StrassenSetCutoff(size_t size)
+
+    ## "Higher" Arithmetic ## all arguments are unchanged
+    # int MatClean(Matrix_t *mat, Matrix_t *sub)
+    int MatEchelonize(Matrix_t *mat)
+    int MatOrder(Matrix_t *mat)
+    long MatNullity(Matrix_t *mat)
+    Matrix_t *MatInverse(Matrix_t *src)
+    Matrix_t *MatNullSpace(Matrix_t *mat)
+# that's all we import from meataxe.h!
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
new file mode 100644
index 00000000000..61025adf6a3
--- /dev/null
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -0,0 +1,31 @@
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+cdef class FieldConverter_class:
+    cdef object field  # that's a function converting an int to a field element
+    cdef object int_to_field(self, int x)
+    cdef int field_to_int(self, x)
+
+from sage.matrix.matrix_dense cimport Matrix_dense
+from sage.structure.element cimport Matrix
+from sage.libs.meataxe cimport *
+
+cdef class Matrix_gfpn_dense(Matrix_dense):
+    cdef Matrix_t *Data
+    cdef FieldConverter_class _converter
+    #cpdef Matrix_gfpn_dense normalized(Matrix_gfpn_dense self)
+    #cpdef Matrix_gfpn_dense semi_echelon(Matrix_gfpn_dense self)
+    #cpdef int nullity(Matrix_gfpn_dense self)
+    #cpdef tuple lead(self)
+    cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
+    cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
+    cdef Matrix _matrix_times_matrix_(self, Matrix right)
+    cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right)
+    cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=*)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
new file mode 100644
index 00000000000..6021c554828
--- /dev/null
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -0,0 +1,1101 @@
+r"""
+Dense Matrices over `\mathbb F_q`, with `q<255` odd and not prime
+
+This module is a wrapper for version 2.4.24 of the Aachen
+`C-MeatAxe <http://www.math.rwth-aachen.de/homes/MTX/download.html>`_,
+improved by an implementation of the Winograd-Strassen multiplication
+algorithm. It provides matrices over the finite field `\mathbb F_q`,
+where `q<255` is odd and not prime.
+
+AUTHORS:
+
+- Simon King (2015-09-18): initial version
+
+"""
+
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+
+## Set MtxLibDir, the directory in which MeatAxe searches for
+## (or creates) its multiplication tables.
+
+from sage.env import DOT_SAGE
+import os
+cdef extern from "Python.h":
+    object PyString_FromStringAndSize(char *s, Py_ssize_t len)
+    char* PyString_AsString(object string)
+MtxLibDir = PyString_AsString(os.path.join(DOT_SAGE,'meataxe'))
+
+####################
+#
+# import sage types
+#
+####################
+
+from sage.rings.integer import Integer
+from sage.rings.finite_rings.constructor import GF
+from sage.rings.finite_rings.integer_mod import IntegerMod_int
+from sage.matrix.constructor import random_matrix
+from sage.rings.arith import is_prime_power, factor
+from sage.matrix.matrix_space import MatrixSpace
+from sage.misc.randstate import current_randstate
+from sage.misc.cachefunc import cached_method, cached_function
+from sage.structure.element cimport Element, ModuleElement, RingElement, Matrix
+
+include 'sage/ext/stdsage.pxi'
+
+####################
+#
+# auxiliary functions
+#
+####################
+import sys
+from libc.string cimport memcpy
+
+cdef inline int setfield(long n) except -1:
+    # This is a wrapper around FfSetField, but
+    # we guard it against MTX_Error, which would immediately
+    # crash the Sage session.
+    if n == FfOrder:
+        return 0
+    if not (0 < n < 255 and is_prime_power(n)):
+        raise ValueError("Only finite fields of order at most 255 are supported")
+    return FfSetField(n)
+
+# Fast conversion from field to int and int to field
+cdef class FieldConverter_class:
+    """
+    An auxiliary class, used to convert between <int> and finite field element
+
+    This class is for non-prime fields only. The method
+    :meth:`int_to_field` exists for speed. The method
+    :meth:`field_to_int` exists in order to have a common interface
+    for elements of prime and non-prime fields; see
+    :class:`PrimeFieldConverter_class`.
+
+    EXAMPLE::
+
+        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+        sage: F.<y> = GF(125)
+        sage: C = FieldConverter_class(F)
+        sage: C.int_to_field(15)
+        3*y
+        sage: F.fetch_int(15)
+        3*y
+        sage: %timeit C.int_to_field(15)    #not tested
+        625 loops, best of 3: 1.04 µs per loop
+        sage: %timeit F.fetch_int(15)       #not tested
+        625 loops, best of 3: 3.97 µs per loop
+        sage: C.field_to_int(y)
+        5
+        sage: y.integer_representation()
+        5
+
+    """
+    def __init__(self, field):
+        """
+        INPUT:
+
+        A finite *non-prime* field. This assumption is not tested.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.int_to_field(15)
+            3*y
+            sage: F.fetch_int(15)
+            3*y
+            sage: C.field_to_int(y)
+            5
+            sage: y.integer_representation()
+            5
+
+        """
+        self.field = field._cache.fetch_int
+    cdef object int_to_field(self, int x):
+        """
+        Fetch a python int into the field.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.int_to_field(15)
+            3*y
+            sage: F.fetch_int(15)
+            3*y
+
+        """
+        return self.field(x)
+    cdef int field_to_int(self, x):
+        """
+        Represent a field element by a python int.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.field_to_int(y)
+            5
+            sage: y.integer_representation()
+            5
+
+        """
+        return x.integer_representation()
+
+cdef class PrimeFieldConverter_class(FieldConverter_class):
+    """
+    An auxiliary class, used to convert between <int> and finite field element
+
+    This class is for prime fields only. The methods
+    :meth:`int_to_field` and :meth:`field_to_int` exist in order to
+    have a common interface for elements of prime and non-prime fields;
+    see :class:`FieldConverter_class`.
+
+    EXAMPLE::
+
+        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+        sage: F = GF(5)
+        sage: C = PrimeFieldConverter_class(F)
+        sage: C.int_to_field(int(2))
+        2
+        sage: F(2)
+        2
+        sage: C.field_to_int(F(2))
+        2
+        sage: int(F(2))
+        2
+
+    """
+    def __init__(self, field):
+        """
+        INPUT:
+
+        A finite *prime* field. This assumption is not tested.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.int_to_field(int(2))
+            2
+            sage: F(2)
+            2
+            sage: C.field_to_int(F(2))
+            2
+            sage: int(F(2))
+            2
+
+        """
+        self.field = field
+    cdef object int_to_field(self, int x):
+        """
+        Fetch a python int into the field.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.int_to_field(int(2))
+            2
+            sage: F(2)
+            2
+
+        """
+        return IntegerMod_int(self.field, x)
+    cdef int field_to_int(self, x):
+        """
+        Represent a field element by a python int.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.field_to_int(F(2))
+            2
+            sage: int(F(2))
+            2
+
+        """
+        return int(x)
+
+cdef dict _converter_cache = {}
+cdef FieldConverter_class FieldConverter(field):
+    """
+    Return a :class:`FieldConverter_class` or :class:`PrimeFieldConverter_class` instance,
+    depending whether the field is prime or not.
+
+    EXAMPLE::
+
+        sage: MS = MatrixSpace(GF(5^3,'y'),2)
+        sage: A = MS.random_element()
+        sage: A*2 == A+A    # indirect doctest
+        True
+        sage: A = MS.random_element()
+        sage: A*2 == A+A
+        True
+
+    """
+    try:
+        return _converter_cache[field]
+    except KeyError:
+        if field.is_prime_field():
+            return _converter_cache.setdefault(field, PrimeFieldConverter_class(field))
+        return _converter_cache.setdefault(field, FieldConverter_class(field))
+
+
+
+cdef class Matrix_gfpn_dense(Matrix_dense):
+    r"""
+    Dense matrices over `\mathbb F_q`, `q<255` odd and not prime.
+
+    NOTE:
+
+    This class uses a major modification of the Aachen C-MeatAxe
+    as backend. In principle, it would also work for prime fields
+    and in characteristic two. However, other matrices in Sage,
+    relying on linbox, m4ri or m4rie, are more efficient in these
+    cases.
+
+    EXAMPLES::
+
+        sage: M = MatrixSpace(GF(25,'z'),2,3)([1,2,3,4,5,6])
+        sage: print M
+        [1 2 3]
+        [4 0 1]
+        sage: type(M)
+        <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+
+    The documentation of the ``__init__`` methods shows further
+    ways of creating a :class:`Matrix_gfpn_dense` instance.
+    However, these should only be of internal use.
+
+    """
+##################
+## Init, Dealloc, Copy
+    def __cinit__(self, parent=None, entries=None, *args, **kwds):
+        """
+        TESTS::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # indirect doctest
+            []
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)
+            [0 0 0 0]
+            [0 0 0 0]
+            [0 0 0 0]
+            [0 0 0 0]
+
+        """
+        if parent is None:  # this makes Matrix_gfpn_dense.__new__(Matrix_gfpn_dense) work,
+                            # returning a non-initialised matrix
+            return
+        if isinstance(parent, basestring): # this allows to provide a file when initialising a matrix
+            return
+        cdef int f = parent.base_ring().order()
+        cdef int nrows = parent.nrows()
+        cdef int ncols = parent.ncols()
+        self.Data = MatAlloc(f, nrows, ncols)
+
+    def __init__(self, parent, data=None, mutable=True, copy=False, coerce=False):
+        """
+        Matrix extension class using libmeataxe as backend
+
+        INPUT:
+
+        Instances of this class can be created by providing one of
+        the following input data, where ``q<255`` is a prime power,
+        ``m,n`` are non-negative integers, and `a_{11},...,a_{mn}`
+        can be coerced into ``GF(q)``. Note that a user should
+        create these instances via the matrix constructors; what
+        we explain here is for internal use only!
+
+        - None => empty matrix over an unspecified field (used for unpickling)
+        - a string ``f`` ==> load matrix from the file named ``f``
+        - A matrix space of `m\\times n` matrices over GF(q) and either
+
+          - a list `[a_{11},a_{12},...,a_{1n},a_{21},...,a_{m1},...,a_{mn}]`,
+            which results in a matrix with the given marks
+          - ``None``, which is the fastest way to create a zero matrix.
+          - an element of GF(q), which results in a diagonal matrix with the
+            given element on the diagonal.
+
+        If the optional parameter ``mutable`` is ``False`` (by default,
+        it is ``True``), the resulting matrix can not be changed, and
+        it can be used as key in a Python dictionary.
+
+        The arguments ``copy`` and ``coerce`` are ignored, they are only
+        here for a common interface with :class:`~sage.matrix.matrix.Matrix`.
+
+        EXAMPLES::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+
+        1. Creating an empty matrix::
+
+            sage: Matrix_gfpn_dense(None)
+            []
+
+        2. Creating a zero (3x2)-matrix::
+
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))
+            [0 0]
+            [0 0]
+            [0 0]
+
+        3. Creating a matrix from a list or list of lists::
+
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])
+            [1 2 3]
+            [4 0 1]
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])  # indirect doctest
+            [1 2 3]
+            [4 0 1]
+
+        4. Creating a diagonal matrix::
+
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M
+            [2 0 0 0 0]
+            [0 2 0 0 0]
+            [0 0 2 0 0]
+            [0 0 0 2 0]
+            [0 0 0 0 2]
+
+        5. Creating a matrix from a file in MeatAxe format.
+
+           First, we have to create that file; we use a temporary file,
+           that will be removed when leaving Sage. Note that the method
+           :meth:`msave` must be used, which does not use Python pickling
+           but relies on the intrinsic C--MeatAxe way of saving.
+           ::
+
+            sage: f = tmp_filename()
+            sage: M.msave(f)
+            sage: Matrix_gfpn_dense(f)
+            [2 0 0 0 0]
+            [0 2 0 0 0]
+            [0 0 2 0 0]
+            [0 0 0 2 0]
+            [0 0 0 0 2]
+
+        TESTS::
+
+            sage: MS = MatrixSpace(GF(125,'y'),2)
+            sage: A = MS(0)
+            sage: A.left_kernel()
+            Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
+            Basis matrix:
+            [1 0]
+            [0 1]
+            sage: A.right_kernel()
+            Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
+            Basis matrix:
+            [1 0]
+            [0 1]
+
+        """
+        if parent is None:
+            self._is_immutable = False
+            self._ncols = 0
+            self._nrows = 0
+            self._cache = {}
+            return
+        if isinstance(parent, basestring): # load from file
+            FILE = os.path.realpath(parent)
+            try:
+                fsock = open(FILE,"rb",0)
+                fsock.close()
+            except (OSError,IOError):
+                return
+            self.Data = MatLoad(FILE)
+            if not self.Data or FfSetField(self.Data.Field):  # guard: MatLoad may hand back NULL for unreadable data
+                raise ValueError("Invalid data in file {}".format(FILE))
+            B = GF(self.Data.Field, 'z')
+            parent = MatrixSpace(B, self.Data.Nor, self.Data.Noc)
+            self._is_immutable = False
+            self._parent = parent
+            self._base_ring = B
+            self._converter = FieldConverter(B)
+            self._ncols = self.Data.Noc
+            self._nrows = self.Data.Nor
+            self._cache = {}
+            return
+
+        if not self.Data: # should have been initialised by __cinit__
+            raise MemoryError, "Error allocating memory for MeatAxe matrix"
+        Matrix_dense.__init__(self, parent)
+        self._is_immutable = not mutable
+        B = self._base_ring
+        self._converter = FieldConverter(B)
+        if data is None:
+            return
+
+        cdef int i,j
+        cdef FEL f
+        cdef PTR x
+        if not isinstance(data,list):
+            if not data:
+                return
+            if self._nrows != self._ncols:
+                raise ValueError("Cannot initialise non-square matrix from {}".format(data))
+            FfSetField(self.Data.Field)  # select field and row size explicitly, as the list branch below does
+            FfSetNoc(self.Data.Noc)
+            f = FfFromInt(self._converter.field_to_int(self._coerce_element(data)))
+            x = self.Data.Data
+            for j from 0 <= j < self.Data.Noc:
+                FfInsert(x,j,f)
+                FfStepPtr(&x)
+            return
+
+        x = self.Data.Data
+        cdef int nr = self.Data.Nor
+        cdef int nc = self.Data.Noc
+        assert self._ncols == nc
+        assert self._nrows == nr
+        if nr==0 or nc==0:
+            return
+        if len(data)<nr:
+            raise ValueError, "Expected a list of size at least the number of rows"
+        cdef list dt, dt_i
+        FfSetField(self.Data.Field)
+        FfSetNoc(nc)
+        if isinstance(data[0],list):
+            # The matrix is given by a list of rows
+            dt = data
+            for i from 0 <= i < nr:
+                dt_i = dt[i]
+                for j from 0 <= j < nc:
+                    FfInsert(x, j, FfFromInt(self._converter.field_to_int(self._coerce_element(dt_i[j]))))
+                FfStepPtr(&(x))
+        else:
+            # It is supposed to be a flat list of all entries, sorted by rows
+            dtnext = data.__iter__().next
+            for i from 0 <= i < nr:
+                for j from 0 <= j < nc:
+                    FfInsert(x, j, FfFromInt(self._converter.field_to_int(self._coerce_element(dtnext()))))
+                FfStepPtr(&(x))
+
+    def rowsize(self):  # size in bytes of one row, read from the MeatAxe matrix struct
+        return self.Data.RowSize  # NOTE(review): self.Data is dereferenced without a NULL check (empty matrix)
+
+    def __dealloc__(self):
+        if self.Data != NULL:
+            MatFree(self.Data)
+            self.Data = NULL
+
+    def __copy__(self):
+        """
+        Return a copy of this matrix.
+
+        EXAMPLES::
+
+            sage: M=MatrixSpace(GF(25,'x'),3,20)([20*[0],20*[0],[1]+19*[0]])
+            sage: N=copy(M)
+            sage: print N
+            [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            sage: N==M
+            True
+            sage: N is M
+            False
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: M=Matrix_gfpn_dense('')
+            sage: N=copy(M)
+            sage: N
+            Empty MTX matrix
+            sage: N==M
+            True
+            sage: N is M
+            False
+        """
+        cdef Matrix_gfpn_dense retval = type(self).__new__(type(self))
+        # Do the initialisation "manually"
+        retval._is_immutable = False  # a copy of a matrix is mutable!
+        retval._parent = self._parent
+        retval._base_ring = self._base_ring
+        retval._converter = self._converter
+        retval._ncols = self._ncols
+        retval._nrows = self._nrows
+        retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
+        if self.Data:
+            retval.Data = MatDup(self.Data)
+            if not retval.Data:
+                raise MemoryError, "Error copying a %s instance"%repr(type(self))
+        else:
+            retval.Data = NULL
+        return retval
+
+    ##########################
+    ## Saving should be done via pickling
+    ## However, we keep a method that relies on MeatAxe matsave:
+    def msave(self,f):
+        """
+        M.msave('filename') ==> save matrix into file <filename>
+
+        It can be reloaded with ``Matrix_gfpn_dense('filename')``.
+        """
+        MatSave(self.Data,f)
+
+    ## Pickling and string representation is taken care of by implementing get_unsafe
+    cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
+        """
+        Get an element without checking.
+
+        TEST::
+
+            sage: F.<z> = GF(9)
+            sage: M = MatrixSpace(F,3)(list(F))
+            sage: type(M)
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: M    # indirect doctest
+            [      0     2*z   z + 1]
+            [  z + 2       2       z]
+            [2*z + 2 2*z + 1       1]
+
+        """
+        if self.Data == NULL:
+            raise IndexError, "Matrix is empty"
+        return self._converter.int_to_field(FfToInt(FfExtract(MatGetPtr(self.Data,i), j)))
+
+    cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j):
+        # NOTE:
+        # It is essential that you call FfSetField and FfSetNoc YOURSELF
+        # and that you assert that the matrix is not empty!
+        # This method is here for speed!
+        return FfToInt(FfExtract(FfGetPtr(self.Data.Data,i) ,j))
+
+    cdef set_unsafe(self, Py_ssize_t i, Py_ssize_t j, value):
+        # ASSUMPTION: value's parent is the base ring
+        if self.Data == NULL:
+            raise IndexError, "Matrix is empty"
+        FfInsert(MatGetPtr(self.Data,i), j, FfFromInt(self._converter.field_to_int(value)))
+
+    cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value):
+        # NOTE:
+        # It is essential that you call FfSetField and FfSetNoc YOURSELF
+        # and that you assert that the matrix is not empty!
+        # This method is here for speed!
+        FfInsert(FfGetPtr(self.Data.Data,i), j, FfFromInt(value))
+
+    def randomize(self, density=None, nonzero=False, *args, **kwds):
+        """
+        Fill the matrix with random values.
+
+        INPUT:
+
+        - ``density`` (optional real number between zero and one) --
+          the expected density of the resulting matrix (default: 1)
+        - ``nonzero`` (optional bool, default ``False``) --
+          If true, all inserted marks are non-zero.
+
+        EXAMPLE::
+
+            sage: MS = MatrixSpace(GF(27,'z'),6,6)
+            sage: M = MS.random_element(); M    # indirect doctest
+            [              1           z + 1     z^2 + z + 1             z^2       2*z^2 + z           z + 1]
+            [2*z^2 + 2*z + 2   2*z^2 + z + 2         z^2 + 1 2*z^2 + 2*z + 2         z^2 + z   2*z^2 + z + 1]
+            [        2*z + 2     z^2 + z + 2           z + 2 2*z^2 + 2*z + 2           2*z^2           2*z^2]
+            [  2*z^2 + z + 2             z^2           z + 2         z^2 + z       2*z^2 + 2         z^2 + 2]
+            [      2*z^2 + z             2*z 2*z^2 + 2*z + 1       2*z^2 + 1 2*z^2 + 2*z + 1       2*z^2 + z]
+            [        2*z + 1         z^2 + z             z^2             z^2     2*z^2 + 2*z           z + 1]
+            sage: type(M)
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: MS.random_element(nonzero=True)
+            [            2*z               1   z^2 + 2*z + 1   2*z^2 + z + 1             z^2     z^2 + z + 1]
+            [    2*z^2 + 2*z   2*z^2 + z + 2         2*z + 1       z^2 + 2*z     2*z^2 + 2*z             z^2]
+            [        z^2 + z     z^2 + z + 2 2*z^2 + 2*z + 1         z^2 + 2               1           2*z^2]
+            [              z     2*z^2 + 2*z           2*z^2         2*z + 1           z + 2           z + 2]
+            [        z^2 + z             z^2           z + 2     2*z^2 + 2*z         2*z + 1         z^2 + z]
+            [    z^2 + z + 2       2*z^2 + z             z^2           z + 1     2*z^2 + 2*z   z^2 + 2*z + 1]
+            sage: MS.random_element(density=0.5)
+            [        z^2 + 2               0   z^2 + 2*z + 2       2*z^2 + z               0     z^2 + z + 2]
+            [              0               1               0               0               0               0]
+            [  2*z^2 + z + 1   2*z^2 + z + 2               0     z^2 + z + 2               0     z^2 + z + 1]
+            [              0               0               0               0               0               0]
+            [2*z^2 + 2*z + 2               0               0   2*z^2 + z + 2               0         2*z + 1]
+            [              0       2*z^2 + z               0               1               0   2*z^2 + z + 1]
+
+        """
+        self.check_mutability()
+        cdef int fl = self.Data.Field
+        density = 1.0 if density is None else float(density)  # float(None) would raise TypeError
+        if density <= 0:
+            return
+        if density > 1:
+            density = float(1)
+
+        self.clear_cache()
+
+        cdef PTR x
+        cdef unsigned char *y
+        x = self.Data.Data
+        cdef int nr = self.Data.Nor
+        cdef int nc = self.Data.Noc
+        cdef int i, j
+
+        FfSetField(fl)
+        FfSetNoc(nc)
+        cdef int O, MPB, tmp
+        randint = current_randstate().c_random
+        randdouble = current_randstate().c_rand_double
+
+        if not nonzero:
+            if density == 1:
+                MPB = 0
+                tmp = fl
+                while tmp <= 256:
+                    MPB += 1
+                    tmp *= fl
+                O = (fl**MPB)
+                sig_on()
+                if nc%MPB:
+                    for i from 0 <= i < nr:
+                        y = <unsigned char*>x
+                        for j from 0 <= j < FfCurrentRowSizeIo-1:
+                            y[j] = randint()%O
+                        y[FfCurrentRowSizeIo-1] = randint()%(fl**(nc%MPB))
+                        FfStepPtr(&(x))
+                else:
+                    for i from 0 <= i < nr:
+                        y = <unsigned char*>x
+                        for j from 0 <= j < FfCurrentRowSizeIo:
+                            y[j] = randint()%O
+                        FfStepPtr(&(x))
+                sig_off()
+            else:
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        if randdouble() < density:
+                            FfInsert(x, j, FfFromInt( (randint()%fl) ))
+                    FfStepPtr(&(x))
+                sig_off()
+        else:
+            if density == 1:
+                fl -= 1
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        FfInsert(x, j, FfFromInt( (randint()%fl)+1 ))
+                    FfStepPtr(&(x))
+                sig_off()
+            else:
+                fl -= 1
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        if randdouble() < density:
+                            FfInsert(x, j, FfFromInt( (randint()%fl)+1 ))
+                    FfStepPtr(&(x))
+                sig_off()
+
+    def show_contents(self, r=None):
+        FfSetField(self.Data.Field)
+        FfSetNoc(self.Data.Noc)
+        cdef PTR p
+        cdef size_t i, j
+        if r is not None:
+            r_min = r
+            r_max = r+1
+        else:
+            r_min = 0
+            r_max = self.Data.Nor
+        for i in range(r_min, r_max):
+            p = FfGetPtr(self.Data.Data, i)
+            for j from 0<=j<self.Data.RowSize:
+                print "%3.3d"%p[j],
+            print
+
+##################
+## comparison
+    cpdef int _cmp_(left, Element right) except -2:
+        """
+        Compare two Matrix_gfpn_dense matrices
+
+        Of course, '<' and '>' doesn't make much sense for matrices.
+
+        EXAMPLES::
+
+            sage: M = matrix(GF(125,'x'),[20*[0],20*[0],[1]+19*[0]])
+            sage: N = copy(M)
+            sage: M == N
+            True
+            sage: M != N
+            False
+            sage: print M < N
+            None
+            sage: N[2,19] = 1
+            sage: M == N
+            False
+            sage: M != N
+            True
+        """
+        cdef Matrix_gfpn_dense self = left
+        cdef Matrix_gfpn_dense N = right
+        cdef char* d1
+        cdef char* d2
+        if self.Data.Field != N.Data.Field:
+            if self.Data.Field > N.Data.Field:
+                return 1
+            return -1
+        if self.Data.Noc != N.Data.Noc:
+            if self.Data.Noc > N.Data.Noc:
+                return 1
+            return -1
+        if self.Data.Nor != N.Data.Nor:
+            if self.Data.Nor > N.Data.Nor:
+                return 1
+            return -1
+        d1 = <char*>(self.Data.Data)
+        d2 = <char*>(N.Data.Data)
+        cdef str s1 = PyString_FromStringAndSize(d1,self.Data.RowSize * self.Data.Nor)
+        cdef str s2 = PyString_FromStringAndSize(d2,N.Data.RowSize * N.Data.Nor)
+        if s1 != s2:
+            if s1 > s2:
+                return 1
+            return -1
+        return 0
+
+    def _rowlist_(self, i, j=-1):
+        "M._rowlist_(i, j=-1): Return row <i> -- or, if j is given, rows <i>..<j> (inclusive) concatenated -- as a flat list of python ints"
+        cdef int k
+        if self.Data:
+            FfSetField(self.Data.Field)
+        else:
+            raise ValueError("Matrix is empty")
+        if (i<0) or (i>=self.Data.Nor):
+            raise IndexError("Index {} out of range 0..{}".format(i,self.Data.Nor-1))
+        cdef PTR p
+        p = MatGetPtr(self.Data,i)
+        L = [FfToInt(FfExtract(p,k)) for k in range(self.Data.Noc)]
+        if j!=-1:
+            if not(isinstance(j,int) or isinstance(j,Integer)):
+                raise TypeError, "Second index must be an integer"
+            if j >= self.Data.Nor:
+                raise IndexError, "Index out of range"
+            for k from i < k <= j:
+                FfStepPtr(&(p)) # This is only called after MatGetPtr, hence, after FfSetNoc.
+                L.extend([FfToInt(FfExtract(p,l)) for l in range(self.Data.Noc)])
+        return L
+
+    def _matlist_(self):
+        "M._matlist_(): Return M as a list of lists of python ints"
+        cdef int i
+        if self.Data:
+            FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
+        else:
+            raise IndexError, "Matrix is empty"
+        cdef PTR p
+        p = self.Data.Data
+        l_out=[]
+        for i from 1<=i<self.Data.Nor:
+            l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+            FfStepPtr(&(p))
+        l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+        return l_out
+
+#########################
+## Arithmetics
+    cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
+        if start_col != 0 or self.Data == NULL:
+            raise ValueError
+        cdef PTR = MatGetPtr(self.Data, i)
+        FfMulRow(PTR, FfFromInt(self._converter.field_to_int(s)))
+
+    cpdef ModuleElement _add_(self, ModuleElement right):
+        "add two MTX matrices of equal size"
+        cdef Matrix_gfpn_dense Self = self
+        cdef Matrix_gfpn_dense Right = right
+        assert Self is not None
+        assert Right is not None
+        if Self.Data == NULL or Right.Data == NULL:
+            raise NotImplementedError, "The matrices must not be empty"
+        cdef Matrix_gfpn_dense Left = Self.__copy__()
+        if MatAdd(Left.Data, Right.Data) != NULL:
+            return Left
+        else:
+            raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cpdef ModuleElement _sub_(self, ModuleElement right):
+        "subtract two MTX matrices of equal size"
+        cdef Matrix_gfpn_dense Self = self
+        cdef Matrix_gfpn_dense Right = right
+        assert Self is not None
+        assert Right is not None
+        if Self.Data == NULL or Right.Data == NULL:
+            raise NotImplementedError, "The matrices must not be empty"
+        cdef Matrix_gfpn_dense Left = Self.__copy__()
+        Left._is_immutable = False
+        if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
+            return Left
+        else:
+            raise ArithmeticError, "Matrix sizes or fields not compatible"
+
+    def __neg__(self):
+        "return negation of a MTX matrix: -M == M.__neg__()"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        return self._rmul_(self._base_ring(-1))
+
+    cpdef ModuleElement _rmul_(self, RingElement left):
+        "Scalar multiplication"
+        if self.Data == NULL:
+            return self.__copy__()
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cpdef ModuleElement _lmul_(self, RingElement right):
+        "Scalar multiplication"
+        if self.Data == NULL:
+            return self.__copy__()
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cdef Matrix _matrix_times_matrix_(self, Matrix right):
+        # Surprisingly, Winograd-Strassen can compete with school book
+        # multiplication for smallish matrices, and of course it is
+        # asymptotically faster. So, we used it by default.
+        return self._multiply_strassen(right)
+
+    cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
+        "multiply two meataxe matrices by the school book algorithm"
+        if self.Data == NULL or right.Data == NULL:
+            raise ValueError("The matrices must not be empty")
+        if self._ncols != right._nrows:
+            raise ArithmeticError("left ncols must match right nrows")
+        MS = self.matrix_space(self._nrows, right._ncols, False)
+        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+        sig_on()
+        OUT.Data = MatDup(self.Data)
+        if OUT.Data == NULL:
+            sig_off()
+            raise MemoryError
+        if not MatMul(OUT.Data,right.Data):
+            sig_off()
+            raise ArithmeticError("Matrix sizes or fields not compatible")
+        sig_off()
+        OUT._nrows = OUT.Data.Nor
+        OUT._ncols = OUT.Data.Noc
+        OUT._is_immutable = False
+        OUT._parent = MS
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        return OUT
+
+    cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=0):
+        """
+        Return ``self*right`` via MatMulStrassen. cutoff is NOT the number of rows/columns, but the rowsize expressed in bytes.
+        If `cutoff==0` then the default ``sizeof(long)^2/2`` is chosen.
+        """
+        if self.Data == NULL or right.Data == NULL:
+            raise ValueError("The matrices must not be empty")
+        if self._ncols != right._nrows:
+            raise ArithmeticError("left ncols must match right nrows")
+        MS = self.matrix_space(self._nrows, right._ncols, False)
+        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense(MS, None)
+        # Now, OUT.Data is initialised, which is neede for MatrixMulStrassen to work.
+        cutoff = cutoff//sizeof(long)
+        StrassenSetCutoff(cutoff)
+        sig_on()
+        cdef bint failed = MatMulStrassen(OUT.Data, self.Data, right.Data) == NULL
+        sig_off()  # close the sig_on() block before raising, so it is also closed on the error path
+        if failed: raise ArithmeticError("Error multiplying matrices by Strassen-Winograd algorithm")
+        return OUT
+
+    cdef ModuleElement _mul_long(self, long n):
+        "multiply an MTX matrix with a field element represented by an integer"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense left
+        cdef FEL r
+        if n < 0:
+            r = mtx_taddinv[FfFromInt(-n)]
+        else:
+            r = FfFromInt(n)
+        left = self.__copy__()
+        if MatMulScalar(left.Data, r) != NULL:
+            return left
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    def __div__(Matrix_gfpn_dense self, p):
+        "divide an MTX matrix by a field element represented by an integer"
+        if self.Data == NULL:
+            return self.__copy__()
+        if not p:
+            raise ZeroDivisionError
+        if p not in self._base_ring:
+            raise ValueError("{} is not a scalar".format(p))
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
+        if MatMulScalar(OUT.Data, r) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    def __pow__(Matrix_gfpn_dense self, n, ignored):
+        "M.__pow__(n): return M^n"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if not self.is_square():
+            raise ArithmeticError("self must be a square matrix")
+        if ignored is not None:
+            raise RuntimeError("__pow__ third argument not used")
+        cdef Matrix_gfpn_dense OUT
+        cdef Matrix_gfpn_dense SELFINV
+        OUT = type(self).__new__(type(self))
+        OUT._nrows = self._nrows
+        OUT._ncols = self._ncols
+        OUT._is_immutable = False
+        OUT._parent = self._parent
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        if n>=0:
+            OUT.Data = MatPower(self.Data,n)
+        else:
+            SELFINV = self.__invert__()
+            OUT.Data = MatPower(SELFINV.Data,-n)
+        if OUT.Data != NULL:
+            return OUT
+        raise ArithmeticError("Failure in exponentiating a matrix")
+
+    def __invert__(Matrix_gfpn_dense self):
+        "M__invert__(): return M^(-1)"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if not self.is_square():
+            raise ArithmeticError("self must be a square matrix")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT._nrows = self._nrows
+        OUT._ncols = self._ncols
+        OUT._is_immutable = False
+        OUT._parent = self._parent
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        OUT.Data = MatInverse(self.Data)
+        if OUT.Data != NULL:
+            return OUT
+        raise ArithmeticError("This matrix is not invertible")
+
+    def transpose(Matrix_gfpn_dense self):
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT._nrows = self._ncols
+        OUT._ncols = self._nrows
+        OUT._is_immutable = False
+        OUT._parent = self.matrix_space(self._ncols, self._nrows, False)
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        OUT.Data = MatTransposed(self.Data)
+        return OUT
+
+    def order(self):
+        "M.order(): return multiplicative order of M"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if (self.Data.Nor <> self.Data.Noc):
+            raise ValueError("only defined for square matrices")
+        o = MatOrder(self.Data)
+        if o==-1:
+            raise ArithmeticError("order too large")
+        else:
+            return o
+
+###################
+## Gauss algorithm
+
+    def nullity(self):
+        "M.nullity(): return the nullity of M"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        return MatNullity(self.Data)
+
+    def left_kernel_matrix(self):
+        """M.left_kernel_matrix(): return the null space of M
+
+        M.left_kernel_matrix()*M is a null matrix
+        """
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT.Data = MatNullSpace(self.Data)
+        if OUT.Data == NULL:
+            return OUT
+        OUT._nrows = OUT.Data.Nor
+        OUT._ncols = OUT.Data.Noc
+        OUT._is_immutable = False
+        OUT._parent = self.matrix_space(OUT._nrows, OUT._ncols, False)
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        return OUT
+
+    def lead(self):
+        """
+(f,i) = M.lead() <=> f=M[0,i] is the first non-zero coefficient in the first row of M
+
+If the first row of M has no non-zero entry then f==0
+        """
+        cdef int i
+        cdef int fe
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        FfSetField(self.Data.Field)
+        for i from 0 <= i < self.Data.Noc:
+            fe = FfToInt(FfExtract(self.Data.Data,i))
+            if fe:
+                return fe, i
+        return 0, self.Data.Noc
+
+########################
+### String representations
+#    def __repr__(self):
+#        "return a short description of an MTX matrix"
+#        if self.Data == NULL:
+#            return 'Empty MTX matrix'
+#        return '(%s x %s) MTX matrix over GF(%s)'%(self.Data.Nor, self.Data.Noc, self.Data.Field)
+#
+#    def __str__(self):
+#        "return a string showing the contents of an MTX matrix"
+#        # cdef long i,j
+#        if self.Data == NULL:
+#            return '[]'
+#        nc = self.Data.Noc
+#        nr = self.Data.Nor
+#        setfield(self.Data.Field)
+#        fln = len(str(FfOrder))
+#        matL = self._matlist_()
+#        return "\n".join(["["+" ".join([str(el).rjust(fln) for el in matL[i]])+"]" \
+#                                   for i in range(nr)])
+
+###############################################################################
+# Further features may be added later
+###############################################################################
+
diff --git a/src/sage/matrix/matrix_space.py b/src/sage/matrix/matrix_space.py
index 58569480464..bb5802ab28b 100644
--- a/src/sage/matrix/matrix_space.py
+++ b/src/sage/matrix/matrix_space.py
@@ -56,7 +56,6 @@
 
 import matrix_mpolynomial_dense
 
-
 # Sage imports
 from sage.misc.superseded import deprecation
 import sage.structure.coerce
@@ -984,6 +983,12 @@ def _get_matrix_class(self):
             <type 'sage.matrix.matrix_modn_dense_float.Matrix_modn_dense_float'>
             sage: type(matrix(GF(16007), 2, range(4)))
             <type 'sage.matrix.matrix_modn_dense_double.Matrix_modn_dense_double'>
+            sage: type(matrix(GF(2), 2, range(4)))
+            <type 'sage.matrix.matrix_mod2_dense.Matrix_mod2_dense'>
+            sage: type(matrix(GF(64,'z'), 2, range(4)))
+            <type 'sage.matrix.matrix_gf2e_dense.Matrix_gf2e_dense'>
+            sage: type(matrix(GF(125,'z'), 2, range(4)))  # optional: meataxe
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
         """
         R = self.base_ring()
         if self.is_dense():
@@ -1011,19 +1016,26 @@ def _get_matrix_class(self):
                 elif R.order() < matrix_modn_dense_double.MAX_MODULUS:
                     return matrix_modn_dense_double.Matrix_modn_dense_double
                 return matrix_generic_dense.Matrix_generic_dense
-            elif sage.rings.finite_rings.constructor.is_FiniteField(R) and R.characteristic() == 2 and R.order() <= 65536:
-                return matrix_gf2e_dense.Matrix_gf2e_dense
+            elif sage.rings.finite_rings.constructor.is_FiniteField(R):
+                if R.characteristic() == 2:
+                    if R.order() <= 65536:
+                        return matrix_gf2e_dense.Matrix_gf2e_dense
+                elif R.order() <= 255:
+                    try:
+                        import matrix_gfpn_dense
+                        return matrix_gfpn_dense.Matrix_gfpn_dense
+                    except ImportError:
+                        pass
             elif sage.rings.polynomial.multi_polynomial_ring_generic.is_MPolynomialRing(R) and R.base_ring() in _Fields:
                 return matrix_mpolynomial_dense.Matrix_mpolynomial_dense
             #elif isinstance(R, sage.rings.padics.padic_ring_capped_relative.pAdicRingCappedRelative):
             #    return padics.matrix_padic_capped_relative_dense
             # the default
-            else:
-                from sage.symbolic.ring import SR   # causes circular imports
-                if R is SR:
-                    import matrix_symbolic_dense
-                    return matrix_symbolic_dense.Matrix_symbolic_dense
-                return matrix_generic_dense.Matrix_generic_dense
+            from sage.symbolic.ring import SR   # causes circular imports
+            if R is SR:
+                import matrix_symbolic_dense
+                return matrix_symbolic_dense.Matrix_symbolic_dense
+            return matrix_generic_dense.Matrix_generic_dense
 
         else:
             if sage.rings.finite_rings.integer_mod_ring.is_IntegerModRing(R) and R.order() < matrix_modn_sparse.MAX_MODULUS:

From 4bdd285cd6320851aa4ed7aee8fb820960005692 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 20 Sep 2015 16:36:58 +0200
Subject: [PATCH 04/23] A full wrapper for MeatAxe matrices

---
 src/sage/libs/meataxe.pxd             |   4 +-
 src/sage/matrix/matrix_gfpn_dense.pxd |   5 +-
 src/sage/matrix/matrix_gfpn_dense.pyx | 216 ++++++++++++++++----------
 3 files changed, 136 insertions(+), 89 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index 2b413533c4a..cfca8ca40bc 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -63,13 +63,13 @@ cdef extern from "meataxe.h":
 
     ## Rows
     void FfMulRow(PTR row, FEL mark)
-    # void FfAddMulRow(PTR dest, PTR src, FEL f)
+    void FfAddMulRow(PTR dest, PTR src, FEL f)
     PTR FfAddRow(PTR dest, PTR src)
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
     # FEL FfScalarProduct(PTR a, PTR b)
-    # void FfSwapRows(PTR dest, PTR src)
+    void FfSwapRows(PTR dest, PTR src)
     # void FfPermRow(PTR row, long *perm, PTR result)
     # int FfCmpRows(PTR p1, PTR p2)
 
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
index 61025adf6a3..300118918ad 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pxd
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -20,10 +20,7 @@ from sage.libs.meataxe cimport *
 cdef class Matrix_gfpn_dense(Matrix_dense):
     cdef Matrix_t *Data
     cdef FieldConverter_class _converter
-    #cpdef Matrix_gfpn_dense normalized(Matrix_gfpn_dense self)
-    #cpdef Matrix_gfpn_dense semi_echelon(Matrix_gfpn_dense self)
-    #cpdef int nullity(Matrix_gfpn_dense self)
-    #cpdef tuple lead(self)
+    cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols)
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
     cdef Matrix _matrix_times_matrix_(self, Matrix right)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 6021c554828..7996917c3a7 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1,15 +1,19 @@
 r"""
-Dense Matrices over `\mathbb F_q`, with `q<255` odd and not prime
+Dense Matrices over `\mathbb F_q`, with `q\le 255`
 
 This module is a wrapper for version 2.4.24 of the Aachen
 `C-MeatAxe <http://www.math.rwth-aachen.de/homes/MTX/download.html>`_,
 improved by an implementation of the Winograd-Strassen multiplication
 algorithm. It provides matrices over the finite field `\mathbb F_q`,
-where `q\le 255` is odd and not prime.
+where `q\le 255`.
+
+By default, it is only used when `q` is odd and not prime, because other
+matrix implementations in SageMath perform better for prime fields or in
+characteristic two.
 
 AUTHORS:
 
-- Simon King (2015-09-18): initial version
+- Simon King (2015-09): initial version
 
 """
 
@@ -50,6 +54,12 @@ from sage.misc.randstate import current_randstate
 from sage.misc.cachefunc import cached_method, cached_function
 from sage.structure.element cimport Element, ModuleElement, RingElement, Matrix
 
+from libc.stdlib cimport free
+from sage.ext.memory cimport check_realloc
+from libc.string cimport memset, memcpy
+
+cimport sage.matrix.matrix0
+
 include 'sage/ext/stdsage.pxi'
 
 ####################
@@ -312,6 +322,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef int ncols = parent.ncols()
         self.Data = MatAlloc(f, nrows, ncols)
 
+    def __dealloc__(self):
+        if self.Data != NULL:
+            MatFree(self.Data)
+            self.Data = NULL
+
     def __init__(self, parent, data=None, mutable=True, copy=False, coerce=False):
         """
         Matrix extension class using libmeataxe as backend
@@ -490,13 +505,22 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     FfInsert(x, j, FfFromInt(bla))
                 FfStepPtr(&(x))
 
-    def rowsize(self):
-        return self.Data.RowSize
+    cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols):
+        r"""
+        Return a new matrix with no entries set.
+        """
+        cdef Matrix_gfpn_dense res
+        res = self.__class__.__new__(self.__class__)
 
-    def __dealloc__(self):
-        if self.Data != NULL:
-            MatFree(self.Data)
-            self.Data = NULL
+        if nrows == self._nrows and ncols == self._ncols:
+            res._parent = self._parent
+        else:
+            res._parent = self.matrix_space(nrows, ncols)
+        res._ncols  = ncols
+        res._nrows  = nrows
+        res._base_ring = self._base_ring
+        res._converter = self._converter
+        return res
 
     def __copy__(self):
         """
@@ -524,14 +548,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             sage: N is M
             False
         """
-        cdef Matrix_gfpn_dense retval = type(self).__new__(type(self))
-        # Do the initialisation "manually"
+        cdef Matrix_gfpn_dense retval = self._new(self._nrows, self._ncols)
         retval._is_immutable = False  # a copy of a matrix is mutable!
-        retval._parent = self._parent
-        retval._base_ring = self._base_ring
-        retval._converter = self._converter
-        retval._ncols = self._ncols
-        retval._nrows = self._nrows
         retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
         if self.Data:
             retval.Data = MatDup(self.Data)
@@ -794,8 +812,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                 L.extend([FfToInt(FfExtract(p,l)) for l in range(self.Data.Noc)])
         return L
 
-    def _matlist_(self):
-        "M._matlist_(): Return M as a list of lists of python ints"
+    def _list(self):
+        cdef list x = self.fetch('list')
+        if not x is None:
+            return x
+        x = []
         cdef int i
         if self.Data:
             FfSetField(self.Data.Field)
@@ -804,23 +825,47 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise IndexError, "Matrix is empty"
         cdef PTR p
         p = self.Data.Data
-        l_out=[]
         for i from 1<=i<self.Data.Nor:
-            l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+            x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
             FfStepPtr(&(p))
-        l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
-        return l_out
+        x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
+        self.cache('list', x)
+        return x
 
 #########################
 ## Arithmetics
     cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
         if start_col != 0 or self.Data == NULL:
-            raise ValueError
-        cdef PTR = MatGetPtr(self.Data, i)
-        FfMulRow(PTR, FfFromInt(self._converter.field_to_int(s)))
+            raise ValueError("We can only rescale a full row of a non-empty matrix")
+        FfMulRow(MatGetPtr(self.Data, i), FfFromInt(self._converter.field_to_int(self._base_ring(s))))
+
+    cdef add_multiple_of_row_c(self,  Py_ssize_t row_to, Py_ssize_t row_from, multiple, Py_ssize_t start_col):
+        if start_col != 0 or self.Data == NULL:  # only start_col == 0 on a non-empty matrix is supported
+            raise ValueError("We can only add a multiple of a full row of a non-empty matrix")
+        FfAddMulRow(MatGetPtr(self.Data, row_to), MatGetPtr(self.Data, row_from), FfFromInt(self._converter.field_to_int(self._base_ring(multiple))))
+
+    cdef swap_rows_c(self, Py_ssize_t row1, Py_ssize_t row2):
+        FfSwapRows(MatGetPtr(self.Data, row1), MatGetPtr(self.Data, row2))
+
+    def trace(self):
+        if self._nrows != self._ncols:
+            raise ValueError, "self must be a square matrix"
+        return self._converter.int_to_field(FfToInt(MatTrace(self.Data)))
+
+    def stack(self, Matrix_gfpn_dense other):
+        if self._ncols != other._ncols:
+            raise TypeError("Both numbers of columns must match.")
+        if self._nrows == 0 or self.Data == NULL:
+            return other.__copy__()
+        if other._nrows == 0 or other.Data == NULL:
+            return self.__copy__()
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows+other._nrows, self._ncols)
+        OUT.Data = MatAlloc(self.Data.Field, self.Data.Nor+other.Data.Nor, self.Data.Noc)
+        memcpy(OUT.Data.Data, self.Data.Data, FfCurrentRowSize*self.Data.Nor)
+        memcpy(MatGetPtr(OUT.Data, self.Data.Nor), other.Data.Data, FfCurrentRowSize*other.Data.Nor)
+        return OUT
 
     cpdef ModuleElement _add_(self, ModuleElement right):
-        "add two MTX matrices of equal size"
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -834,7 +879,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
-        "subtract two MTX matrices of equal size"
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -849,13 +893,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError, "Matrix sizes or fields not compatible"
 
     def __neg__(self):
-        "return negation of a MTX matrix: -M == M.__neg__()"
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         return self._rmul_(self._base_ring(-1))
 
     cpdef ModuleElement _rmul_(self, RingElement left):
-        "Scalar multiplication"
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
@@ -865,7 +907,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _lmul_(self, RingElement right):
-        "Scalar multiplication"
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
@@ -874,11 +915,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
-    cdef Matrix _matrix_times_matrix_(self, Matrix right):
+    cdef int _strassen_default_cutoff(self, sage.matrix.matrix0.Matrix right) except -2:
         # Surprisingly, Winograd-Strassen can compete with school book
         # multiplication for smallish matrices, and of course it is
         # asymptotically faster. So, we used it by default.
-        return self._multiply_strassen(right)
+        return 0
 
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
         "multiply two meataxe matrices by the school book algorithm"
@@ -886,8 +927,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrices must not be empty")
         if self._ncols != right._nrows:
             raise ArithmeticError("left ncols must match right nrows")
-        MS = self.matrix_space(self._nrows, right._ncols, False)
-        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, right._ncols)
         sig_on()
         OUT.Data = MatDup(self.Data)
         if OUT.Data == NULL:
@@ -897,12 +937,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             sig_off()
             raise ArithmeticError("Matrix sizes or fields not compatible")
         sig_off()
-        OUT._nrows = OUT.Data.Nor
-        OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
-        OUT._parent = MS
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         return OUT
 
@@ -917,7 +952,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("left ncols must match right nrows")
         MS = self.matrix_space(self._nrows, right._ncols, False)
         cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense(MS, None)
-        # Now, OUT.Data is initialised, which is neede for MatrixMulStrassen to work.
+        # Now, OUT.Data is initialised, which is needed for MatMulStrassen to work.
         cutoff = cutoff//sizeof(long)
         StrassenSetCutoff(cutoff)
         sig_on()
@@ -964,15 +999,9 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("self must be a square matrix")
         if ignored is not None:
             raise RuntimeError("__pow__ third argument not used")
-        cdef Matrix_gfpn_dense OUT
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         cdef Matrix_gfpn_dense SELFINV
-        OUT = type(self).__new__(type(self))
-        OUT._nrows = self._nrows
-        OUT._ncols = self._ncols
         OUT._is_immutable = False
-        OUT._parent = self._parent
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         if n>=0:
             OUT.Data = MatPower(self.Data,n)
@@ -989,13 +1018,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrix must not be empty")
         if not self.is_square():
             raise ArithmeticError("self must be a square matrix")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
-        OUT._nrows = self._nrows
-        OUT._ncols = self._ncols
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         OUT._is_immutable = False
-        OUT._parent = self._parent
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         OUT.Data = MatInverse(self.Data)
         if OUT.Data != NULL:
@@ -1005,13 +1029,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
     def transpose(Matrix_gfpn_dense self):
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
-        OUT._nrows = self._ncols
-        OUT._ncols = self._nrows
+        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._nrows)  # swapped shape: ncols x nrows
         OUT._is_immutable = False
-        OUT._parent = self.matrix_space(self._ncols, self._nrows, False)
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         OUT.Data = MatTransposed(self.Data)
         return OUT
@@ -1074,28 +1093,59 @@ If the first row of M has no non-zero entry then f==0
                 return fe, i
         return 0, self.Data.Noc
 
-########################
-### String representations
-#    def __repr__(self):
-#        "return a short description of an MTX matrix"
-#        if self.Data == NULL:
-#            return 'Empty MTX matrix'
-#        return '(%s x %s) MTX matrix over GF(%s)'%(self.Data.Nor, self.Data.Noc, self.Data.Field)
-#
-#    def __str__(self):
-#        "return a string showing the contents of an MTX matrix"
-#        # cdef long i,j
-#        if self.Data == NULL:
-#            return '[]'
-#        nc = self.Data.Noc
-#        nr = self.Data.Nor
-#        setfield(self.Data.Field)
-#        fln = len(str(FfOrder))
-#        matL = self._matlist_()
-#        return "\n".join(["["+" ".join([str(el).rjust(fln) for el in matL[i]])+"]" \
-#                                   for i in range(nr)])
-
-###############################################################################
-# Further features may be added later
-###############################################################################
+    def _echelon_in_place_classical(self, reduced=True):
+        """
+        Echelonize ``self`` in place, using MeatAxe's ``MatEchelonize``.
+
+        If ``reduced`` is true, the rows are sorted by pivot column, the
+        pivots are scaled to one and the entries above them are cleared.
+        """
+        if self._nrows == 0 or self._ncols == 0:
+            self.cache('in_echelon_form',True)
+            self.cache('rank', 0)
+            self.cache('pivots', ())
+            return self
+        if MatEchelonize(self.Data) == -1:
+            raise ArithmeticError("Error echelonizing this matrix")
+        self._cache = {}  # Now, self.Data is in semi-echelon form.
+        r = self.Data.Nor
+        cdef size_t i, j, pos
+        cdef PTR old, dest, src
+        cdef FEL piv
+        self.cache('rank', r)
+        # Next, we achieve the reduced echelon form, if requested.
+        if reduced:
+            pivs = [(self.Data.PivotTable[i],i) for i in range(r)]
+            pivs.sort()
+            if pivs != [(self.Data.PivotTable[i],i) for i in range(r)] or self.Data.Nor < self._nrows:
+                # We copy the rows one by one, sorting their pivot positions.
+                old = self.Data.Data
+                self.Data.Data = FfAlloc(self._nrows)
+                for i, (pos,j) in enumerate(pivs):
+                    dest = self.Data.Data+FfCurrentRowSize*i  # row j moves to row i
+                    memcpy(dest, old+FfCurrentRowSize*j, FfCurrentRowSize)
+                    self.Data.PivotTable[i] = pos
+                free(old)
+                self.Data.Nor = self._nrows
+            # Even if no row was moved, we scale the pivots to one and
+            # annulate the entries above them (only those below are known to be zero).
+            for i from 0 <= i < r:
+                src = MatGetPtr(self.Data, i)
+                piv = FfExtract(src, self.Data.PivotTable[i])
+                assert piv!=FF_ZERO
+                if piv != FF_ONE:
+                    FfMulRow(src, mtx_tmultinv[piv])
+                for j from 0 <= j < i:
+                    dest = MatGetPtr(self.Data, j)
+                    piv = FfExtract(dest, self.Data.PivotTable[i])
+                    if piv != FF_ZERO:
+                        FfAddMulRow(dest, src, mtx_taddinv[piv])
+        elif self.Data.Nor < self._nrows:
+            # Some rows may have vanished, but SageMath wants the number
+            # of rows to stay the same, thus, we append zero rows.
+            self.Data.Data = <PTR>check_realloc(self.Data.Data, FfCurrentRowSize*self._nrows)
+            memset(self.Data.Data + FfCurrentRowSize*self.Data.Nor, FF_ZERO, FfCurrentRowSize*(self._nrows-self.Data.Nor))
+            self.Data.Nor = self._nrows
+        self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
+        self.cache('in_echelon_form',True)
 

From d889e0baf9308298c084f6d8ea2c1f052aeaf63c Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Mon, 21 Sep 2015 12:17:06 +0200
Subject: [PATCH 05/23] Improve echelon computation in MeatAxe, and fix some
 compiler warnings

---
 build/pkgs/meataxe/dependencies               |   1 +
 build/pkgs/meataxe/patches/TweakEchelon.patch | 229 ++++++++++++++++++
 build/pkgs/meataxe/spkg-install               |  24 ++
 3 files changed, 254 insertions(+)
 create mode 100644 build/pkgs/meataxe/dependencies
 create mode 100644 build/pkgs/meataxe/patches/TweakEchelon.patch

diff --git a/build/pkgs/meataxe/dependencies b/build/pkgs/meataxe/dependencies
new file mode 100644
index 00000000000..2f9f3849682
--- /dev/null
+++ b/build/pkgs/meataxe/dependencies
@@ -0,0 +1 @@
+# no dependencies
diff --git a/build/pkgs/meataxe/patches/TweakEchelon.patch b/build/pkgs/meataxe/patches/TweakEchelon.patch
new file mode 100644
index 00000000000..eeee5e4d8f6
--- /dev/null
+++ b/build/pkgs/meataxe/patches/TweakEchelon.patch
@@ -0,0 +1,229 @@
+Improve echelon computation by restricting FfAddMulRow to the
+nonzero part of the to-be-added row.
+
+Also remove some compiler warnings.
+
+AUTHOR:
+
+- Simon King, 2015-09-22
+diff --git a/src/c-kernel.c b/src/c-kernel.c
+index f74e97e..d4355bc 100644
+--- a/src/c-kernel.c
++++ b/src/c-kernel.c
+@@ -311,13 +311,10 @@ void TestFelToInt(unsigned flags)
+ static void TestSubfield1(int fld, int sub)
+ 
+ {
+-    FEL tabfld[256], tabsub[256];
++    FEL tabsub[256];
+     FEL tabemb[256];
+     int i;
+ 
+-    FfSetField(fld);
+-    for (i = 0; i < fld; ++i) 
+-	tabfld[i] = FfFromInt(i);
+     FfSetField(sub);
+     for (i = 0; i < sub; ++i) 
+ 	tabsub[i] = FfFromInt(i);
+diff --git a/src/cfcomp.c b/src/cfcomp.c
+index 7434549..fa739d6 100644
+--- a/src/cfcomp.c
++++ b/src/cfcomp.c
+@@ -131,7 +131,7 @@ static void Compare(const char *name)
+ {
+     ReadGens(name);
+     FindEquiv(name);
+-    FreeGens(name);
++    FreeGens();
+ }
+ 
+ 
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 178b6cb..6ef2f72 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -919,6 +919,54 @@ PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len)
+     return dest;
+ }
+ 
++/**
++ ** Add a multiple of a part of a row.
++ ** This function adds @em f times @em src to @em dest. It works like
++ ** FfAddMulRow(), but the operation is performed only on a part of the row.
++ ** @param dest The row to add to.
++ ** @param src The row to add.
++ ** @param f The coefficient by which @em src is multiplied.
++ ** @param first Number of bytes to skip.
++ ** @param len Number of bytes to add.
++**/
++/* Warning!! Let L be the long integer to which the first byte of the row
++ * part belongs. It is assumed that all previous bytes in L are zero!
++ * Moreover, it is assumed that either the part of the rows ends at the
++ * end of the row, or that it ends with a full long.
++ */
++void FfAddMulRowPartial(PTR dest, PTR src, FEL f, int first, int len)
++{
++    register int i;
++    register BYTE *p1, *p2, *multab;
++
++    CHECKFEL(f);
++    if (f == FF_ZERO)
++    return;  /* nothing to add */
++    int lfirst;
++    if (f == FF_ONE)
++    {
++        lfirst = first/sizeof(long);  /* byte offset -> offset in longs, as passed to FfAddRowPartial() */
++        if (first+len>=FfCurrentRowSizeIo)  /* the range reaches the end of the row */
++        {
++            FfAddRowPartial(dest,src,lfirst,FfCurrentRowSize/sizeof(long)-lfirst);
++            return;
++        }
++        FfAddRowPartial(dest,src,lfirst,(first+len)/sizeof(long)-lfirst);
++        return;
++    }
++    multab = mtx_tmult[f];
++    p1 = dest + first;
++    p2 = src + first;
++    int rem = FfCurrentRowSizeIo - first;  /* bytes left in the row after skipping */
++    if (rem > len) rem = len;
++    for (i = rem; i != 0; --i)
++    {
++        register BYTE x = *p2++;
++        if (x!=0)  /* zero bytes of src leave dest unchanged */
++            *p1 = mtx_tadd[*p1][multab[x]];
++        ++p1;
++    }
++}
+ 
+ /**
+  ** Multiply a row by a coefficient.
+@@ -977,10 +1025,12 @@ void FfAddMulRow(PTR dest, PTR src, FEL f)
+     multab = mtx_tmult[f];
+     p1 = dest;
+     p2 = src;
+-    for (i = FfTrueRowSize(FfNoc); i != 0; --i)
++    for (i = FfCurrentRowSizeIo; i != 0; --i)
+     {
+-	*p1 = mtx_tadd[*p1][multab[*p2++]];
+-	++p1;
++        register BYTE x = *p2++;
++        if (x!=0)
++            *p1 = mtx_tadd[*p1][multab[x]];
++        ++p1;
+     }
+ }
+ 
+@@ -1131,7 +1181,9 @@ __asm__("    popl %ebx\n"
+                 {
+                     for (; k != 0; --k)
+                     {
+-                        *r = mtx_tadd[*r][*v++];
++                        register BYTE x = *v++;
++                        if (x!=0)
++                            *r = mtx_tadd[*r][x];
+                         ++r;
+                     }
+                 }
+@@ -1140,9 +1192,9 @@ __asm__("    popl %ebx\n"
+                     register BYTE *multab = mtx_tmult[f];
+                     for (; k != 0; --k)
+                     {
+-		 	if (*v != 0)
+-			    *r = mtx_tadd[multab[*v]][*r];
+-			++v;
++                        if (*v != 0)
++                            *r = mtx_tadd[multab[*v]][*r];
++                        ++v;
+                         ++r;
+                     }
+                 }
+diff --git a/src/matcopy.c b/src/matcopy.c
+index 75b29c0..457dfeb 100644
+--- a/src/matcopy.c
++++ b/src/matcopy.c
+@@ -57,7 +57,10 @@ int MatCopyRegion(Matrix_t *dest, int destrow, int destcol,
+     if (!MatIsValid(src) || !MatIsValid(dest))
+ 	return -1;
+     if (src->Field != dest->Field)
+-	return MTX_ERROR1("%E",MTX_ERR_INCOMPAT), -1;
++    {
++        MTX_ERROR1("%E",MTX_ERR_INCOMPAT);
++        return -1;
++    }
+     if (nrows == -1)
+ 	nrows = src->Nor - row1;
+     if (ncols == -1)
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 5123f1c..368b37b 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -126,6 +126,7 @@ int FfSetNoc(int noc);
+ 
+ 
+ void FfAddMulRow(PTR dest, PTR src, FEL f);
++void FfAddMulRowPartial(PTR dest, PTR src, FEL f, int first, int len);
+ PTR FfAddRow(PTR dest, PTR src);
+ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len);
+ PTR FfSubRow(PTR dest, PTR src);
+diff --git a/src/window.c b/src/window.c
+index f374028..9c87694 100644
+--- a/src/window.c
++++ b/src/window.c
+@@ -236,12 +236,14 @@ __asm__("    popl %ebx\n"
+             {
+                 register BYTE *v = m;
+                 register BYTE *r = result;
++                register BYTE x;
+                 if (f == FF_ONE)
+                 {
+                     register size_t k = l_rowsize;
+                     for (; k != 0; --k)
+                     {
+-                        *r = mtx_tadd[*r][*v++];
++                        x=*v++;
++                        if (x) *r = mtx_tadd[*r][x];
+                         ++r;
+                     }
+                 }
+@@ -251,9 +253,8 @@ __asm__("    popl %ebx\n"
+                     register size_t k = l_rowsize;
+                     for (; k != 0; --k)
+                     {
+-                        if (*v != 0)
+-                            *r = mtx_tadd[multab[*v]][*r];
+-                        ++v;
++                        x=*v++;
++                        if (x) *r = mtx_tadd[multab[x]][*r];
+                         ++r;
+                     }
+                 }
+diff --git a/src/zcleanrow.c b/src/zcleanrow.c
+index 649e551..b4dcb30 100644
+--- a/src/zcleanrow.c
++++ b/src/zcleanrow.c
+@@ -35,18 +35,21 @@ MTX_DEFINE_FILE_INFO
+ 
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv)
+ {
+-    int i;
++    register int i, pivi, first;
+     PTR x;
+ 
+     for (i=0, x=matrix; i < nor; ++i, FfStepPtr(&x))
+     {
+-        FEL f = FfExtract(row,piv[i]);
++        pivi = piv[i];
++        FEL f = FfExtract(row,pivi);
+         if (f != FF_ZERO)
+-	    FfAddMulRow(row,x,FfNeg(FfDiv(f,FfExtract(x,piv[i]))));
++        {
++            first = pivi/MPB;
++            FfAddMulRowPartial(row,x,FfNeg(FfDiv(f,FfExtract(x,pivi))),first,FfCurrentRowSizeIo-first);
++        }
+     }
+ }
+ 
+-
+ /**
+  ** Clean Row and Record Operations.
+  ** This function works like FfCleanRow(), but it stores a record of the operations performed
diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
index 7733e9e44cb..163f180e50f 100755
--- a/build/pkgs/meataxe/spkg-install
+++ b/build/pkgs/meataxe/spkg-install
@@ -45,6 +45,30 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
+# Just to be sure, we also create other folders, although
+# they are standard SageMath folders
+
+mkdir -p "$MTXBIN"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating directory for meataxe binaries."
+    exit 1
+fi
+
+mkdir -p "$SAGE_LOCAL/include"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating SageMath's include directory."
+    exit 1
+fi
+
+mkdir -p "$SAGE_LOCAL/lib"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating SageMath's lib folder."
+    exit 1
+fi
+
 ## Install! Aparently MeatAxe would rebuild everything when
 ## testing, and "make check" also installs. So, if a test
 ## is requested then we do it in one go.

From 2e6425793607152a39296a95c5c50f63cf796dea Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Tue, 22 Sep 2015 18:08:36 +0200
Subject: [PATCH 06/23] Doctests and error handling for MeatAxe

---
 src/sage/libs/meataxe.pxd             |  79 +--
 src/sage/matrix/matrix_gfpn_dense.pxd |   5 +-
 src/sage/matrix/matrix_gfpn_dense.pyx | 786 ++++++++++++++++++++------
 3 files changed, 659 insertions(+), 211 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index cfca8ca40bc..79a8d03448d 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -16,9 +16,9 @@
 cdef extern from "meataxe.h":
     # general ctype emulations
     # ctypedef int size_t   # size_t should be a standard type!
-    ctypedef unsigned long Ulong
-    ctypedef unsigned short Ushort
-    ctypedef unsigned char Uchar
+    # ctypedef unsigned long Ulong
+    # ctypedef unsigned short Ushort
+    # ctypedef unsigned char Uchar
     ctypedef unsigned char FEL
     ctypedef FEL *PTR
 
@@ -46,8 +46,8 @@ cdef extern from "meataxe.h":
     ## global parameters
     size_t FfRowSize(int noc)
     size_t FfTrueRowSize(int noc) # Difference to FfRowSize: Doesn't count padding bytes
-    int FfSetField(int field)
-    int FfSetNoc(int ncols)
+    int FfSetField(int field) except -1
+    int FfSetNoc(int ncols) except -1
 
     ## Finite Fields
     # FEL FfAdd(FEL a,FEL b)
@@ -68,13 +68,13 @@ cdef extern from "meataxe.h":
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
-    # FEL FfScalarProduct(PTR a, PTR b)
+    FEL FfScalarProduct(PTR a, PTR b)
     void FfSwapRows(PTR dest, PTR src)
-    # void FfPermRow(PTR row, long *perm, PTR result)
-    # int FfCmpRows(PTR p1, PTR p2)
+    void FfPermRow(PTR row, long *perm, PTR result)
+    int FfCmpRows(PTR p1, PTR p2)
 
     ## multiple rows
-    PTR FfAlloc(int nor)
+    PTR FfAlloc(int nor) except NULL
     void FfExtractColumn(PTR mat,int nor,int col,PTR result)
     int FfStepPtr(PTR *x)  # Advance to next row
     PTR FfGetPtr(PTR base, int row)  # Advance to "row" rows after base
@@ -94,35 +94,50 @@ cdef extern from "meataxe.h":
         int RowSize                     # Size (in bytes) of one row
         int *PivotTable                 # Pivot table (if matrix is in echelon form
     ## Basic memory operations
-    Matrix_t *MatAlloc(int field, int nor, int noc)
+    Matrix_t *MatAlloc(int field, int nor, int noc) except NULL
     int MatFree(Matrix_t *mat)
     PTR MatGetPtr(Matrix_t *mat, int row)
-    int MatCompare(Matrix_t *a, Matrix_t *b)
-    # int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols)
-    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols)
-    # Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows)
-    Matrix_t *MatDup(Matrix_t *src)
-    Matrix_t *MatId(int fl, int nor)
-    Matrix_t *MatLoad(char *fn)
-    int MatSave(Matrix_t *mat, char *fn)
+    int MatCompare(Matrix_t *a, Matrix_t *b) except? -1
+    int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols) except -1
+    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols) except NULL
+    Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows) except NULL
+    Matrix_t *MatDup(Matrix_t *src) except NULL
+    Matrix_t *MatId(int fl, int nor) except NULL
+    Matrix_t *MatLoad(char *fn) except NULL
+    int MatSave(Matrix_t *mat, char *fn) except -1
 
 
     ## Basic Arithmetic  ## general rule: dest is changed, src/mat are unchanged!
-    Matrix_t *MatTransposed(Matrix_t *src)
-    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src)
-    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff)
-    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src)
-    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
-    Matrix_t *MatPower(Matrix_t *mat, long n)
+    Matrix_t *MatTransposed(Matrix_t *src) except NULL
+    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src) except NULL
+    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff) except NULL
+    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src) except NULL
+    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff) except NULL
+    Matrix_t *MatPower(Matrix_t *mat, long n) except NULL
     FEL MatTrace(Matrix_t *mat)
-    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B)
+    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B) except NULL
     void StrassenSetCutoff(size_t size)
 
-    ## "Higher" Arithmetic ## all arguments are unchanged
-    # int MatClean(Matrix_t *mat, Matrix_t *sub)
-    int MatEchelonize(Matrix_t *mat)
-    int MatOrder(Matrix_t *mat)
+    ## "Higher" Arithmetic
+    int MatClean(Matrix_t *mat, Matrix_t *sub) except -1
+    int MatEchelonize(Matrix_t *mat) except -1
+    int MatOrder(Matrix_t *mat) except? -1
     long MatNullity(Matrix_t *mat)
-    Matrix_t *MatInverse(Matrix_t *src)
-    Matrix_t *MatNullSpace(Matrix_t *mat)
-# thats's all of meataxe.h !
+    Matrix_t *MatInverse(Matrix_t *src) except NULL
+    Matrix_t *MatNullSpace(Matrix_t *mat) except NULL
+
+    ## Error handling
+    cdef extern int MTX_ERR_NOMEM, MTX_ERR_GAME_OVER, MTX_ERR_DIV0, MTX_ERR_FILEFMT, MTX_ERR_BADARG
+    cdef extern int MTX_ERR_RANGE, MTX_ERR_NOTECH, MTX_ERR_NOTSQUARE, MTX_ERR_INCOMPAT
+    cdef extern int MTX_ERR_BADUSAGE, MTX_ERR_OPTION, MTX_ERR_NARGS, MTX_ERR_NOTMATRIX, MTX_ERR_NOTPERM
+    ctypedef struct MtxFileInfo_t:
+        char *Name
+        char *BaseName
+
+    ctypedef struct MtxErrorRecord_t:
+        MtxFileInfo_t *FileInfo
+        int LineNo
+        char *Text
+
+    ctypedef void MtxErrorHandler_t(MtxErrorRecord_t*)
+    MtxErrorHandler_t *MtxSetErrorHandler(MtxErrorHandler_t *h)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
index 300118918ad..34487536b14 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pxd
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -10,8 +10,8 @@
 
 cdef class FieldConverter_class:
     cdef object field  # that's a function converting an int to a field element
-    cdef object int_to_field(self, int x)
-    cdef int field_to_int(self, x)
+    cpdef object int_to_field(self, int x)
+    cpdef int field_to_int(self, x)
 
 from sage.matrix.matrix_dense cimport Matrix_dense
 from sage.structure.element cimport Matrix
@@ -23,6 +23,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
     cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols)
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
+    cdef list _rowlist_(self, i, j=*)
     cdef Matrix _matrix_times_matrix_(self, Matrix right)
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right)
     cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=*)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 7996917c3a7..dc0c978c8ef 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -70,16 +70,6 @@ include 'sage/ext/stdsage.pxi'
 import sys
 from libc.string cimport memcpy
 
-cdef inline int setfield(long n) except -1:
-    # This is a wrapper around FfSetField, but
-    # we guard it against MTX_Error, which would immediately
-    # crash the Sage session.
-    if n == FfOrder:
-        return 0
-    if not (0 < n < 255 and is_prime_power(n)):
-        raise ValueError("Only finite fields of order at most 255 are supported")
-    return FfSetField(n)
-
 # Fast conversion from field to int and int to field
 cdef class FieldConverter_class:
     """
@@ -93,18 +83,18 @@ cdef class FieldConverter_class:
 
     EXAMPLE::
 
-        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
         sage: F.<y> = GF(125)
-        sage: C = FieldConverter_class(F)
-        sage: C.int_to_field(15)
+        sage: C = FieldConverter_class(F)               # optional: meataxe
+        sage: C.int_to_field(15)                        # optional: meataxe
         3*y
-        sage: F.fetch_int(15)
+        sage: F.fetch_int(15)                           # optional: meataxe
         3*y
-        sage: %timeit C.int_to_field(15)    #not tested
+        sage: %timeit C.int_to_field(15)    # not tested
         625 loops, best of 3: 1.04 µs per loop
-        sage: %timeit F.fetch_int(15)       #not tested
+        sage: %timeit F.fetch_int(15)       # not tested
         625 loops, best of 3: 3.97 µs per loop
-        sage: C.field_to_int(y)
+        sage: C.field_to_int(y)                         # optional: meataxe
         5
         sage: y.integer_representation()
         5
@@ -118,46 +108,46 @@ cdef class FieldConverter_class:
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.int_to_field(15)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.int_to_field(15)                    # optional: meataxe
             3*y
             sage: F.fetch_int(15)
             3*y
-            sage: C.field_to_int(y)
+            sage: C.field_to_int(y)                     # optional: meataxe
             5
             sage: y.integer_representation()
             5
 
         """
         self.field = field._cache.fetch_int
-    cdef object int_to_field(self, int x):
+    cpdef object int_to_field(self, int x):
         """
         Fetch a python int into the field.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.int_to_field(15)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.int_to_field(15)                    # optional: meataxe
             3*y
             sage: F.fetch_int(15)
             3*y
 
         """
         return self.field(x)
-    cdef int field_to_int(self, x):
+    cpdef int field_to_int(self, x):
         """
         Represent a field element by a python int.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.field_to_int(y)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.field_to_int(y)                     # optional: meataxe
             5
             sage: y.integer_representation()
             5
@@ -176,14 +166,14 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
     EXAMPLE::
 
-        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class # optional: meataxe
         sage: F = GF(5)
-        sage: C = PrimeFieldConverter_class(F)
-        sage: C.int_to_field(int(2))
+        sage: C = PrimeFieldConverter_class(F)      # optional: meataxe
+        sage: C.int_to_field(int(2))                # optional: meataxe
         2
         sage: F(2)
         2
-        sage: C.field_to_int(F(2))
+        sage: C.field_to_int(F(2))                  # optional: meataxe
         2
         sage: int(F(2))
         2
@@ -197,29 +187,29 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
-            sage: C.int_to_field(int(2))
+            sage: C = PrimeFieldConverter_class(F)  # optional: meataxe
+            sage: C.int_to_field(int(2))            # optional: meataxe
             2
             sage: F(2)
             2
-            sage: C.field_to_int(F(2))
+            sage: C.field_to_int(F(2))              # optional: meataxe
             2
             sage: int(F(2))
             2
 
         """
         self.field = field
-    cdef object int_to_field(self, int x):
+    cpdef object int_to_field(self, int x):
         """
         Fetch a python int into the field.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
+            sage: C = PrimeFieldConverter_class(F)  # optional: meataxe
-            sage: C.int_to_field(int(2))
+            sage: C.int_to_field(int(2))            # optional: meataxe
             2
             sage: F(2)
@@ -227,16 +217,16 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
         """
         return IntegerMod_int(self.field, x)
-    cdef int field_to_int(self, x):
+    cpdef int field_to_int(self, x):
         """
         Represent a field element by a python int.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
-            sage: C.field_to_int(F(2))
+            sage: C = PrimeFieldConverter_class(F)      # optional: meataxe
+            sage: C.field_to_int(F(2))                  # optional: meataxe
             2
             sage: int(F(2))
             2
@@ -268,7 +258,41 @@ cdef FieldConverter_class FieldConverter(field):
             return _converter_cache.setdefault(field, PrimeFieldConverter_class(field))
         return _converter_cache.setdefault(field, FieldConverter_class(field))
 
+######################################
+## Error handling for MeatAxe, to prevent immediate exit of the program
+
+cdef dict ErrMsg = {  # MeatAxe error text -> Python exception class (looked up in ErrorHandler)
+    "Not enough memory": MemoryError,
+    "Time limit exceeded": RuntimeError,
+    "Division by zero": ZeroDivisionError,
+    "Bad file format": IOError,
+    "Bad argument": ValueError,
+    "Argument out of range": IndexError,
+
+    "Matrix not in echelon form": ValueError,
+    "Matrix not square": ArithmeticError,
+    "Incompatible objects": TypeError,
+
+    "Bad syntax, try `-help'": SyntaxError,
+    "Bad usage of option, try `-help'": ValueError,
+    "Bad number of arguments, try `-help'": ValueError,
+
+    "Not a matrix": TypeError,
+    "Not a permutation": TypeError
+}
 
+from cpython.exc cimport PyErr_SetObject
+
+cdef void ErrorHandler(MtxErrorRecord_t *err):  # sets a Python error; texts not in ErrMsg give SystemError
+    PyErr_SetObject(ErrMsg.get(err.Text, SystemError), "{} in file {} (line {})".format(err.Text, err.FileInfo.BaseName, err.LineNo))
+
+MtxSetErrorHandler(ErrorHandler)  # module-level call: the handler is registered when this module is initialised
+
+######################################
+##
+## Wrapper for MeatAxe matrices
+##
+######################################
 
 cdef class Matrix_gfpn_dense(Matrix_dense):
     r"""
@@ -288,7 +312,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         sage: print M
         [1 2 3]
         [4 0 1]
-        sage: type(M)
+        sage: type(M)     # optional: meataxe
         <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
 
     The documentation of the ``__init__`` methods shows further
@@ -302,10 +326,10 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         """
         TESTS::
 
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
-            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # indirect doctest
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # optional: meataxe
             []
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)  # optional: meataxe
             [0 0 0 0]
             [0 0 0 0]
             [0 0 0 0]
@@ -323,6 +347,16 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.Data = MatAlloc(f, nrows, ncols)
 
     def __dealloc__(self):
+        """
+        TESTS::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # optional: meataxe
+            []
+            sage: M = None
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)  # optional: meataxe
+            sage: del M    # indirect doctest
+        """
         if self.Data != NULL:
             MatFree(self.Data)
             self.Data = NULL
@@ -359,32 +393,32 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         EXAMPLES::
 
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
 
         1. Creating an empty matrix::
 
-            sage: Matrix_gfpn_dense(None)
+            sage: Matrix_gfpn_dense(None)  # optional: meataxe
             []
 
         2. Creating a zero (3x2)-matrix::
 
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))  # optional: meataxe
             [0 0]
             [0 0]
             [0 0]
 
         3. Creating a matrix from a list or list of lists::
 
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])  # optional: meataxe
             [1 2 3]
             [4 0 1]
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])  # indirect doctest
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])    # optional: meataxe
             [1 2 3]
             [4 0 1]
 
         4. Creating a diagonal matrix::
 
-            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M  # optional: meataxe
             [2 0 0 0 0]
             [0 2 0 0 0]
             [0 0 2 0 0]
@@ -393,24 +427,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         5. Creating a matrix from a file in MeatAxe format.
 
-           First, we have to create that file; we use a temporary file,
-           that will be removed when leaving Sage. Note that the method
-           :meth:`msave` must be used, which does not use Python pickling
-           but relies on the intrinsic C--MeatAxe way of saving.
-           ::
-
-            sage: f = tmp_filename()
-            sage: M.msave(f)
-            sage: Matrix_gfpn_dense(f)
-            [2 0 0 0 0]
-            [0 2 0 0 0]
-            [0 0 2 0 0]
-            [0 0 0 2 0]
-            [0 0 0 0 2]
+           This is not tested.
 
         TESTS::
 
-            sage: MS = MatrixSpace(GF(125,'y'),2)
+            sage: MS = MatrixSpace(GF(125,'y'),2)  # indirect doctest
             sage: A = MS(0)
             sage: A.left_kernel()
             Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
@@ -528,22 +549,22 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         EXAMPLES::
 
-            sage: M=MatrixSpace(GF(25,'x')([20*[0],20*[0],[1]+19*[0]])
-            sage: N=copy(M)
+            sage: M = MatrixSpace(GF(25,'x'), 3, 20)([20*[0],20*[0],[1]+19*[0]])
+            sage: N = copy(M)   # indirect doctest
             sage: print N
             [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-            sage: N==M
+            sage: N == M
             True
             sage: N is M
             False
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
-            sage: M=Matrix_gfpn_dense('')
-            sage: N=copy(M)
-            sage: N
-            Empty MTX matrix
-            sage: N==M
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: M = Matrix_gfpn_dense('')   # optional: meataxe
+            sage: N = copy(M)
+            sage: N                         # optional: meataxe
+            []
+            sage: N == M
             True
             sage: N is M
             False
@@ -559,17 +580,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             retval.Data = NULL
         return retval
 
-    ##########################
-    ## Saving should be done via pickling
-    ## However, we keep a method that relies on MeatAxe matsave:
-    def msave(self,f):
-        """
-        M.msave('filename') ==> save matrix into file <filename>
-
-        It can be reloaded with ``Matrix_gfpn_dense('filename')``.
-        """
-        MatSave(self.Data,f)
-
     ## Pickling and string representation is taken care of by implementing get_unsafe
     cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
         """
@@ -578,17 +588,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         TEST::
 
             sage: F.<z> = GF(9)
-            sage: M = MatrixSpace(F,3)(list(F))
-            sage: type(M)
+            sage: M = MatrixSpace(F,3)(sorted(list(F)))
+            sage: type(M)               # optional: meataxe
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
-            sage: M    # indirect doctest
-            [      0     2*z   z + 1]
-            [  z + 2       2       z]
-            [2*z + 2 2*z + 1       1]
+            sage: M                     # indirect doctest
+            [      0       1       2]
+            [      z   z + 1   z + 2]
+            [    2*z 2*z + 1 2*z + 2]
 
         """
         if self.Data == NULL:
             raise IndexError, "Matrix is empty"
+        FfSetField(self.Data.Field)
         return self._converter.int_to_field(FfToInt(FfExtract(MatGetPtr(self.Data,i), j)))
 
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j):
@@ -596,12 +607,37 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         # It is essential that you call FfSetField and FfSetNoc YOURSELF
         # and that you assert that the matrix is not empty!
         # This method is here for speed!
-        return FfToInt(FfExtract(FfGetPtr(self.Data.Data,i) ,j))
+        return FfToInt(FfExtract(MatGetPtr(self.Data,i), j))
 
     cdef set_unsafe(self, Py_ssize_t i, Py_ssize_t j, value):
+        """
+        Set values without bound checking.
+
+        TESTS:
+
+        The following test would have failed in a preliminary version
+        of this MeatAxe wrapper::
+
+            sage: K.<x> = GF(125)
+            sage: M = MatrixSpace(K,9,9)()
+            sage: N = MatrixSpace(GF(9,'x'),20).random_element()
+            sage: M[2,2] = x
+            sage: M
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 x 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+
+        """
         # ASSUMPTION: value's parent is the base ring
         if self.Data == NULL:
             raise IndexError, "Matrix is empty"
+        FfSetField(self.Data.Field)
         FfInsert(MatGetPtr(self.Data,i), j, FfFromInt(self._converter.field_to_int(value)))
 
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value):
@@ -625,23 +661,24 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         EXAMPLE::
 
             sage: MS = MatrixSpace(GF(27,'z'),6,6)
-            sage: M = MS.random_element(); M    # indirect doctest
+            sage: M = MS.random_element()       # indirect doctest
+            sage: M                             # optional: meataxe
             [              1           z + 1     z^2 + z + 1             z^2       2*z^2 + z           z + 1]
             [2*z^2 + 2*z + 2   2*z^2 + z + 2         z^2 + 1 2*z^2 + 2*z + 2         z^2 + z   2*z^2 + z + 1]
             [        2*z + 2     z^2 + z + 2           z + 2 2*z^2 + 2*z + 2           2*z^2           2*z^2]
             [  2*z^2 + z + 2             z^2           z + 2         z^2 + z       2*z^2 + 2         z^2 + 2]
             [      2*z^2 + z             2*z 2*z^2 + 2*z + 1       2*z^2 + 1 2*z^2 + 2*z + 1       2*z^2 + z]
             [        2*z + 1         z^2 + z             z^2             z^2     2*z^2 + 2*z           z + 1]
-            sage: type(M)
+            sage: type(M)                           # optional: meataxe
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
-            sage: MS.random_element(nonzero=True)
+            sage: MS.random_element(nonzero=True)   # optional: meataxe
             [            2*z               1   z^2 + 2*z + 1   2*z^2 + z + 1             z^2     z^2 + z + 1]
             [    2*z^2 + 2*z   2*z^2 + z + 2         2*z + 1       z^2 + 2*z     2*z^2 + 2*z             z^2]
             [        z^2 + z     z^2 + z + 2 2*z^2 + 2*z + 1         z^2 + 2               1           2*z^2]
             [              z     2*z^2 + 2*z           2*z^2         2*z + 1           z + 2           z + 2]
             [        z^2 + z             z^2           z + 2     2*z^2 + 2*z         2*z + 1         z^2 + z]
             [    z^2 + z + 2       2*z^2 + z             z^2           z + 1     2*z^2 + 2*z   z^2 + 2*z + 1]
-            sage: MS.random_element(density=0.5)
+            sage: MS.random_element(density=0.5)    # optional: meataxe
             [        z^2 + 2               0   z^2 + 2*z + 2       2*z^2 + z               0     z^2 + z + 2]
             [              0               1               0               0               0               0]
             [  2*z^2 + z + 1   2*z^2 + z + 2               0     z^2 + z + 2               0     z^2 + z + 1]
@@ -723,41 +760,42 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     FfStepPtr(&(x))
                 sig_off()
 
-    def show_contents(self, r=None):
-        FfSetField(self.Data.Field)
-        FfSetNoc(self.Data.Noc)
-        cdef PTR p
-        cdef size_t i, j
-        if r is not None:
-            r_min = r
-            r_max = r+1
-        else:
-            r_min = 0
-            r_max = self.Data.Nor
-        for i in range(r_min, r_max):
-            p = FfGetPtr(self.Data.Data, i)
-            for j from 0<=j<self.Data.RowSize:
-                print "%3.3d"%p[j],
-            print
+## Debugging
+#    def show_contents(self, r=None):
+#        FfSetField(self.Data.Field)
+#        FfSetNoc(self.Data.Noc)
+#        cdef PTR p
+#        cdef size_t i, j
+#        if r is not None:
+#            r_min = r
+#            r_max = r+1
+#        else:
+#            r_min = 0
+#            r_max = self.Data.Nor
+#        for i in range(r_min, r_max):
+#            p = FfGetPtr(self.Data.Data, i)
+#            for j from 0<=j<self.Data.RowSize:
+#                print "%3.3d"%p[j],
+#            print
 
 ##################
 ## comparison
     cpdef int _cmp_(left, Element right) except -2:
         """
-        Compare two Matrix_gfpn_dense matrices
+        Compare two :class:`Matrix_gfpn_dense` matrices
 
         Of course, '<' and '>' doesn't make much sense for matrices.
 
         EXAMPLES::
 
-            sage: M = MatrixSpace(GF(125,'x'),[20*[0],20*[0],[1]+19*[0]])
+            sage: M = MatrixSpace(GF(125,'x'),3,20)([20*[0],20*[0],[1]+19*[0]])
             sage: N = copy(M)
             sage: M == N
             True
             sage: M != N
             False
-            sage: print M < N
-            None
+            sage: M < N
+            False
             sage: N[2,19] = 1
             sage: M == N
             False
@@ -766,8 +804,17 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         """
         cdef Matrix_gfpn_dense self = left
         cdef Matrix_gfpn_dense N = right
+        if self is None or N is None:
+            return -1
         cdef char* d1
         cdef char* d2
+        if self.Data == NULL:
+            if N.Data == NULL:
+                return 0
+            else:
+                return 1
+        elif N.Data == NULL:
+            return -1
         if self.Data.Field != N.Data.Field:
             if self.Data.Field > N.Data.Field:
                 return 1
@@ -790,11 +837,12 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             return -1
         return 0
 
-    def _rowlist_(self, i, j=-1):
+    cdef list _rowlist_(self, i, j=-1):
         "M._rowlist_(i): Return row <i> as a list of python ints"
         cdef int k
         if self.Data:
             FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
         else:
             raise ValueError("Matrix is empty")
         if (i<0) or (i>=self.Data.Nor):
@@ -813,6 +861,17 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return L
 
     def _list(self):
+        """
+        Return a flat list of all entries of this matrix.
+
+        The result is cached.
+
+        EXAMPLES::
+
+            sage: MatrixSpace(GF(9,'x'),3)(sorted(list(GF(9,'x')))).list()  # indirect doctest
+            [0, 1, 2, x, x + 1, x + 2, 2*x, 2*x + 1, 2*x + 2]
+
+        """
         cdef list x = self.fetch('list')
         if not x is None:
             return x
@@ -825,34 +884,135 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise IndexError, "Matrix is empty"
         cdef PTR p
         p = self.Data.Data
+        sig_on()
         for i from 1<=i<self.Data.Nor:
             x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
             FfStepPtr(&(p))
         x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
+        sig_off()
         self.cache('list', x)
         return x
 
 #########################
 ## Arithmetics
     cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
+        """
+        Rescale row number `i` in-place by multiplication with the scalar `s`.
+
+        The argument ``start_col`` must be zero, since only full rows can
+        be rescaled. The scalar `s` is converted into the base ring.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.rescale_row(1, 3)   # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 3 3*x + 1 3*x + 4 3*x + 2]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.rescale_row(4, x)
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 3 3*x + 1 3*x + 4 3*x + 2]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [4*x + 2       2   x + 2 2*x + 2 3*x + 2]
+
+        """
         if start_col != 0 or self.Data == NULL:
             raise ValueError("We can only rescale a full row of a non-empty matrix")
         FfMulRow(MatGetPtr(self.Data, i), FfFromInt(self._converter.field_to_int(self._base_ring(s))))
 
     cdef add_multiple_of_row_c(self,  Py_ssize_t row_to, Py_ssize_t row_from, multiple, Py_ssize_t start_col):
+        """
+        Add the ``multiple``-fold of row ``row_from`` in-place to row ``row_to``.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.add_multiple_of_row(2, 4, x)  # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [  x + 2 2*x + 3 3*x + 4     4*x       1]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+
+        """
         if start_col != 0 or self.Data == NULL:
             raise ValueError("We can only rescale a full row of a non-empty matrix")
         FfAddMulRow(MatGetPtr(self.Data, row_to), MatGetPtr(self.Data, row_from), FfFromInt(self._converter.field_to_int(self._base_ring(multiple))))
 
     cdef swap_rows_c(self, Py_ssize_t row1, Py_ssize_t row2):
+        """
+        Swap the rows ``row1`` and ``row2`` in-place.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.swap_rows(1, 3)    # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+
+        """
         FfSwapRows(MatGetPtr(self.Data, row1), MatGetPtr(self.Data, row2))
 
     def trace(self):
+        """
+        Trace of this matrix, i.e., the sum of diagonal elements.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(125)
+            sage: MatrixSpace(K,7,7)(x).trace()
+            2*x
+
+        """
         if self._nrows != self._ncols:
             raise ValueError, "self must be a square matrix"
         return self._converter.int_to_field(FfToInt(MatTrace(self.Data)))
 
     def stack(self, Matrix_gfpn_dense other):
+        """
+        Stack two matrices of the same number of columns.
+
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),1,9)(sorted(list(GF(9,'x'))))
+            sage: M
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+            sage: M.stack(M)
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+
+        """
         if self._ncols != other._ncols:
             raise TypeError("Both numbers of columns must match.")
         if self._nrows == 0 or self.Data == NULL:
@@ -866,6 +1026,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return OUT
 
     cpdef ModuleElement _add_(self, ModuleElement right):
+        """
+        TESTS::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: N = MatrixSpace(K,3,3)(2*x)
+            sage: M+N           # indirect doctest
+            [    2*x       1       2]
+            [      x       1   x + 2]
+            [    2*x 2*x + 1   x + 2]
+
+        """
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -873,12 +1045,25 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         if Self.Data == NULL or Right.Data == NULL:
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
+        Left._cache = {}
         if MatAdd(Left.Data, Right.Data) != NULL:
             return Left
         else:
             raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
+        """
+        TESTS::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: N = MatrixSpace(K,3,3)(2*x)
+            sage: M-N    # indirect doctest
+            [      x       1       2]
+            [      x 2*x + 1   x + 2]
+            [    2*x 2*x + 1       2]
+
+        """
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -887,30 +1072,78 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._is_immutable = False
+        Left._cache = {}
         if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
             return Left
         else:
             raise ArithmeticError, "Matrix sizes or fields not compatible"
 
     def __neg__(self):
+        """
+        TESTS::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: -M
+            [      0       2       1]
+            [    2*x 2*x + 2 2*x + 1]
+            [      x   x + 2   x + 1]
+
+        ::
+
+            sage: M = MatrixSpace(GF(125,'x'),10,30).random_element()
+            sage: N = MatrixSpace(GF(125,'x'),10,30).random_element()
+            sage: M + (-N) == M - N == -(N - M)
+            True
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         return self._rmul_(self._base_ring(-1))
 
     cpdef ModuleElement _rmul_(self, RingElement left):
+        """
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(list(K))
+            sage: x*M    # indirect doctest
+            [      0   x + 1 2*x + 1]
+            [      2     2*x 2*x + 2]
+            [  x + 2       1       x]
+            sage: -M == (-1)*M
+            True
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
+        OUT._cache = {}
         if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _lmul_(self, RingElement right):
+        """
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: x*M    # indirect doctest
+            [      0       x     2*x]
+            [  x + 1 2*x + 1       1]
+            [2*x + 2       2   x + 2]
+            sage: -M == (-1)*M
+            True
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
+        OUT._cache = {}
         if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
@@ -922,6 +1155,21 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return 0
 
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
+        """
+        Multiplication using the cubic school book multiplication algorithm.
+
+        EXAMPLES:
+
+        Since by default the asymptotically faster Strassen-Winograd
+        multiplication algorithm is used, the following is a valid
+        consistency check::
+
+            sage: M = MatrixSpace(GF(9,'x'),1000,500).random_element()
+            sage: N = MatrixSpace(GF(9,'x'),500,2000).random_element()
+            sage: M*N == M._multiply_classical(N)                       # optional: meataxe
+            True
+
+        """
         "multiply two meataxe matrices by the school book algorithm"
         if self.Data == NULL or right.Data == NULL:
             raise ValueError("The matrices must not be empty")
@@ -943,8 +1191,26 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
     cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=0):
         """
-        cutoff is NOT the number of rows/columns, but the rowsize expressed in bytes.
-        If `cutoff==0` then the default ``sizeof(long)^2/2`` is chosen.
+        Matrix multiplication using the asymptotically fast Strassen-Winograd algorithm.
+
+        INPUT:
+
+        - ``right`` -- a matrix of dimensions suitable to do multiplication
+        - ``cutoff`` (optional integer) -- indicates the minimal size of submatrices
+          that will be considered in the divide-and-conquer algorithm. The size is
+          *not* expressed by the number of rows/columns, but by the row size
+          in bytes. Depending on the base field, one byte may represent up to eight
+          entries in a matrix row. The default is ``sizeof(long)^2/2`` bytes.
+
+        EXAMPLES:
+
+        We test that different cutoffs yield the same result::
+
+            sage: M = MatrixSpace(GF(9,'x'),1500,600).random_element()
+            sage: N = MatrixSpace(GF(9,'x'),600,1500).random_element()
+            sage: M._multiply_strassen(N) == M._multiply_strassen(N,80) == M._multiply_strassen(N,2) # optional: meataxe
+            True
+
         """
         if self.Data == NULL or right.Data == NULL:
             raise ValueError("The matrices must not be empty")
@@ -972,48 +1238,76 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         else:
             r = FfFromInt(n)
         left = self.__copy__()
+        left._cache = {}
         if MatMulScalar(left.Data, r) != NULL:
             return left
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     def __div__(Matrix_gfpn_dense self, p):
-        "divide an MTX matrix by a field element represented by an integer"
+        """
+        Divide a matrix by a scalar.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: M
+            [      0       1       2]
+            [      x   x + 1   x + 2]
+            [    2*x 2*x + 1 2*x + 2]
+            sage: M/2                   # indirect doctest
+            [      0       2       1]
+            [    2*x 2*x + 2 2*x + 1]
+            [      x   x + 2   x + 1]
+            sage: M/x
+            [      0   x + 2 2*x + 1]
+            [      1       x 2*x + 2]
+            [      2   x + 1     2*x]
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         if not p:
             raise ZeroDivisionError
         if p not in self._base_ring:
             raise ValueError("{} is not a scalar".format(p))
+        p = self._base_ring(p)
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
-        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
-        if MatMulScalar(OUT.Data, r) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
-
-    def __pow__(Matrix_gfpn_dense self, n, ignored):
-        "M.__pow__(n): return M^n"
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        if not self.is_square():
-            raise ArithmeticError("self must be a square matrix")
-        if ignored is not None:
-            raise RuntimeError("__pow__ third argument not used")
-        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
-        cdef Matrix_gfpn_dense SELFINV
-        OUT._is_immutable = False
         OUT._cache = {}
-        if n>=0:
-            OUT.Data = MatPower(self.Data,n)
-        else:
-            SELFINV = self.__invert__()
-            OUT.Data = MatPower(SELFINV.Data,-n)
-        if OUT.Data != NULL:
-            return OUT
-        raise ArithmeticError("Failure in exponentiating a matrix")
+        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
+        MatMulScalar(OUT.Data, r)
+        return OUT
 
     def __invert__(Matrix_gfpn_dense self):
-        "M__invert__(): return M^(-1)"
+        """
+        Multiplicative inverse of this matrix (if available)
+
+        TESTS::
+
+            sage: MS = MatrixSpace(GF(9,'x'),500)
+            sage: while 1:
+            ....:     M = MS.random_element()
+            ....:     if M.rank() == 500:
+            ....:         break
+            sage: Minv = ~M    # indirect doctest
+            sage: Minv*M == M*Minv == 1
+            True
+
+        We use the occasion to demonstrate that errors in MeatAxe are
+        correctly handled in Sage::
+
+            sage: MS = MatrixSpace(GF(25,'x'),5)
+            sage: while 1:
+            ....:     M = MS.random_element(density=0.4)
+            ....:     if M.rank() < 5:
+            ....:         break
+            sage: ~M                    # optional: meataxe
+            Traceback (most recent call last):
+            ...
+            ZeroDivisionError: Division by zero in file matinv.c (line 50)
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         if not self.is_square():
@@ -1021,22 +1315,61 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         OUT._is_immutable = False
         OUT._cache = {}
-        OUT.Data = MatInverse(self.Data)
+        sig_on()
+        try:
+            OUT.Data = MatInverse(self.Data)
+        except:
+            sig_off()
+            raise
+        sig_off()
         if OUT.Data != NULL:
             return OUT
         raise ArithmeticError("This matrix is not invertible")
 
     def transpose(Matrix_gfpn_dense self):
+        """
+        Return the transposed matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K, 2,4)(sorted(list(K)[1:]))
+            sage: M
+            [      1       2       x   x + 1]
+            [  x + 2     2*x 2*x + 1 2*x + 2]
+            sage: M.transpose()
+            [      1   x + 2]
+            [      2     2*x]
+            [      x 2*x + 1]
+            [  x + 1 2*x + 2]
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._rows)
+        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._nrows)
         OUT._is_immutable = False
         OUT._cache = {}
         OUT.Data = MatTransposed(self.Data)
         return OUT
 
     def order(self):
-        "M.order(): return multiplicative order of M"
+        """
+        Return the multiplicative order of this matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(27)
+            sage: M = MatrixSpace(K, 4)([2*x^2 + 2*x, 2*x^2 + x, 2*x^2 + x + 1,
+            ....: x^2 + x + 2, x + 2, x^2, 2*x + 2, 2*x^2 + 2*x, 2*x^2 + 1,
+            ....: 1, 2, x^2 + 2*x + 1, x^2 + x + 2, x + 1, 2*x^2 + 2*x, x^2 + x])
+            sage: M.order()                 # optional: meataxe
+            104
+            sage: M^104 == 1
+            True
+            sage: M^103 == 1
+            False
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         if (self.Data.Nor <> self.Data.Noc):
@@ -1050,23 +1383,49 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 ###################
 ## Gauss algorithm
 
-    def nullity(self):
-        "M.nullity(): return the nullity of M"
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        return MatNullity(self.Data)
-
     def left_kernel_matrix(self):
-        """M.left_kernel_matrix(): return the null space of M
+        """
+        Return the null space of this matrix, represented as a matrix.
+
+        NOTE:
+
+        - For a matrix `M`, ``M.left_kernel_matrix()*M`` is a null matrix.
+        - The command ``M.left_kernel()`` uses a generic implementation in Sage,
+          that relies on computing the echelon form of the transposed
+          matrix. This method however uses a MeatAxe function to compute
+          the left kernel matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K, 10)()
+            sage: entries = [((0, 2), x), ((0, 4), 3*x + 2),
+            ....: ((0, 8), 2*x), ((1, 1), x + 3), ((1, 5), 3*x),
+            ....: ((1, 6), x + 4), ((2, 3), 2*x), ((2, 5), 4*x + 1),
+            ....: ((2, 6), 4), ((3, 4), x + 4), ((3, 5), x + 1),
+            ....: ((5, 5), 3*x), ((5, 7), x + 3), ((6, 1), x),
+            ....: ((6, 2), x + 1), ((6, 5), x + 1), ((8, 2), 4),
+            ....: ((8, 8), 4), ((8, 9), x + 3), ((9, 8), 4*x + 2)]
+            sage: for (i,j),v in entries: M[i,j] = v
+            sage: M.left_kernel()
+            Vector space of degree 10 and dimension 2 over Finite Field in x of size 5^2
+            Basis matrix:
+            [0 0 0 0 1 0 0 0 0 0]
+            [0 0 0 0 0 0 0 1 0 0]
+            sage: M.left_kernel_matrix()    # optional: meataxe
+            [0 0 0 0 1 0 0 0 0 0]
+            [0 0 0 0 0 0 0 1 0 0]
 
-        M.left_kernel_matrix()*M is a null matrix
         """
+        cdef Matrix_gfpn_dense OUT = self.fetch("left_kernel_matrix")
+        if OUT is not None:
+            return OUT
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT = type(self).__new__(type(self))
         OUT.Data = MatNullSpace(self.Data)
         if OUT.Data == NULL:
-            return OUT
+            raise ArithmeticError("Error computing left kernel matrix")
         OUT._nrows = OUT.Data.Nor
         OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
@@ -1074,26 +1433,97 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         OUT._base_ring = self._base_ring
         OUT._converter = self._converter
         OUT._cache = {}
+        self.cache("left_kernel_matrix", OUT)
         return OUT
 
-    def lead(self):
+    def _echelon_in_place_classical(self, reduced=True):
         """
-(f,i) = M.lead() <=> f=M[0,i] is the first non-zero coefficient in the first row of M
+        Change this matrix into echelon form, using classical Gaussian elimination.
 
-If the first row of M has no non-zero entry then f==0
-        """
-        cdef int i
-        cdef int fe
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        FfSetField(self.Data.Field)
-        for i from 0 <= i < self.Data.Noc:
-            fe = FfToInt(FfExtract(self.Data.Data,i))
-            if fe:
-                return fe, i
-        return 0, self.Data.Noc
+        INPUT:
 
-    def _echelon_in_place_classical(self, reduced=True):
+        - ``reduced`` (optional, default ``True``) -- if ``True``,
+          compute the row-reduced echelon form; otherwise, only a
+          semi-echelon form is computed.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K, 10)()
+            sage: entries = [((0, 2), x), ((0, 4), 3*x + 2),
+            ....: ((0, 8), 2*x), ((1, 1), x + 3), ((1, 5), 3*x),
+            ....: ((1, 6), x + 4), ((2, 3), 2*x), ((2, 5), 4*x + 1),
+            ....: ((2, 6), 4), ((3, 4), x + 4), ((3, 5), x + 1),
+            ....: ((5, 5), 3*x), ((5, 7), x + 3), ((6, 1), x),
+            ....: ((6, 2), x + 1), ((6, 5), x + 1), ((8, 2), 4),
+            ....: ((8, 8), 4), ((8, 9), x + 3), ((9, 8), 4*x + 2)]
+            sage: for (i,j),v in entries: M[i,j] = v
+            sage: M
+            [      0       0       x       0 3*x + 2       0       0       0     2*x       0]
+            [      0   x + 3       0       0       0     3*x   x + 4       0       0       0]
+            [      0       0       0     2*x       0 4*x + 1       4       0       0       0]
+            [      0       0       0       0   x + 4   x + 1       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0     3*x       0   x + 3       0       0]
+            [      0       x   x + 1       0       0   x + 1       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       4       0       0       0       0       0       4   x + 3]
+            [      0       0       0       0       0       0       0       0 4*x + 2       0]
+            sage: M.echelon_form()   # indirect doctest
+            [      0       1       0       0       0       0       0       0       0 4*x + 4]
+            [      0       0       1       0       0       0       0       0       0 4*x + 2]
+            [      0       0       0       1       0       0       0       0       0 3*x + 4]
+            [      0       0       0       0       1       0       0       0       0 3*x + 3]
+            [      0       0       0       0       0       1       0       0       0 2*x + 3]
+            [      0       0       0       0       0       0       1       0       0       x]
+            [      0       0       0       0       0       0       0       1       0 2*x + 2]
+            [      0       0       0       0       0       0       0       0       1       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        A semi-echelon form can be produced by invoking the single-underscore
+        method directly::
+
+            sage: N = copy(M)
+            sage: N._echelon_in_place_classical(reduced=False)      # optional: meataxe
+            sage: N                                                 # optional: meataxe
+            [      0       0       x       0 3*x + 2       0       0       0     2*x       0]
+            [      0   x + 3       0       0       0     3*x   x + 4       0       0       0]
+            [      0       0       0     2*x       0 4*x + 1       4       0       0       0]
+            [      0       0       0       0   x + 4   x + 1       0       0       0       0]
+            [      0       0       0       0       0     3*x       0   x + 3       0       0]
+            [      0       0       0       0       0       0 2*x + 2     4*x 3*x + 3       0]
+            [      0       0       0       0       0       0       0   x + 1       1   x + 3]
+            [      0       0       0       0       0       0       0       0 4*x + 2       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        TESTS:
+
+        We verify that the above echelon form is consistent with Sage's generic
+        implementation of dense matrices::
+
+            sage: type(M)                           # optional: meataxe
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: MS = M.parent()
+            sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+            sage: MS._MatrixSpace__matrix_class = Matrix_generic_dense
+            sage: X = MS(M._list())
+            sage: type(X)
+            <type 'sage.matrix.matrix_generic_dense.Matrix_generic_dense'>
+            sage: X.echelon_form()
+            [      0       1       0       0       0       0       0       0       0 4*x + 4]
+            [      0       0       1       0       0       0       0       0       0 4*x + 2]
+            [      0       0       0       1       0       0       0       0       0 3*x + 4]
+            [      0       0       0       0       1       0       0       0       0 3*x + 3]
+            [      0       0       0       0       0       1       0       0       0 2*x + 3]
+            [      0       0       0       0       0       0       1       0       0       x]
+            [      0       0       0       0       0       0       0       1       0 2*x + 2]
+            [      0       0       0       0       0       0       0       0       1       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        """
         if self._nrows == 0 or self._ncols == 0:
             self.cache('in_echelon_form',True)
             self.cache('rank', 0)
@@ -1110,6 +1540,7 @@ If the first row of M has no non-zero entry then f==0
         self.cache('rank', r)
         # Next, we do permutations to achieve the reduced echelon form,
         # if requested.
+        sig_on()
         if reduced:
             pivs = [(self.Data.PivotTable[i],i) for i in range(r)]
             pivs.sort()
@@ -1146,6 +1577,7 @@ If the first row of M has no non-zero entry then f==0
             self.Data.Data = <PTR>check_realloc(self.Data.Data, FfCurrentRowSize*self._nrows)
             memset(self.Data.Data + FfCurrentRowSize*self.Data.Nor, FF_ZERO, FfCurrentRowSize*(self._nrows-self.Data.Nor))
             self.Data.Nor = self._nrows
+        sig_off()
         self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
         self.cache('in_echelon_form',True)
 

From 7c389692bbc757d8d61e7f25e671527a80a7f6c5 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Wed, 23 Sep 2015 00:07:33 +0200
Subject: [PATCH 07/23] Fix computation of row-reduced echelon form

---
 src/sage/libs/meataxe.pxd             |  1 +
 src/sage/matrix/matrix_gfpn_dense.pyx | 44 ++++++++++++++++++---------
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index 79a8d03448d..fc76bfc781e 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -65,6 +65,7 @@ cdef extern from "meataxe.h":
     void FfMulRow(PTR row, FEL mark)
     void FfAddMulRow(PTR dest, PTR src, FEL f)
     PTR FfAddRow(PTR dest, PTR src)
+    PTR FfSubRow(PTR dest, PTR src)
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index dc0c978c8ef..5ab82de2f83 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1523,6 +1523,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             [      0       0       0       0       0       0       0       0       0       0]
             [      0       0       0       0       0       0       0       0       0       0]
 
+        The following was a problem in a preliminary version of the code::
+
+            sage: K.<a> = GF(25)
+            sage: M = MatrixSpace(K, 2, 4)([4, 4, 1, 0, 0, 2*a+1, a+2, 1])
+            sage: M
+            [      4       4       1       0]
+            [      0 2*a + 1   a + 2       1]
+            sage: M.echelonize()
+            sage: M
+            [      1       0 3*a + 4 2*a + 2]
+            [      0       1     2*a 3*a + 3]
+
         """
         if self._nrows == 0 or self._ncols == 0:
             self.cache('in_echelon_form',True)
@@ -1546,7 +1558,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             pivs.sort()
             if pivs != [(self.Data.PivotTable[i],i) for i in range(r)] or self.Data.Nor < self._nrows:
                 # We copy the row one by one, sorting their pivot positions
-                # and scaling the pivot to one.
                 old = self.Data.Data
                 self.Data.Data = FfAlloc(self._nrows)
                 for i, (pos,j) in enumerate(pivs):
@@ -1554,22 +1565,25 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     dest = self.Data.Data+FfCurrentRowSize*i
                     memcpy(dest, old+FfCurrentRowSize*j, FfCurrentRowSize)
                     self.Data.PivotTable[i] = pos
-                    piv = FfExtract(dest, pos)
-                    assert piv!=FF_ZERO
-                    if piv != FF_ONE:
-                        FfMulRow(dest, mtx_tmultinv[piv])
                 free(old)
                 self.Data.Nor = self._nrows
-                # Finally, we annulate everything above the pivots
-                # (currently, we only know that the matrix is zero
-                # below the pivots).
-                for i from 1 <= i < r:
-                    src = MatGetPtr(self.Data, i)
-                    for j from 0 <= j < i:
-                        dest = MatGetPtr(self.Data, j)
-                        piv = FfExtract(dest, self.Data.PivotTable[i])
-                        if piv != FF_ZERO:
-                            FfAddMulRow(dest, src, mtx_taddinv[piv])
+            # Now, the pivot columns are strictly increasing.
+            # We now normalize each row and eliminate every entry
+            # above its pivot (so far, we only know that the matrix
+            # is zero below the pivots).
+            for i from 0 <= i < r:
+                src = MatGetPtr(self.Data, i)
+                piv = FfExtract(src, self.Data.PivotTable[i])
+                assert piv!=FF_ZERO
+                if piv != FF_ONE:
+                    FfMulRow(src, mtx_tmultinv[piv])
+                for j from 0 <= j < i:
+                    dest = MatGetPtr(self.Data, j)
+                    piv = FfExtract(dest, self.Data.PivotTable[i])
+                    if piv != FF_ONE:
+                        FfAddMulRow(dest, src, mtx_taddinv[piv])
+                    else:
+                        FfSubRow(dest, src)
         elif self.Data.Nor < self._nrows:
             # Some rows may have vanished. In SageMath, we
             # want that the number of rows does not change,

From 55a278da06ba77fdfde839aa2e45d43a6806f2fb Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Wed, 23 Sep 2015 00:55:03 +0200
Subject: [PATCH 08/23] Fix doctests when meataxe is installed

---
 src/sage/matrix/constructor.py        |  9 +++++++--
 src/sage/matrix/matrix2.pyx           | 22 ++++++++++++++++------
 src/sage/matrix/matrix_gfpn_dense.pyx |  2 +-
 src/sage/matrix/matrix_space.py       |  2 +-
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/sage/matrix/constructor.py b/src/sage/matrix/constructor.py
index 97348ba1468..53f21793b2a 100644
--- a/src/sage/matrix/constructor.py
+++ b/src/sage/matrix/constructor.py
@@ -1018,7 +1018,7 @@ def random_matrix(ring, nrows, ncols=None, algorithm='randomize', *args, **kwds)
          eigenvectors, if computed by hand, will have only integer
          entries.
 
-    -  ``*args, **kwds`` - arguments and keywords to describe additional 
+    -  ``*args, **kwds`` - arguments and keywords to describe additional
        properties. See more detailed documentation below.
 
     .. warning::
@@ -1176,9 +1176,14 @@ def random_matrix(ring, nrows, ncols=None, algorithm='randomize', *args, **kwds)
 
     The default implementation of :meth:`~sage.matrix.matrix2.randomize` relies
     on the ``random_element()`` method for the base ring.  The ``density`` and
-    ``sparse`` keywords behave as described above. ::
+    ``sparse`` keywords behave as described above. Since we have a different
+    randomisation when using the optional meataxe package, we have to make sure
+    that we use the default implementation in this test::
 
         sage: K.<a>=FiniteField(3^2)
+        sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+        sage: MS = MatrixSpace(K, 2, 5)
+        sage: MS._MatrixSpace__matrix_class = Matrix_generic_dense
         sage: random_matrix(K, 2, 5)
         [      1       a       1 2*a + 1       2]
         [    2*a   a + 2       0       2       1]
diff --git a/src/sage/matrix/matrix2.pyx b/src/sage/matrix/matrix2.pyx
index e973cfd5bf0..c0b731d0f79 100644
--- a/src/sage/matrix/matrix2.pyx
+++ b/src/sage/matrix/matrix2.pyx
@@ -779,7 +779,7 @@ cdef class Matrix(matrix1.Matrix):
             36.0000000000000
 
         The permanent above is directed to the Sloane's sequence :oeis:`A079908`
-        ("The Dancing School Problems") for which the third term is 36: 
+        ("The Dancing School Problems") for which the third term is 36:
 
         ::
 
@@ -3352,8 +3352,6 @@ cdef class Matrix(matrix1.Matrix):
             verbose ...
             verbose 1 (<module>) computing right kernel matrix over an arbitrary field for 3x4 matrix
             ...
-            verbose 1 (<module>) done computing right kernel matrix over an arbitrary field for 3x4 matrix
-            ...
             Vector space of degree 4 and dimension 2 over Finite Field in a of size 5^2
             Basis matrix:
             [      1       0 3*a + 4 2*a + 2]
@@ -3800,13 +3798,25 @@ cdef class Matrix(matrix1.Matrix):
             [      0       1     2*a 3*a + 3]
             sage: A*K.basis_matrix().transpose() == zero_matrix(F, 3, 2)
             True
-            sage: B = copy(A)
+
+        In the following test, we have to force usage of
+        :class:`~sage.matrix.matrix_generic_dense.Matrix_generic_dense`,
+        since the option ``basis = 'pivot'`` would simply yield the same
+        result as the previous test, if the optional meataxe package is
+        installed. ::
+
+            sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+            sage: B = Matrix_generic_dense(A.parent(), A.list(), False, False)
             sage: P = B.right_kernel(basis = 'pivot'); P
             Vector space of degree 4 and dimension 2 over Finite Field in a of size 5^2
             User basis matrix:
             [      4       4       1       0]
             [  a + 2 3*a + 3       0       1]
-            sage: B*P.basis_matrix().transpose() == zero_matrix(F, 3, 2)
+
+        If the optional meataxe package is installed, we again have to make sure
+        to work with a copy of B that has the same type as ``P.basis_matrix()``::
+
+            sage: B.parent()(B.list())*P.basis_matrix().transpose() == zero_matrix(F, 3, 2)
             True
             sage: K == P
             True
@@ -8374,7 +8384,7 @@ cdef class Matrix(matrix1.Matrix):
 
             sage: filename = tmp_filename(ext='.png')
             sage: img.save(filename)
-            sage: open(filename).read().startswith('\x89PNG') 
+            sage: open(filename).read().startswith('\x89PNG')
             True
         """
         cdef int x, y, _x, _y, v, bi, bisq
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 5ab82de2f83..b7ec01a0b14 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1436,7 +1436,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.cache("left_kernel_matrix", OUT)
         return OUT
 
-    def _echelon_in_place_classical(self, reduced=True):
+    def _echelon_in_place_classical(self, reduced=True, **kwds):
         """
         Change this matrix into echelon form, using classical Gaussian elimination.
 
diff --git a/src/sage/matrix/matrix_space.py b/src/sage/matrix/matrix_space.py
index bb5802ab28b..b0021507536 100644
--- a/src/sage/matrix/matrix_space.py
+++ b/src/sage/matrix/matrix_space.py
@@ -986,7 +986,7 @@ def _get_matrix_class(self):
             sage: type(matrix(GF(2), 2, range(4)))
             <type 'sage.matrix.matrix_mod2_dense.Matrix_mod2_dense'>
             sage: type(matrix(GF(64,'z'), 2, range(4)))
-            <type 'sage.matrix.matrix_mod2e_dense.Matrix_mod2e_dense'>
+            <type 'sage.matrix.matrix_gf2e_dense.Matrix_gf2e_dense'>
             sage: type(matrix(GF(125,'z'), 2, range(4)))
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
         """

From f73337711df114571d1be0fa61140a41628703b7 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sat, 26 Sep 2015 00:48:58 +0200
Subject: [PATCH 09/23] Use and propagate specific return values on error in
 matrix-related MeatAxe functions.

---
 .../meataxe/patches/UseErrorPropagation.patch | 1147 +++++++++++++++++
 src/sage/libs/meataxe.pxd                     |   14 +-
 2 files changed, 1156 insertions(+), 5 deletions(-)
 create mode 100644 build/pkgs/meataxe/patches/UseErrorPropagation.patch

diff --git a/build/pkgs/meataxe/patches/UseErrorPropagation.patch b/build/pkgs/meataxe/patches/UseErrorPropagation.patch
new file mode 100644
index 00000000000..00745e8b967
--- /dev/null
+++ b/build/pkgs/meataxe/patches/UseErrorPropagation.patch
@@ -0,0 +1,1147 @@
+In functions that appear in matrix arithmetic, use specific return values
+on error, and propagate errors. This is *not* done in other parts of
+MeatAxe (e.g., not for greased matrices or polynomials) and not for
+standalone programs.
+
+AUTHOR:
+
+- Simon King, 2015-09-26
+
+diff --git a/src/cfinfo.c b/src/cfinfo.c
+index 293526b..9c1a004 100644
+--- a/src/cfinfo.c
++++ b/src/cfinfo.c
+@@ -215,7 +215,7 @@ int Lat_ReadInfo(Lat_Info *li, const char *basename)
+ 	    }
+ 	    for (i = 0; i < li->NCf; ++i)
+ 	    {
+-		ReadWord(f,&(li->Cf[i].idword),&(li->Cf[i].idpol),fn);
++		if (!ReadWord(f,&(li->Cf[i].idword),&(li->Cf[i].idpol),fn)) return -1;
+ 		if (StfMatch(f,i < li->NCf - 1 ? "," : "];") != 0)
+ 		{
+ 		    MTX_ERROR2("%s: %E",fn,MTX_ERR_FILEFMT);
+@@ -232,7 +232,7 @@ int Lat_ReadInfo(Lat_Info *li, const char *basename)
+ 	    }
+ 	    for (i = 0; i < li->NCf; ++i)
+ 	    {
+-		ReadWord(f,&(li->Cf[i].peakword),&(li->Cf[i].peakpol),fn);
++		if (!ReadWord(f,&(li->Cf[i].peakword),&(li->Cf[i].peakpol),fn)) return -1;
+ 		if (StfMatch(f,i < li->NCf - 1 ? "," : "];") != 0)
+ 		{
+ 		    MTX_ERROR2("%s: %E",fn,MTX_ERR_FILEFMT);
+diff --git a/src/chbasis.c b/src/chbasis.c
+index 34cf886..f1ee2e8 100644
+--- a/src/chbasis.c
++++ b/src/chbasis.c
+@@ -61,7 +61,8 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ /** Conjugate a list @em gen of @em ngen square matrices over the same
+  *  field and of the same dimensions by a mattrix @em trans
+  *  and write the result into @em newgen. If @em gen == @em newgen, then
+- *  the previous content of @em newgen will be overridden. **/
++ *  the previous content of @em newgen will be overridden.
++ *  Return -1 on error and 0 on success. **/
+ int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[])
+ 
+@@ -83,18 +84,36 @@ int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+     }
+ 
+     Matrix_t *tmp = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++    if (!tmp) return -1;
+     size_t tmpsize = FfCurrentRowSize*trans->Nor;
+     for (i = 0; i < ngen; ++i)
+     {
+         MTX_VERIFY(gen[i]->Nor==trans->Nor);
+         MTX_VERIFY(gen[i]->Noc==trans->Noc);
+         memset(tmp->Data, FF_ZERO, tmpsize);
+-        MatMulStrassen(tmp, trans, gen[i]);
++        if (!MatMulStrassen(tmp, trans, gen[i]))
++        {
++			MatFree(tmp);
++			return -1;
++		}
+         if ((const Matrix_t **)newgen == gen)
+             memset(newgen[i]->Data, FF_ZERO, tmpsize);
+         else
++        {
+             newgen[i] = MatAlloc(trans->Field, trans->Nor, trans->Noc);
+-        MatMulStrassen(newgen[i], tmp, bi);
++            if (!newgen[i])
++            {
++                MatFree(tmp);
++                MatFree(bi);
++                return -1;
++            }
++        }
++        if (!MatMulStrassen(newgen[i], tmp, bi))
++        {
++            MatFree(tmp);
++            MatFree(bi);
++            return -1;
++        }
+     }
+     MatFree(bi);
+     MatFree(tmp);
+diff --git a/src/ffio.c b/src/ffio.c
+index 92f9360..d2e3f1c 100644
+--- a/src/ffio.c
++++ b/src/ffio.c
+@@ -71,8 +71,11 @@ int FfReadRows(FILE *f, PTR buf, int n)
+         if (fread(b,FfTrueRowSize(FfNoc),1,f) != 1) break;
+ 	b += FfCurrentRowSize;
+     }
+-    if (ferror(f)) 
+-	MTX_ERROR("Read failed: %S");
++    if (ferror(f))
++    {
++        MTX_ERROR("Read failed: %S");
++        return -1;
++    }
+     return i;
+ }
+ 
+@@ -106,8 +109,11 @@ int FfWriteRows(FILE *f, PTR  buf, int n)
+         if (fwrite(b,FfTrueRowSize(FfNoc),1,f) != 1) break;
+ 	b += FfCurrentRowSize;
+     }
+-    if (ferror(f)) 
+-	MTX_ERROR("Write failed: %S");
++    if (ferror(f))
++    {
++        MTX_ERROR("Write failed: %S");
++        return -1;
++    }
+     return i;
+ }
+ 
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 6ef2f72..431f01a 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -304,7 +304,10 @@ static FILE *OpenTableFile(int fl)
+     /* Create the table file.
+        ---------------------- */
+     if (FfMakeTables(fl) != 0)
+-	MTX_ERROR("Unable to build arithmetic tables");
++	{
++        MTX_ERROR("Unable to build arithmetic tables");
++        return NULL;
++    }
+     fd = SysFopen(fn,FM_READ|FM_LIB);
+     return fd;
+ }
+@@ -363,8 +366,7 @@ static int ReadTableFile(FILE *fd, int field)
+ 	return -1;
+     }
+     FfOrder = field;
+-    FfSetNoc(FfOrder);
+-    return 0;
++    return FfSetNoc(FfOrder);
+ }
+ 
+ 
+@@ -471,7 +473,7 @@ size_t FfTrueRowSize(int noc)
+  ** Embed a subfield.
+  ** @param a Element of the subfield field.
+  ** @param subfield Subfield order. Must be a divisor of the current field order.
+- ** @return @em a, embedded into the current field.
++ ** @return @em a, embedded into the current field, or 255 on error.
+  **/ 
+ 
+ FEL FfEmbed(FEL a, int subfield)
+@@ -482,7 +484,9 @@ FEL FfEmbed(FEL a, int subfield)
+ 	return a;
+     for (i = 0; mtx_embedord[i] != subfield && i < 4; ++i);
+     if (i >= 4)
+-	MTX_ERROR2("Cannot embed GF(%d) into GF(%d)",(int)subfield,(int)FfOrder);
++	{ MTX_ERROR2("Cannot embed GF(%d) into GF(%d)",(int)subfield,(int)FfOrder);
++      return (FEL)255;
++    }
+     return mtx_embed[i][a];
+ }
+ 
+@@ -498,6 +502,7 @@ FEL FfEmbed(FEL a, int subfield)
+  ** <tt>FfSetField(subfield)</tt>.
+  ** @param a Element of the current field.
+  ** @param subfield Subfield order. Must be a divisor of the current field order.
++ ** Return 255 on error.
+  **/
+ 
+ FEL FfRestrict(FEL a, int subfield)
+@@ -511,6 +516,7 @@ FEL FfRestrict(FEL a, int subfield)
+     {
+ 	MTX_ERROR2("Cannot restrict GF(%d) to GF(%d)",(int)FfOrder,
+ 	    (int)subfield);
++        return (FEL)255;
+     }
+     return mtx_restrict[i][a];
+ }
+diff --git a/src/maddmul.c b/src/maddmul.c
+index f5c171d..24ad3a5 100644
+--- a/src/maddmul.c
++++ b/src/maddmul.c
+@@ -59,7 +59,7 @@ Matrix_t *MatAddMul(Matrix_t *dest, const Matrix_t *src, FEL coeff)
+ 	   ------------ */
+ 	PTR dp = dest->Data, sp = src->Data;
+ 	int n;
+-	FfSetField(src->Field);
++	FfSetField(src->Field);  /* No error checking */
+ 	FfSetNoc(src->Noc);
+ 	for (n = src->Nor; n > 0; --n)
+ 	{
+diff --git a/src/maketabF.c b/src/maketabF.c
+index d7af83e..0fa26fb 100644
+--- a/src/maketabF.c
++++ b/src/maketabF.c
+@@ -175,7 +175,7 @@ static void polymod(POLY a, POLY b)
+    testprim() - Test for primitivity.
+    ----------------------------------------------------------------- */
+ 
+-static void testprim()
++static int testprim()
+ {
+     int i, a[256];
+ 
+@@ -187,7 +187,9 @@ static void testprim()
+ 	{
+ 	    fprintf(stderr,"*** a[%d]=%d.",i,a[i]);
+ 	    MTX_ERROR("Polynome is not primitive.");
++        return 1;
+ 	}
++    return 0;
+ }
+ 
+ 
+@@ -195,7 +197,7 @@ static void testprim()
+    initarith() - Initialize index and zech logarithm tables.
+    ----------------------------------------------------------------- */
+ 
+-static void initarith()
++static int initarith()
+ {	int i,elem;
+ 	POLY a;
+ 
+@@ -214,7 +216,7 @@ static void initarith()
+ 		polmultx(a);
+ 		polymod(a,irred);
+         }
+-	testprim();
++	if (testprim()) return 1;
+ 
+ 	/* Calculate zech logarithms
+ 	   ------------------------- */
+@@ -222,6 +224,7 @@ static void initarith()
+ 	{	elem = (int)((i%P)==P-1 ? i+1-P : i+1); /* add 1 */
+ 		zech[indx[i]]=indx[elem]; /* Zech-table=result */
+         }
++    return 0;
+ }
+ 
+ 
+@@ -314,7 +317,7 @@ static BYTE pack(BYTE a[8])
+ 	and initialize tables.
+    ----------------------------------------------------------------- */
+ 
+-static void writeheader()
++static int writeheader()
+ {
+     int i, j;
+ 
+@@ -324,6 +327,7 @@ static void writeheader()
+     {
+ 	perror(filename);
+ 	MTX_ERROR("Cannot open table file");
++    return 1;
+     }
+     for (CPM=1,maxmem=Q; (long)maxmem * Q <= 256L; ++CPM, maxmem *= Q);
+     for (i = 0; irrednrs[i] != (int) Q && irrednrs[i] != 0; ++i);
+@@ -333,7 +337,7 @@ static void writeheader()
+         for (j = 0; j <= MAXGRAD; j++)
+             irred[j] = irreducibles[i][MAXGRAD-j];
+ 	G = P;		/* Generator is X */
+-	initarith();	/* Init index- and Zech-tables */
++	if (initarith()) return 1;	/* Init index- and Zech-tables */
+     }
+     else
+     {	
+@@ -357,6 +361,7 @@ static void writeheader()
+     }
+     MESSAGE(1,("Generator   : %ld\n",info[1]));
+     MESSAGE(1,("Packing     : %ld/byte\n",info[3]));
++    return 0;
+ }
+ 
+ 
+@@ -364,14 +369,14 @@ static void writeheader()
+    checkq() - Set Q and N. Verify that Q is a prime power.
+    ----------------------------------------------------------------- */
+ 
+-static void checkq(long l)
++static int checkq(long l)
+ {
+     long q, d;
+ 
+     if (l < 2 || l > 256)
+     {
+-	fprintf(stderr,"Field order out of range (2-256)\n");
+-	exit(EXIT_ERR);
++	MTX_ERROR1("Field order out of range (2-256): %E", MTX_ERR_RANGE);
++	return 1;
+     }
+ 
+     Q = l;
+@@ -381,9 +386,10 @@ static void checkq(long l)
+        	q /= d;
+     if (q != 1)
+     {
+-	fprintf(stderr,"Illegal Field order\n");
+-	exit(EXIT_ERR);
++	MTX_ERROR("Illegal Field order\n");
++	return 1;
+     }
++    return 0;
+ }
+ 
+ 
+@@ -407,7 +413,7 @@ static void inittables()
+    mkembed() - Calculate embeddings of all subfields.
+    ----------------------------------------------------------------- */
+ 
+-static void mkembed()
++static int mkembed()
+ {
+     int n;	/* Degree of subfield over Z_p */
+     long q; /* subfield order */
+@@ -456,6 +462,7 @@ static void mkembed()
+ 	{
+ 	    fprintf(stderr,"*** q=%ld, Q=%ld.",q,Q);
+ 	    MTX_ERROR("Internal error.");
++        return 1;
+ 	}
+ 
+ 	/* Calculate a generator for the subfield
+@@ -502,13 +509,13 @@ static void mkembed()
+ 	    fflush(stdout);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
+ static int Init(int field)
+ {
+-    checkq(field);
+-    return 0;
++    return checkq(field);
+ }
+ 
+ /* -----------------------------------------------------------------
+@@ -526,7 +533,7 @@ int FfMakeTables(int field)
+        ---------- */
+     if (Init(field) != 0)
+ 	return 1;
+-    writeheader();			/* Open file and write header */
++    if (writeheader()) return 1;			/* Open file and write header */
+     inittables();
+ 
+     /* Make insert table
+@@ -618,7 +625,7 @@ int FfMakeTables(int field)
+ 	}
+     }
+ 
+-    mkembed();
++    if (mkembed()) return 1;
+ 
+     MESSAGE(1,("Writing tables to %s\n",filename));
+     if (
+@@ -639,6 +646,7 @@ int FfMakeTables(int field)
+     {
+ 	perror(filename);
+ 	MTX_ERROR("Error writing table file");
++    return 1;
+     }
+     fclose(fd);
+     return(0);
+diff --git a/src/matadd.c b/src/matadd.c
+index 54dbcb1..2d86d86 100644
+--- a/src/matadd.c
++++ b/src/matadd.c
+@@ -48,7 +48,7 @@ Matrix_t *MatAdd(Matrix_t *dest, const Matrix_t *src)
+        ------------------- */
+     dp = dest->Data;
+     sp = src->Data;
+-    FfSetField(src->Field);
++    FfSetField(src->Field);   /* No error checking */
+     FfSetNoc(src->Noc);
+     for (n = src->Nor; n > 0; --n)
+     {
+diff --git a/src/matclean.c b/src/matclean.c
+index e7307bf..16f02d6 100644
+--- a/src/matclean.c
++++ b/src/matclean.c
+@@ -53,7 +53,7 @@ int MatClean(Matrix_t *mat, const Matrix_t *sub)
+ 
+     /* Clean
+        ----- */
+-    FfSetNoc(mat->Noc);
++    FfSetNoc(mat->Noc);  /* No error checking */
+     for (i = 0; i < mat->Nor; ++i)
+     {
+ 	PTR m = MatGetPtr(mat,i);
+diff --git a/src/matcmp.c b/src/matcmp.c
+index b778ec4..d503285 100644
+--- a/src/matcmp.c
++++ b/src/matcmp.c
+@@ -38,7 +38,7 @@ MTX_DEFINE_FILE_INFO
+  ** not necessarily mean that an error has occured.
+  ** @param a First matrix.
+  ** @param b Second matrix.
+- ** @return 0 if the matrices are equal, nonzero otherwise (see description).
++ ** @return 0 if the matrices are equal, nonzero otherwise (see description), -2 on error.
+  **/
+ 
+ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+@@ -50,7 +50,7 @@ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+     if (!MatIsValid(a) || !MatIsValid(b))
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return -1;
++	return -2;
+     }
+ 
+     /* Compare fields and dimensions
+@@ -65,7 +65,7 @@ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+     /* Compare the entries row by row. We do not use memcmp on the
+        whole matrix because we must ignore padding bytes.
+        ----------------------------------------------------------- */
+-    FfSetField(a->Field);
++    FfSetField(a->Field);  /* No error checking */
+     FfSetNoc(a->Noc);
+     for (i = 0; i < a->Nor; ++i)
+     {
+diff --git a/src/matcopy.c b/src/matcopy.c
+index 75852dd..c3e7850 100644
+--- a/src/matcopy.c
++++ b/src/matcopy.c
+@@ -105,7 +105,7 @@ int MatCopyRegion(Matrix_t *dest, int destrow, int destcol,
+ 	{
+ #ifdef PARANOID
+ 	    FEL f;
+-	    FfSetNoc(src->Noc);
++	    FfSetNoc(src->Noc);  /* No error checking */
+ 	    f = FfExtract(s,k);
+ 	    FfSetNoc(dest->Noc);
+ 	    FfInsert(d,destcol+k-col1,f);
+diff --git a/src/matcore.c b/src/matcore.c
+index 1f27dfd..0dc9d92 100644
+--- a/src/matcore.c
++++ b/src/matcore.c
+@@ -131,7 +131,7 @@ Matrix_t *MatAlloc(int field, int nor, int noc)
+ 	SysFree(m);
+ 	return NULL;
+     }
+-    FfSetNoc(noc);
++    if (FfSetNoc(noc)) return NULL;
+     m->Magic = MAT_MAGIC;
+     m->Field = field;
+     m->Nor = nor;
+diff --git a/src/matcut.c b/src/matcut.c
+index fde0662..f274311 100644
+--- a/src/matcut.c
++++ b/src/matcut.c
+@@ -79,11 +79,12 @@ Matrix_t *MatCut(const Matrix_t *src, int row1, int col1, int nrows, int ncols)
+     /* Initialize pointers to the source and destination matrix 
+        -------------------------------------------------------- */
+     s = MatGetPtr(src,row1);
++    if (!s) return NULL;
+     d = result->Data;
+ 
+     /* Copy the requested data
+        ----------------------- */
+-    FfSetNoc(ncols);
++    if (FfSetNoc(ncols)) return NULL;
+     for (n = nrows; n > 0; --n)
+     {
+ 	if (col1 == 0)
+@@ -95,9 +96,9 @@ Matrix_t *MatCut(const Matrix_t *src, int row1, int col1, int nrows, int ncols)
+ 	    {
+ #ifdef PARANOID
+ 		FEL f;
+-		FfSetNoc(src->Noc);
++		FfSetNoc(src->Noc);  /* No error checking */
+ 		f = FfExtract(s,col1+k);
+-		FfSetNoc(ncols);
++		FfSetNoc(ncols);  /* error was checked above */
+ 		FfInsert(d,k,f);
+ #else
+ 		FfInsert(d,k,FfExtract(s,col1+k));
+diff --git a/src/matech.c b/src/matech.c
+index ed31cf4..ee52ebe 100644
+--- a/src/matech.c
++++ b/src/matech.c
+@@ -124,7 +124,7 @@ int MatEchelonize(Matrix_t *mat)
+ 
+     /* Build the pivot table
+        --------------------- */
+-    FfSetField(mat->Field);
++    FfSetField(mat->Field);  /* No error checking */
+     FfSetNoc(mat->Noc);
+     rank = zmkechelon(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+ 
+@@ -163,13 +163,14 @@ long MatNullity(const Matrix_t *mat)
+  ** This function calculates the dimension of the null-space of a matrix
+  ** and deletes the matrix.
+  ** @param mat Pointer to the matrix.
+- ** @return Nullity of @em mat, or -$ on error.
++ ** @return Nullity of @em mat, or $-1$ on error.
+  **/
+ 
+ long MatNullity__(Matrix_t *mat)
+ {
+     long nul;
+-    MatEchelonize(mat);
++    if (!mat) return -1;
++    if (MatEchelonize(mat)==-1) return -1;
+     nul = mat->Noc - mat->Nor;
+     MatFree(mat);
+     return nul;
+diff --git a/src/matins.c b/src/matins.c
+index 45c31e4..50fe9c1 100644
+--- a/src/matins.c
++++ b/src/matins.c
+@@ -54,7 +54,7 @@ Matrix_t *MatInsert_(Matrix_t *mat, const Poly_t *pol)
+ 	return NULL;
+     }
+ 
+-    FfSetField(mat->Field);
++    FfSetField(mat->Field);  /* No error checking */
+     FfSetNoc(nor);
+ 
+     /* Special case: p(x) = 0
+@@ -81,7 +81,10 @@ Matrix_t *MatInsert_(Matrix_t *mat, const Poly_t *pol)
+     /* Evaluate p(A)
+        ------------- */
+     if (pol->Degree > 1) 
+-	x = MatDup(mat);
++	{
++		x = MatDup(mat);
++		if (!x) return NULL;
++	}
+     if ((f = pol->Data[pol->Degree]) != FF_ONE)
+     {
+ 	for (l = nor, v = mat->Data; l > 0; --l, FfStepPtr(&v))
+@@ -147,6 +150,7 @@ Matrix_t *MatInsert(const Matrix_t *mat, const Poly_t *pol)
+     if (pol->Degree == 0)
+     {
+ 	x = MatAlloc(mat->Field,nor,nor);
++    if (!x) return NULL;
+ 	for (l = 0, v = x->Data; l < nor; ++l, FfStepPtr(&v))
+ 	    FfInsert(v,l,pol->Data[0]);
+ 	return x;
+@@ -155,6 +159,7 @@ Matrix_t *MatInsert(const Matrix_t *mat, const Poly_t *pol)
+     /* Evaluate p(A)
+        ------------- */
+     x = MatDup(mat);
++    if (!x) return NULL;
+     if ((f = pol->Data[pol->Degree]) != FF_ONE)
+     {
+ 	for (l = nor, v = x->Data; l > 0; --l, FfStepPtr(&v))
+diff --git a/src/matinv.c b/src/matinv.c
+index 217eb0e..990cbe7 100644
+--- a/src/matinv.c
++++ b/src/matinv.c
+@@ -114,6 +114,7 @@ Matrix_t *MatInverse(const Matrix_t *mat)
+     /* Copy matrix into workspace
+        -------------------------- */
+     tmp = FfAlloc(mat->Nor);
++    if (!tmp) return NULL;
+     memcpy(tmp,mat->Data,FfCurrentRowSize * mat->Nor);
+ 
+     /* Inversion
+diff --git a/src/matmul.c b/src/matmul.c
+index 20f5e88..bed30fc 100644
+--- a/src/matmul.c
++++ b/src/matmul.c
+@@ -63,7 +63,7 @@ Matrix_t *MatMul(Matrix_t *dest, const Matrix_t *src)
+ 
+     /* Matrix multiplication
+        --------------------- */
+-    FfSetField(src->Field);
++    FfSetField(src->Field); /* no error checking, since the matrix *exists* */
+     FfSetNoc(src->Noc);
+     result = tmp = FfAlloc(dest->Nor);
+     if (result == NULL)
+diff --git a/src/matnull.c b/src/matnull.c
+index 4f28566..2550b96 100644
+--- a/src/matnull.c
++++ b/src/matnull.c
+@@ -27,6 +27,8 @@ MTX_DEFINE_FILE_INFO
+     - |piv| contains a pivot table for the null space.
+     If |flags| is nonzero, the null-space is not reduced to echelon form,
+     and the contents of |piv| are undefined.
++
++    Return -1 on error, the dimension of the null-space on success.
+  ** @see 
+  **/
+ 
+@@ -40,7 +42,7 @@ static long znullsp(PTR matrix, long nor, int *piv, PTR nsp, int flags)
+ 
+     /* Make the identity matrix in <nsp>.
+        ---------------------------------- */
+-    FfSetNoc(nor);
++    if (FfSetNoc(nor)) return -1;
+     x = nsp;
+     for (i = 0; i < nor; ++i)
+     {
+@@ -61,13 +63,12 @@ static long znullsp(PTR matrix, long nor, int *piv, PTR nsp, int flags)
+ 
+ 	for (k = 0; k < i; ++k)
+ 	{
+-	    FfSetNoc(noc);
++	    FfSetNoc(noc);  /* No error checking, since noc used to be the previously assigned number of columns */
+ 	    if ((p = piv[k]) >= 0 && (f = FfExtract(x,p)) != FF_ZERO)
+ 	    {
+ 		f = FfNeg(FfDiv(f,FfExtract(xx,p)));
+-		FfSetNoc(noc);
+ 		FfAddMulRow(x,xx,f);
+-		FfSetNoc(nor);
++		FfSetNoc(nor);  /* we have asserted above that it doesn't fail */
+ 		FfAddMulRow(y,yy,f);
+ 	    }
+ 	    FfSetNoc(noc);
+@@ -151,11 +152,21 @@ Matrix_t *MatNullSpace_(Matrix_t *mat, int flags)
+     if (nsp == NULL) 
+ 	return NULL;
+     nsp->PivotTable = NREALLOC(nsp->PivotTable,int,mat->Nor);
++    if (!nsp->PivotTable)
++    {
++        MatFree(nsp);
++        return NULL;
++    }
+ 
+     /* Calculate the null-space
+        ------------------------ */
+-    FfSetNoc(mat->Noc);
++    FfSetNoc(mat->Noc);  /* No error checking */
+     dim = znullsp(mat->Data,mat->Nor,nsp->PivotTable,nsp->Data,flags);
++    if (dim==-1)
++    {
++        MatFree(nsp);
++        return NULL;
++    }
+     if (flags)
+     {
+ 	SysFree(nsp->PivotTable);
+diff --git a/src/matorder.c b/src/matorder.c
+index 16aec74..24b31a3 100644
+--- a/src/matorder.c
++++ b/src/matorder.c
+@@ -32,7 +32,7 @@ MTX_DEFINE_FILE_INFO
+  ** the order is greater than 1000000, or if the order on any cyclic
+  ** subspace is greater than 1000.
+  ** @param mat Pointer to the matrix.
+- ** @return The order of @em mat, or 1 on error.
++ ** @return The order of @em mat, or -1 on error.
+  **/
+ 
+ int MatOrder(const Matrix_t *mat)
+@@ -59,15 +59,29 @@ int MatOrder(const Matrix_t *mat)
+     FfSetNoc(mat->Noc);
+     nor = mat->Nor;
+     m1 = FfAlloc(nor);
++    if (!m1) return -1;
+     memcpy(m1,mat->Data,FfCurrentRowSize * nor);
+     bend = basis = FfAlloc(nor+1);
++    if (!bend)
++    {
++        SysFree(m1);
++        return -1;
++    }
+ 
+     piv = NALLOC(int,nor+1);
+     done = NALLOC(char,nor);
++    if (!piv || !done)
++    { SysFree(m1);
++      return -1;
++    }
+     memset(done,0,(size_t)nor);
+     v1 = FfAlloc(1);
+     v2 = FfAlloc(1);
+     v3 = FfAlloc(1);
++    if (!v1 || !v2 || !v3)
++    { SysFree(m1);
++      return -1;
++    }
+     tord = ord = 1;
+     dim = 0;
+     j1 = 1;
+diff --git a/src/matpivot.c b/src/matpivot.c
+index abe342a..c843282 100644
+--- a/src/matpivot.c
++++ b/src/matpivot.c
+@@ -71,7 +71,6 @@ static int zmkpivot(PTR matrix, int nor, int noc, int *piv, int *ispiv)
+ 
+ int MatPivotize(Matrix_t *mat)
+ {
+-    int rc;
+     int *newtab;
+     static int *is_pivot = NULL;
+     static int maxnoc = -1;
+@@ -106,9 +105,7 @@ int MatPivotize(Matrix_t *mat)
+        --------------------- */
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+-    rc = zmkpivot(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+-
+-    return rc;
++    return zmkpivot(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+ }
+ 
+ /**
+diff --git a/src/matpwr.c b/src/matpwr.c
+index cd66b5e..b06e5b2 100644
+--- a/src/matpwr.c
++++ b/src/matpwr.c
+@@ -119,8 +119,14 @@ Matrix_t *MatPower(const Matrix_t *mat, long n)
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+     tmp = FfAlloc(FfNoc);
++    if (!tmp) return NULL;
+     memcpy(tmp,mat->Data,FfCurrentRowSize * FfNoc);
+     tmp2 = FfAlloc(FfNoc);
++    if (!tmp2)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+     result = MatAlloc(mat->Field,mat->Nor,mat->Noc);
+     if (result != NULL)
+ 	matpwr_(n,tmp,result->Data,tmp2);
+diff --git a/src/matread.c b/src/matread.c
+index 031d100..06e6e6b 100644
+--- a/src/matread.c
++++ b/src/matread.c
+@@ -46,8 +46,9 @@ Matrix_t *MatRead(FILE *f)
+ 	return NULL;
+     if (FfReadRows(f,m->Data,m->Nor) != m->Nor)
+     {
+-	MatFree(m);
+-	return NULL;
++        MTX_ERROR("Number of given rows does not coincide with given row number");
++        MatFree(m);
++        return NULL;
+     }
+     return m;
+ }
+diff --git a/src/mattrace.c b/src/mattrace.c
+index f500248..772a6e4 100644
+--- a/src/mattrace.c
++++ b/src/mattrace.c
+@@ -21,7 +21,7 @@
+  ** This function calculates the sum of all diagonal elements of a matrix.
+  ** Note that the matrix need not be square.
+  ** @param mat Pointer to the matrix.
+- ** @return Trace of @a mat, @c FF_ZERO on error.
++ ** @return Trace of @a mat, @c 255 on error.
+  **/
+ 
+ FEL MatTrace(const Matrix_t *mat)
+@@ -35,7 +35,7 @@ FEL MatTrace(const Matrix_t *mat)
+        ------------------ */
+ #ifdef DEBUG
+     if (!MatIsValid(mat))
+-	return FF_ZERO;
++	return (FEL)255;
+ #endif
+ 
+     maxi = mat->Nor > mat->Noc ? mat->Noc : mat->Nor;
+diff --git a/src/matwrite.c b/src/matwrite.c
+index 1fb6af3..b364e80 100644
+--- a/src/matwrite.c
++++ b/src/matwrite.c
+@@ -44,7 +44,10 @@ int MatWrite(const Matrix_t *mat, FILE *f)
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+     if (FfWriteRows(f,mat->Data,mat->Nor) != mat->Nor)
+-	return -1;
++	{
++        MTX_ERROR("Cannot write rows");
++        return -1;
++    }
+     return 0;
+ }
+ 
+@@ -75,7 +78,10 @@ int MatSave(const Matrix_t *mat, const char *fn)
+     i = MatWrite(mat,f);
+     fclose(f);
+     if (i != 0)
+-	MTX_ERROR1("Cannot write matrix to %s",fn);
++	{
++        MTX_ERROR1("Cannot write matrix to %s",fn);
++        return -1;
++    }
+     return i;
+ }
+ 
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 368b37b..0efa7dd 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -135,8 +135,8 @@ PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len);
+ PTR FfAlloc(int nor);
+ int FfCmpRows(PTR p1, PTR p2);
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv);
+-void FfCleanRow2(PTR row, PTR matrix, int nor, const int *piv, PTR row2);
+-void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, 
++int FfCleanRow2(PTR row, PTR matrix, int nor, const int *piv, PTR row2);
++int FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, 
+     PTR row2, PTR mat2);
+ void FfCopyRow(PTR dest, PTR src);
+ FEL FfEmbed(FEL a, int subfield);
+diff --git a/src/mmulscal.c b/src/mmulscal.c
+index 281be16..9bff3ff 100644
+--- a/src/mmulscal.c
++++ b/src/mmulscal.c
+@@ -21,7 +21,7 @@
+  ** Multiply a Matrix by a Constant.
+  ** @param dest Pointer to the matrix.
+  ** @param coeff Value to multiply with.
+- ** @return The function returns @a dest.
++ ** @return The function returns @a dest, or NULL on error in debug mode only.
+  **/
+ 
+ Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
+diff --git a/src/mtensor.c b/src/mtensor.c
+index b2b9ac7..2fb90ca 100644
+--- a/src/mtensor.c
++++ b/src/mtensor.c
+@@ -85,6 +85,11 @@ Matrix_t *MatTensor(const Matrix_t *m1, const Matrix_t *m2)
+ 	   ---------------------------------------- */
+ 	x1 = m1->Data;
+ 	x3 = MatGetPtr(temat,i2);
++    if (!x3)
++    {
++        MatFree(temat);
++        return NULL;
++    }
+ 	FfSetNoc(temat->Noc);
+ 
+ 	/* Loop through all rows of <m1>
+diff --git a/src/quotient.c b/src/quotient.c
+index f44b556..eea0754 100644
+--- a/src/quotient.c
++++ b/src/quotient.c
+@@ -82,16 +82,23 @@ Matrix_t *QProjection(const Matrix_t *subspace, const Matrix_t *vectors)
+     sdim = subspace->Nor;
+     qdim = subspace->Noc - sdim;
+     result = MatAlloc(subspace->Field,vectors->Nor,qdim);
++    if (!result) return NULL;
+ 
+     /* Calculate the projection
+        ------------------------ */
+     FfSetNoc(subspace->Noc);
+     tmp = FfAlloc(1);
++    if (!tmp) return NULL;
+     non_piv = subspace->PivotTable + subspace->Nor;
+     for (i = 0; i < vectors->Nor; ++i)
+     {
+ 	int k;
+ 	PTR q = MatGetPtr(result,i);
++    if (!q)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FfCopyRow(tmp,MatGetPtr(vectors,i));
+ 	FfCleanRow(tmp,subspace->Data,sdim,subspace->PivotTable);
+ 	for (k = 0; k < qdim; ++k)
+@@ -158,14 +165,20 @@ Matrix_t *QAction(const Matrix_t *subspace, const Matrix_t *gen)
+ 
+     /* Calculate the action on the quotient
+        ------------------------------------ */
+-    FfSetNoc(dim);
++    FfSetNoc(dim);  /* No error checking, since dim is the ->Noc of an existing matrix */
+     tmp = FfAlloc(1);
++    if (!tmp) return NULL;
+     piv = subspace->PivotTable;
+     non_piv = piv + subspace->Nor;
+     for (k = 0; k < qdim; ++k)
+     {
+ 	int l;
+ 	PTR qx = MatGetPtr(action,k);
++    if (!qx)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FfCopyRow(tmp,MatGetPtr(gen,non_piv[k]));
+ 	FfCleanRow(tmp,subspace->Data,sdim,piv);
+ 	for (l = 0; l < qdim; ++l)
+diff --git a/src/saction.c b/src/saction.c
+index adae3cf..0aba44d 100644
+--- a/src/saction.c
++++ b/src/saction.c
+@@ -68,8 +68,14 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+     sdim = subspace->Nor;
+     FfSetField(subspace->Field);
+     action = MatAlloc(FfOrder,sdim,sdim);
+-    FfSetNoc(dim);
++    if (!action) return NULL;
++    FfSetNoc(dim);  /* No error checking, since dim is the ->Noc of an existing matrix */
+     tmp = FfAlloc(1);
++    if (!tmp)
++    {
++        MatFree(action);
++        return NULL;
++    }
+ 
+     /* Calaculate the action.
+        ---------------------- */
+@@ -77,6 +83,12 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+     {
+ 	PTR xi = MatGetPtr(subspace,i);
+ 	PTR yi = MatGetPtr(action,i);
++    if (!xi || !yi)
++    {
++        MatFree(action);
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FEL f;
+ 
+ 	/* Calculate the image of the <i>-th row of <subspace>.
+@@ -85,10 +97,20 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+ 
+ 	/* Clean the image with the subspace and store coefficients.
+ 	   --------------------------------------------------------- */
+-	FfCleanRow2(tmp,subspace->Data,sdim,subspace->PivotTable,yi);
++	if (FfCleanRow2(tmp,subspace->Data,sdim,subspace->PivotTable,yi))
++    {
++        MatFree(action);
++        SysFree(tmp);
++        return NULL;
++    }
+ 	if (FfFindPivot(tmp,&f) >= 0)
+-	    MTX_ERROR("Split(): Subspace not invariant");
++	{
++        MatFree(action);
++        SysFree(tmp);
++        MTX_ERROR("Split(): Subspace not invariant");
++        return NULL;
+     }
++	}
+ 
+     /* Clean up and return the result.
+        ------------------------------- */
+diff --git a/src/stabpwr.c b/src/stabpwr.c
+index ff33bc6..01282ab 100644
+--- a/src/stabpwr.c
++++ b/src/stabpwr.c
+@@ -68,15 +68,18 @@ int StablePower_(Matrix_t *mat, int *pwr, Matrix_t **ker)
+        --------------------------- */
+     p = 1;
+     k1 = MatNullSpace(mat);
+-    MatMul(mat,mat);
++    if (!k1) return -1;
++    if (!MatMul(mat,mat)) return -1;
+     k2 = MatNullSpace(mat);
++    if (!k2) return -1;
+     while (k2->Nor > k1->Nor)
+     {
+ 	p *= 2;
+ 	MatFree(k1);
+ 	k1 = k2;
+-	MatMul(mat,mat);
++	if (!MatMul(mat,mat)) return -1;
+ 	k2 = MatNullSpace(mat);
++    if (!k2) return -1;
+     }
+     MatFree(k2);
+ 
+diff --git a/src/sumint.c b/src/sumint.c
+index 278acd8..905fa79 100644
+--- a/src/sumint.c
++++ b/src/sumint.c
+@@ -77,7 +77,7 @@ int FfSumAndIntersection(PTR wrk1, int *nor1, int *nor2, PTR wrk2, int *piv)
+     {
+ 	FEL f;
+ 	int p;
+-	FfCleanRowAndRepeat(x1,wrk1,k,piv,x2,wrk2);
++	if (FfCleanRowAndRepeat(x1,wrk1,k,piv,x2,wrk2)) return -1;
+ 	if ((p = FfFindPivot(x1,&f)) < 0)
+ 	    continue;	/* Null row - ignore */
+ 	if (k < i)
+diff --git a/src/temap.c b/src/temap.c
+index 7ba445a..4c2d493 100644
+--- a/src/temap.c
++++ b/src/temap.c
+@@ -74,17 +74,21 @@ Matrix_t *TensorMap(Matrix_t *vec, const Matrix_t *a, const Matrix_t *b)
+     for (i = 0; i < vec->Nor; ++i)
+     {
+ 	Matrix_t *tmp = MatTransposed(a);
++    if (!tmp) return NULL;
+ 	Matrix_t *v = VectorToMatrix(vec,i,b->Nor);
+ 	if (v == NULL)
+ 	{
+ 	    MTX_ERROR("Conversion failed");
+-	    break;
++	    return NULL;
+ 	}
+-	MatMul(tmp,v);
++	if (!MatMul(tmp,v)) return NULL;
+ 	MatFree(v);
+-	MatMul(tmp,b);
++	if (!MatMul(tmp,b)) return NULL;
+ 	if (MatrixToVector(tmp,result,i))
+-	    MTX_ERROR("Conversion failed");
++    {
++        MTX_ERROR("Conversion failed");
++        return NULL;
++    }
+ 	MatFree(tmp);
+     }
+     return result;
+diff --git a/src/vec2mat.c b/src/vec2mat.c
+index 1047805..e76ad88 100644
+--- a/src/vec2mat.c
++++ b/src/vec2mat.c
+@@ -63,8 +63,11 @@ Matrix_t *VectorToMatrix(Matrix_t *vecs, int n, int noc)
+ 	return NULL;
+     for (i = 0; i < result->Nor; ++i)
+     {
+-	if (MatCopyRegion(result,i,0, vecs,n,i*noc,1,noc) != 0)
+-	    MTX_ERROR("Copy failed");
++        if (MatCopyRegion(result,i,0, vecs,n,i*noc,1,noc) != 0)
++        {
++            MTX_ERROR("Copy failed");
++            return NULL;
++        }
+     }
+     return result;
+ }
+diff --git a/src/window.c b/src/window.c
+index 9c87694..fbeb943 100644
+--- a/src/window.c
++++ b/src/window.c
+@@ -69,7 +69,11 @@ MatrixWindow_t *WindowAlloc(int fl, int nor, size_t rowsize)
+         MTX_ERROR1("%E",MTX_ERR_NOMEM);
+         return NULL;
+     }
+-    FfSetField(fl);
++    if (FfSetField(fl))
++    {
++        free(out);
++        return NULL;
++    }
+     out->Matrix = MatAlloc(fl, nor, rowsize*sizeof(long)*MPB);
+     if (out->Matrix == NULL)
+     {
+@@ -266,7 +270,8 @@ __asm__("    popl %ebx\n"
+ 
+ /** dest := left+right
+    left and right must be distinct, but one of them may coincide with dest -- under the assumption
+-   that, in that case, the ambient matrices coincide as well. **/
++   that, in that case, the ambient matrices coincide as well.
++   Return dest, or NULL on error (the only possible error is a failed compatibility check). **/
+ MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+   PTR x, result, tmp;
+@@ -335,6 +340,7 @@ MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWind
+ /** dest := left-right
+    left and right must be distinct, but one of them may coincide with dest -- under the assumption
+    that, in that case, the ambient matrices coincide as well.
++   Return dest, or NULL on error (the only possible error is a failed compatibility check).
+ **/
+ MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+@@ -407,7 +413,7 @@ MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWind
+    can write the result into it. Moreover, the chunk of memory pointed at by dest MUST be disjoint
+    from the chunks for left and right!
+ 
+-   Dimensions are not tested!
++   Dimensions are not tested; dest is always returned (there is no error value).
+ **/
+ MatrixWindow_t *WindowAddMul(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+@@ -617,7 +623,7 @@ int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t
+   S2->RowSize = A_sub_rowsize;
+   S2->Matrix = X->Matrix;
+   S2->ULCorner = X->ULCorner;
+-  WindowDif(S2, A00, A10);
++  WindowDif(S2, A00, A10); /* No error checking, as we know that the windows are compatible */
+   /*
+   printf("1.  S2 = A00-A10 in X\n");
+   WindowShow(X);
+@@ -653,7 +659,7 @@ int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t
+   S0->RowSize = A_sub_rowsize;
+   S0->Matrix = X->Matrix;
+   S0->ULCorner = X->ULCorner;
+-  WindowSum(S0, A10, A11);
++  WindowSum(S0, A10, A11); /* no error checking here and below, as we know the dimensions of the windows */
+   /*
+   printf("4.  S0 = A10+A11 in X\n");
+   WindowShow(X);
+diff --git a/src/zcleanrow.c b/src/zcleanrow.c
+index b4dcb30..d36a165 100644
+--- a/src/zcleanrow.c
++++ b/src/zcleanrow.c
+@@ -63,10 +63,10 @@ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv)
+  ** @param nor Number of rows.
+  ** @param piv Pivot table for @em matrix.
+  ** @param row2 Pointer to row where the operations are recorded.
+- ** @return Always 0.
++ ** @return 0, or 1 on error.
+  **/
+ 
+-void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
++int FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+ {
+     int i;
+     PTR x;
+@@ -74,7 +74,7 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+     if (row2 == NULL || piv == NULL)
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return;
++	return 1;
+     }
+     for (i = 0, x = mat; i < nor; ++i, FfStepPtr(&x))
+     {
+@@ -86,6 +86,7 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+ 	    FfInsert(row2,i,f);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
+@@ -100,10 +101,10 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+  ** @param piv Pivot table for @em mat.
+  ** @param row2 Pointer to the second row to be cleaned.
+  ** @param mat2 Matrix to the second matrix.
+- ** @return Always 0.
++ ** @return 0, or 1 on error.
+  **/
+ 
+-void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PTR mat2)
++int FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PTR mat2)
+ {
+     int i;
+     PTR x, x2;
+@@ -112,7 +113,7 @@ void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PT
+     if (row2 == NULL || piv == NULL || row2 == NULL || mat2 == NULL)
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return;
++	return 1;
+     }
+ #endif
+     for (i = 0, x = mat, x2 = mat2; i < nor; ++i, FfStepPtr(&x), FfStepPtr(&x2))
+@@ -125,6 +126,7 @@ void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PT
+ 	    FfAddMulRow(row2,x2,f);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index fc76bfc781e..68878f3fa19 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -56,8 +56,8 @@ cdef extern from "meataxe.h":
     # FEL FfMul(FEL a, FEL b)
     # FEL FfDiv(FEL a, FEL b)
     # FEL FfInv(FEL a)
-    # FEL FfEmbed(FEL a, int subfield)
-    # FEL FfRestrict(FEL a, int subfield)
+    # FEL FfEmbed(FEL a, int subfield) except 255
+    # FEL FfRestrict(FEL a, int subfield) except 255
     FEL FfFromInt(int l)
     int FfToInt(FEL f)
 
@@ -97,8 +97,8 @@ cdef extern from "meataxe.h":
     ## Basic memory operations
     Matrix_t *MatAlloc(int field, int nor, int noc) except NULL
     int MatFree(Matrix_t *mat)
-    PTR MatGetPtr(Matrix_t *mat, int row)
-    int MatCompare(Matrix_t *a, Matrix_t *b) except? -1
+    PTR MatGetPtr(Matrix_t *mat, int row) except NULL
+    int MatCompare(Matrix_t *a, Matrix_t *b) except -2
     int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols) except -1
     Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols) except NULL
     Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows) except NULL
@@ -115,11 +115,15 @@ cdef extern from "meataxe.h":
     Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src) except NULL
     Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff) except NULL
     Matrix_t *MatPower(Matrix_t *mat, long n) except NULL
-    FEL MatTrace(Matrix_t *mat)
+    int StablePower(Matrix_t *mat, int *pwr, Matrix_t **ker) except -1
+    FEL MatTrace(Matrix_t *mat) except 255
     Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B) except NULL
     void StrassenSetCutoff(size_t size)
 
     ## "Higher" Arithmetic
+    Matrix_t *MatTensor(Matrix_t *m1, Matrix_t *m2) except NULL
+    Matrix_t *TensorMap(Matrix_t *vec, Matrix_t *a, Matrix_t *b) except NULL
+    
     int MatClean(Matrix_t *mat, Matrix_t *sub) except -1
     int MatEchelonize(Matrix_t *mat) except -1
     int MatOrder(Matrix_t *mat) except? -1

From 710668d45f5f1e75672482db62a00b57c29cd16a Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Mon, 5 Oct 2015 15:49:57 +0200
Subject: [PATCH 10/23] Remove overcautious commands in spkg-install; rely on
 default error return values in matrix_gfpn_dense

---
 build/pkgs/meataxe/spkg-install       | 24 -------------
 src/sage/matrix/matrix_gfpn_dense.pyx | 52 ++++++++++-----------------
 2 files changed, 18 insertions(+), 58 deletions(-)

diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
index 163f180e50f..7733e9e44cb 100755
--- a/build/pkgs/meataxe/spkg-install
+++ b/build/pkgs/meataxe/spkg-install
@@ -45,30 +45,6 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-# Just to be sure, we also create other folders, although
-# they are standard SageMath folders
-
-mkdir -p $MTXBIN
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating directory for meataxe binaries."
-    exit 1
-fi
-
-mkdir -p "$SAGE_LOCAL/include"
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating SageMath's include directory."
-    exit 1
-fi
-
-mkdir -p "$SAGE_LOCAL/lib"
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating SageMath's lib folder."
-    exit 1
-fi
-
 ## Install! Aparently MeatAxe would rebuild everything when
 ## testing, and "make check" also installs. So, if a test
 ## is requested then we do it in one go.
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index b7ec01a0b14..1d4fd5e6516 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -459,8 +459,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             except (OSError,IOError):
                 return
             self.Data = MatLoad(FILE)
-            if FfSetField(self.Data.Field):
-                raise ValueError("Invalid data in file {}".format(FILE))
+            FfSetField(self.Data.Field)
             B = GF(self.Data.Field, 'z')
             parent = MatrixSpace(B, self.Data.Nor, self.Data.Noc)
             self._is_immutable = False
@@ -574,8 +573,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
         if self.Data:
             retval.Data = MatDup(self.Data)
-            if not retval.Data:
-                raise MemoryError, "Error copying a %s instance"%repr(type(self))
         else:
             retval.Data = NULL
         return retval
@@ -1046,10 +1043,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._cache = {}
-        if MatAdd(Left.Data, Right.Data) != NULL:
-            return Left
-        else:
-            raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatAdd(Left.Data, Right.Data)
+        return Left
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
         """
@@ -1073,10 +1068,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._is_immutable = False
         Left._cache = {}
-        if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
-            return Left
-        else:
-            raise ArithmeticError, "Matrix sizes or fields not compatible"
+        MatAddMul(Left.Data, Right.Data, mtx_taddinv[1])
+        return Left
 
     def __neg__(self):
         """
@@ -1120,9 +1113,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
         OUT._cache = {}
-        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left)))
+        return OUT
 
     cpdef ModuleElement _lmul_(self, RingElement right):
         """
@@ -1144,9 +1136,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
         OUT._cache = {}
-        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right)))
+        return OUT
 
     cdef int _strassen_default_cutoff(self, sage.matrix.matrix0.Matrix right) except -2:
         # Surprisingly, Winograd-Strassen can compete with school book
@@ -1178,12 +1169,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense OUT = self._new(self._nrows, right._ncols)
         sig_on()
         OUT.Data = MatDup(self.Data)
-        if OUT.Data == NULL:
-            sig_off()
-            raise MemoryError
-        if not MatMul(OUT.Data,right.Data):
-            sig_off()
-            raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMul(OUT.Data,right.Data)
         sig_off()
         OUT._is_immutable = False
         OUT._cache = {}
@@ -1222,8 +1208,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cutoff = cutoff//sizeof(long)
         StrassenSetCutoff(cutoff)
         sig_on()
-        if MatMulStrassen(OUT.Data, self.Data, right.Data) == NULL:
-            raise ArithmeticError("Error multiplying matrices by Strassen-Winograd algorithm")
+        MatMulStrassen(OUT.Data, self.Data, right.Data)
         sig_off()
         return OUT
 
@@ -1239,9 +1224,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             r = FfFromInt(n)
         left = self.__copy__()
         left._cache = {}
-        if MatMulScalar(left.Data, r) != NULL:
-            return left
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(left.Data, r)
+        return left
 
     def __div__(Matrix_gfpn_dense self, p):
         """
@@ -1318,7 +1302,10 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         sig_on()
         try:
             OUT.Data = MatInverse(self.Data)
-        except:
+        except ZeroDivisionError:
+            # Attempting to invert singular matrices happens
+            # in the tests, and we make the special case here
+            # so that the sig_on/off count is fine.
             sig_off()
             raise
         sig_off()
@@ -1424,8 +1411,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrix must not be empty")
         OUT = type(self).__new__(type(self))
         OUT.Data = MatNullSpace(self.Data)
-        if OUT.Data == NULL:
-            raise ArithmeticError("Error computing left kernel matrix")
         OUT._nrows = OUT.Data.Nor
         OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
@@ -1541,8 +1526,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             self.cache('rank', 0)
             self.cache('pivots', ())
             return self
-        if MatEchelonize(self.Data) == -1:
-            raise ArithmeticError("Error echelonizing this matrix")
+        MatEchelonize(self.Data)
         self._cache = {}
         # Now, self.Data is in semi-echelon form.
         r = self.Data.Nor

From c67cb42042b25d17aeb17999ac7430aafef0f142 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 11 Oct 2015 16:48:40 +0200
Subject: [PATCH 11/23] Fix pickling of meataxe matrices

---
 src/sage/matrix/matrix_gfpn_dense.pyx | 51 ++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 1d4fd5e6516..d2a1d6e8347 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -577,7 +577,28 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             retval.Data = NULL
         return retval
 
-    ## Pickling and string representation is taken care of by implementing get_unsafe
+    def __reduce__(self):
+        """
+        TESTS::
+
+            sage: M = MatrixSpace(GF(9,'x'),10,10).random_element()
+            sage: M == loads(dumps(M))   # indirect doctest
+            True
+            sage: M is loads(dumps(M))
+            False
+        """
+        cdef char* d
+        cdef int i,NR
+        cdef PTR p
+        if self.Data:
+            FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
+            return mtx_unpickle, (self._parent, self.Data.Nor, self.Data.Noc,
+                        PyString_FromStringAndSize(<char*>self.Data.Data,self.Data.RowSize * self.Data.Nor),
+                        not self._is_immutable) # for backward compatibility with the group cohomology package
+        else:
+            return mtx_unpickle, (0, 0, 0, '', not self._is_immutable)
+
     cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
         """
         Get an element without checking.
@@ -1579,3 +1600,31 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
         self.cache('in_echelon_form',True)
 
+def mtx_unpickle(f, int nr, int nc, str Data, bint m):
+    """
+    Helper function for unpickling.
+
+    TESTS::
+
+        sage: M = MatrixSpace(GF(9,'x'),10,10).random_element()
+        sage: M == loads(dumps(M))   # indirect doctest
+        True
+        sage: M is loads(dumps(M))
+        False
+    """
+    cdef Matrix_gfpn_dense OUT
+    OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+    if isinstance(f, (int, long)):
+        # This is for old pickles created with the group cohomology spkg
+        Matrix_dense.__init__(OUT, MatrixSpace(GF(f, 'z'), nr, nc))
+    else:
+        Matrix_dense.__init__(OUT, f)
+        f = OUT._base_ring.order()
+    OUT.Data = MatAlloc(f, nr, nc)
+    OUT._is_immutable = not m
+    OUT._converter = FieldConverter(OUT._base_ring)
+    cdef char *x
+    if Data:
+        x = PyString_AsString(Data)
+        memcpy(OUT.Data.Data, x, OUT.Data.RowSize*OUT.Data.Nor)
+    return OUT

From a40752ede75d25526be6c7e2a94797e88d3e082e Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sat, 12 Sep 2015 18:05:40 +0200
Subject: [PATCH 12/23] An optional MeatAxe package

---
 build/pkgs/meataxe/SPKG.txt               | 27 +++++++
 build/pkgs/meataxe/checksums.ini          |  4 +
 build/pkgs/meataxe/package-version.txt    |  1 +
 build/pkgs/meataxe/patches/IO_fixes.patch | 78 +++++++++++++++++++
 build/pkgs/meataxe/spkg-install           | 95 +++++++++++++++++++++++
 build/pkgs/meataxe/type                   |  1 +
 6 files changed, 206 insertions(+)
 create mode 100644 build/pkgs/meataxe/SPKG.txt
 create mode 100644 build/pkgs/meataxe/checksums.ini
 create mode 100644 build/pkgs/meataxe/package-version.txt
 create mode 100644 build/pkgs/meataxe/patches/IO_fixes.patch
 create mode 100755 build/pkgs/meataxe/spkg-install
 create mode 100644 build/pkgs/meataxe/type

diff --git a/build/pkgs/meataxe/SPKG.txt b/build/pkgs/meataxe/SPKG.txt
new file mode 100644
index 00000000000..9892003f3e2
--- /dev/null
+++ b/build/pkgs/meataxe/SPKG.txt
@@ -0,0 +1,27 @@
+= MeatAxe =
+
+== Description ==
+
+The MeatAxe is a set of programs for working with matrix representations
+over finite fields. Permutation representations are supported to some
+extent, too.
+
+The MeatAxe is developed for the UNIX operating system. Supported platforms
+include Linux (x86), SunOS/Solaris (Sparc), HP/UX, DEC OSF/1 (Alpha), and
+Windows NT 4.0 (x86, Alpha, PPC).
+
+== License ==
+
+The C Meat-Axe is free software: you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation, either version 2 of the License, or (at your option) any later
+version.
+
+== Upstream Contact ==
+
+Michael Ringe (mringe@math.rwth-aachen.de)
+
+== Special Update/Build Instructions ==
+
+The original upstream tarball was re-packaged, so that it unpacks into a single
+folder called meataxe-2.4.24
diff --git a/build/pkgs/meataxe/checksums.ini b/build/pkgs/meataxe/checksums.ini
new file mode 100644
index 00000000000..b4d4356ad4e
--- /dev/null
+++ b/build/pkgs/meataxe/checksums.ini
@@ -0,0 +1,4 @@
+tarball=meataxe-VERSION.tar.gz
+sha1=0aa4313cc430c78e058068feba805428ef2324aa
+md5=e0f384e37a69671c73c2904e4e69dc01
+cksum=3083268116
diff --git a/build/pkgs/meataxe/package-version.txt b/build/pkgs/meataxe/package-version.txt
new file mode 100644
index 00000000000..208b2a0070d
--- /dev/null
+++ b/build/pkgs/meataxe/package-version.txt
@@ -0,0 +1 @@
+2.4.24.p1
diff --git a/build/pkgs/meataxe/patches/IO_fixes.patch b/build/pkgs/meataxe/patches/IO_fixes.patch
new file mode 100644
index 00000000000..933908be6be
--- /dev/null
+++ b/build/pkgs/meataxe/patches/IO_fixes.patch
@@ -0,0 +1,78 @@
+Read and create library files in the directory given by MtxLibDir.
+
+The patch keeps a promise given by upstream.
+
+AUTHOR: Simon King 2015-09-18, simon.king@uni-jena.de
+
+diff --git a/src/maketabF.c b/src/maketabF.c
+index fa03eda..d7af83e 100644
+--- a/src/maketabF.c
++++ b/src/maketabF.c
+@@ -319,7 +319,7 @@ static void writeheader()
+     int i, j;
+ 
+     sprintf(filename,"p%3.3ld.zzz",Q);
+-    fd = SysFopen(filename,FM_CREATE);
++    fd = SysFopen(filename,FM_CREATE|FM_LIB);
+     if (fd == NULL)
+     {
+ 	perror(filename);
+diff --git a/src/os.c b/src/os.c
+index a7f4271..b07b971 100644
+--- a/src/os.c
++++ b/src/os.c
+@@ -227,25 +227,31 @@ FILE *SysFopen(const char *name, int mode)
+ 	MTX_ERROR1("Invalid file mode %d",mode);
+ 	return NULL;
+     }
+-    f = fopen(name,fmodes[m]);
+-    if (f != NULL) 
+-	return f;
+ 
+     /* Search library directory
+        ------------------------ */
+     if ((mode & FM_LIB) != 0) 
+     {
+-	strcpy(buf,MtxLibDir);
+-	strcat(buf,"/");
+-	strcat(buf,name);
+-	f = fopen(buf,fmodes[m]);
++        if (*MtxLibDir != 0)
++            {
++            strcpy(buf,MtxLibDir);
++            strcat(buf,"/");
++            strcat(buf,name);
++            f = fopen(buf,fmodes[m]);
++            }
++        else
++            f = fopen(name,fmodes[m]);
+     }
+-
++    else
++    {
++        f = fopen(name,fmodes[m]);
++    }
++    if (f != NULL)
++	return f;
+     /* Error handling
+        -------------- */
+     if (f == NULL && (mode & FM_NOERROR) == 0)
+-	MTX_ERROR1("%s: %S",name);
+-
++    MTX_ERROR1("%s: %S",name);
+     return f;
+ }
+ 
+diff --git a/src/zcv.c b/src/zcv.c
+index a9ad7a3..763c9fb 100644
+--- a/src/zcv.c
++++ b/src/zcv.c
+@@ -584,7 +584,7 @@ static int Init(int argc, const char **argv)
+     inpname = App->ArgV[0];
+     if (strcmp(inpname,"-"))
+     {
+-	src = SysFopen(inpname,FM_READ|FM_TEXT|FM_LIB);
++	src = SysFopen(inpname,FM_READ|FM_TEXT);
+ 	if (src == NULL)
+ 	{
+ 	    MTX_ERROR1("Cannot open %s",inpname);
diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
new file mode 100755
index 00000000000..7733e9e44cb
--- /dev/null
+++ b/build/pkgs/meataxe/spkg-install
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+if [ -z "$SAGE_LOCAL" ]; then
+    echo >&2 "SAGE_LOCAL undefined ... exiting"
+    echo >&2 "Maybe run 'sage --sh'?"
+    exit 1
+fi
+
+cd src
+
+for patch in ../patches/*.patch; do
+    [ -r "$patch" ] || continue  # Skip non-existing or non-readable patches
+    echo "Applying $patch"
+    patch -p1 <"$patch"
+    if [ $? -ne 0 ]; then
+        echo >&2 "Error applying '$patch'"
+        exit 1
+    fi
+done
+
+## The following *could* be put into Makefile.conf
+
+# This is the place where arithmetic tables and some other input files are
+# searched by default.
+export MTXLIB="$DOT_SAGE/meataxe"
+# Directory where executables are installed.
+export MTXBIN="$SAGE_LOCAL/bin"
+# Default compiler flags
+export CFLAGS1="-std=gnu99 -O -Wall -fPIC"
+# Field size up to GF(256)
+export ZZZ=0
+
+# In principle, one should uncomment for field sizes up to GF(2^16).
+# But upstream doesn't provide the required sources.
+#export ZZZ=1
+
+# The following is just to make MeatAxe's Makefile happy
+touch Makefile.conf
+
+# We create a directory for the multiplication tables
+mkdir -p "$MTXLIB"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating directory for multiplication tables."
+    exit 1
+fi
+
+## Install! Apparently MeatAxe would rebuild everything when
+## testing, and "make check" also installs. So, if a test
+## is requested then we do it in one go.
+
+if [ "x$SAGE_CHECK" = xyes ]; then
+    $MAKE check
+else
+    $MAKE
+fi
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error installing MeatAxe."
+    exit 1
+fi
+
+## Surprisingly, MeatAxe's Makefile does NOT install the binaries
+## in MTXBIN. Hence, we do it manually.
+
+mv bin/* "$MTXBIN"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe executables."
+    exit 1
+fi
+
+# We move the meataxe library to a permanent location
+mv tmp/libmtx.a "$SAGE_LOCAL/lib"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe library."
+    exit 1
+fi
+
+cp src/meataxe.h "$SAGE_LOCAL/include/"
+if [ $? -ne 0 ]; then
+    echo >&2 "Error copying MeatAxe header."
+    exit 1
+fi
+
+# Are we supposed to install the documentation?
+if [ "x$SAGE_SPKG_INSTALL_DOCS" = xyes ] ; then
+    mkdir -p "$SAGE_LOCAL/share/doc/meataxe/"
+    cp -r doc/* "$SAGE_LOCAL/share/doc/meataxe/"
+    if [ $? -ne 0 ]; then
+        echo >&2 "Error copying documentation."
+        exit 1
+    else
+        echo "The documentation can be found in $SAGE_LOCAL/share/doc/meataxe/"
+    fi
+fi
diff --git a/build/pkgs/meataxe/type b/build/pkgs/meataxe/type
new file mode 100644
index 00000000000..134d9bc32d5
--- /dev/null
+++ b/build/pkgs/meataxe/type
@@ -0,0 +1 @@
+optional

From 5a8631634bab6abb47d949ef17731bc67b0fface Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Fri, 18 Sep 2015 09:37:27 +0200
Subject: [PATCH 13/23] Implement and use Strassen-Winograd matrix
 multiplication in MeatAxe

---
 .../StrassenWinogradImplementation.patch      | 1246 +++++++++++++++++
 .../patches/StrassenWinogradUsage.patch       |  359 +++++
 2 files changed, 1605 insertions(+)
 create mode 100644 build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
 create mode 100644 build/pkgs/meataxe/patches/StrassenWinogradUsage.patch

diff --git a/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch b/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
new file mode 100644
index 00000000000..43664cf6560
--- /dev/null
+++ b/build/pkgs/meataxe/patches/StrassenWinogradImplementation.patch
@@ -0,0 +1,1246 @@
+Implement Strassen-Winograd multiplication in MeatAxe.
+
+We use the schedule from Douglas-Heroux-Slishman-Smith;
+see also Boyer-Pernet-Zhou, "Memory efficient scheduling of
+Strassen-Winograd's matrix multiplication algorithm",
+Table 1 (ISSAC 2009).
+
+AUTHOR: Simon King 2015-09-19, simon.king@uni-jena.de
+
+diff --git a/Makefile b/Makefile
+index b78e244..2ada31e 100644
+--- a/Makefile
++++ b/Makefile
+@@ -88,6 +88,7 @@ LIB_OBJS=\
+ 	temap \
+ 	tkinfo vec2mat \
+ 	wgen \
++	window \
+ 	zcleanrow zcmprow zgap zpermrow \
+ 	zzz2 \
+ 	version
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 4f0a973..178b6cb 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -24,8 +24,8 @@
+ MTX_DEFINE_FILE_INFO
+ 
+ typedef unsigned char BYTE;
+-static int MPB = 0;		/* No. of marks per byte */
+-static int LPR = 0;		/* Long ints per row */
++int MPB = 0;		/* No. of marks per byte */
++int LPR = 0;		/* Long ints per row */
+ 
+ 
+ 
+@@ -646,7 +646,7 @@ PTR FfAddRow(PTR dest, PTR src)
+ 
+ 
+ /**
+- ** Add a part two rows.
++ ** Add a part of two rows.
+  ** This works like FfAddRow(), but the operation is performed only on a given range of
+  ** columns. Note that the working range is not specified as column indexes but in units of
+  ** long integers!
+@@ -707,7 +707,217 @@ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len)
+     return dest;
+ }
+ 
++/**
++ ** Subtract two rows.
++ ** This function subtracts src from dest (a plain XOR in characteristic 2). Field order and row size must have been set before.
++ ** @param dest The row to subtract from.
++ ** @param src The row to subtract.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRow(PTR dest, PTR src)
++{
++    register int i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++    {
++#ifdef ASM_MMX
++    /* This assumes Intel with 4 bytes per long, but MMX implies Intel anyway.*/
++	__asm__(
++	"    pushl %ebx\n"
++	"    pushl %ecx\n"
++	"    pushl %edx\n"
+ 
++	"    movl 8(%ebp),%ecx\n"
++        "    movl 12(%ebp),%ebx\n"
++        "    movl LPR,%edx\n"
++        "    sarl $1,%edx\n"
++        "    je .SUBROW2\n"
++        "    .align 16\n"
++	".SUBROW1:\n"
++        "    movq (%ebx),%mm0\n"
++        "    addl $8,%ebx\n"
++        "    pxor (%ecx),%mm0\n"
++        "    movq %mm0,(%ecx)\n"
++        "    addl $8,%ecx\n"
++        "    decl %edx\n"
++        "    jne .SUBROW1\n"
++	".SUBROW2:\n"
++	"    popl %edx\n"
++	"    popl %ecx\n"
++	"    popl %ebx\n"
++	);
++#else
++	register long *l1 = (long *) dest;
++	register long *l2 = (long *) src;
++	for (i = LPR; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    if (x != 0) *l1 ^= x;
++	    l1++;
++	}
++#endif
++    }
++    else		/* any other characteristic */
++    {
++        FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];  /* multiplication by -1 */
++#ifdef ASM_MMX
++        register BYTE *p1 = dest;
++        register unsigned long *p2 = (unsigned long *) src;
++        for (i = LPR; i != 0; --i)
++        {
++            register unsigned long a;
++            if ((a = *p2++) != 0) {  /* read and increment of p1 are kept in separate statements */
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++                a >>= 8;
++                *p1 = mtx_tadd[*p1][table_inv[a & 0xffL]]; ++p1;
++            } else
++              p1 += 4;  /* skip one all-zero long (4 bytes, see assumption above) */
++        }
++#else
++        register FEL *p1 = dest;
++        register FEL *p2 = src;
++        for (i = FfTrueRowSize(FfNoc); i != 0; --i)
++        {
++            register int x = *p2++;
++            if (x != 0) *p1 = mtx_tadd[*p1][table_inv[x]];  /* dest + (-src) */
++            p1++;
++        }
++#endif
++    }
++    return dest;
++}
++
++
++/**
++ ** Subtract a part of two rows.
++ ** This works like FfSubRow(), but the operation is performed only on a given range of
++ ** columns. Note that the working range is not specified as column indexes but in units of
++ ** long integers!
++ ** @param dest The row to subtract from.
++ ** @param src The row to subtract.
++ ** @param first Number of long integers to skip.
++ ** @param len Number of long integers to process.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRowPartial(PTR dest, PTR src, int first, int len)
++{
++    register long i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++#ifdef ASM_MMX
++	__asm__("\n	movl 8(%ebp),%ecx\n"
++		"	movl 12(%ebp),%ebx\n"
++		"	movl 16(%ebp),%edx\n"
++		"       sall $2,%edx\n"
++		"       addl %edx,%ecx\n"
++		"       addl %edx,%ebx\n"
++		"       movl 20(%ebp),%edx\n"
++		"	sarl $1,%edx\n"
++		"	je .SUBROWPART_1\n"
++		"	.align 16\n"
++		".SUBROWPART_2:\n"
++		"	movq (%ebx),%mm0\n"
++		"	addl $8,%ebx\n"
++		"	pxor (%ecx),%mm0\n"
++		"	movq %mm0,(%ecx)\n"
++		"	addl $8,%ecx\n"
++		"	decl %edx\n"
++		"	jne .SUBROWPART_2\n"
++		".SUBROWPART_1:\n"
++	       );
++#else
++    {	register long *l1 = (long *) dest + first;
++	register long *l2 = (long *) src + first;
++	for (i = len; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    *l1 ^= x;
++	    l1++;
++	}
++    }
++#endif
++    else		/* any other characteristic */
++    {   FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];
++        register BYTE *p1 = dest + first * sizeof(long);
++        register BYTE *p2 = src + first * sizeof(long);
++        for (i = len*sizeof(long); i != 0; --i)
++        {
++            register int x = *p2++;
++            *p1 = mtx_tadd[*p1][table_inv[x]];
++            p1++;
++        }
++    }
++    return dest;
++}
++
++
++/**
++ ** Subtract a part of two rows.
++ ** The difference to FfSubRowPartial is that dest is replaced
++ ** by src-dest, not by dest-src.
++ ** @param dest The row to subtract.
++ ** @param src The row to subtract from.
++ ** @param first Number of long integers to skip.
++ ** @param len Number of long integers to process.
++ ** @return Always returns dest.
++ **/
++
++PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len)
++{
++    register long i;
++
++    if (FfChar == 2)	/* characteristic 2 is simple... */
++#ifdef ASM_MMX
++	__asm__("\n	movl 8(%ebp),%ecx\n"
++		"	movl 12(%ebp),%ebx\n"
++		"	movl 16(%ebp),%edx\n"
++		"       sall $2,%edx\n"
++		"       addl %edx,%ecx\n"
++		"       addl %edx,%ebx\n"
++		"       movl 20(%ebp),%edx\n"
++		"	sarl $1,%edx\n"
++		"	je .SUBROWPARTREV_1\n"
++		"	.align 16\n"
++		".SUBROWPARTREV_2:\n"
++		"	movq (%ebx),%mm0\n"
++		"	addl $8,%ebx\n"
++		"	pxor (%ecx),%mm0\n"
++		"	movq %mm0,(%ecx)\n"
++		"	addl $8,%ecx\n"
++		"	decl %edx\n"
++		"	jne .SUBROWPARTREV_2\n"
++		".SUBROWPARTREV_1:\n"
++	       );
++#else
++    {	register long *l1 = (long *) dest + first;
++	register long *l2 = (long *) src + first;
++	for (i = len; i != 0; --i)
++	{
++	    register long x = *l2++;
++	    *l1 ^= x;  /* in characteristic 2, src-dest equals src XOR dest */
++	    l1++;
++	}
++    }
++#endif
++    else		/* any other characteristic */
++    {   FEL *table_inv = mtx_tmult[mtx_taddinv[FF_ONE]];  /* multiplication by -1 */
++        register BYTE *p1 = dest + first * sizeof(long);
++        register BYTE *p2 = src + first * sizeof(long);
++        for (i = len*sizeof(long); i != 0; --i)
++        {
++            register int x = *p2++;
++            *p1 = mtx_tadd[table_inv[*p1]][x];  /* (-dest) + src */
++            p1++;
++        }
++    }
++    return dest;
++}
+ 
+ 
+ /**
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 819e88e..e2f5a84 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -107,6 +107,9 @@ extern int FfChar;		/**< Current characteristic */
+ extern FEL FfGen;		/**< Generator */
+ extern int FfNoc;		/**< Number of columns for row ops */
+ extern size_t FfCurrentRowSize;
++extern int FfCurrentRowSizeIo;
++extern int MPB;         /** No. of marks per byte */
++extern int LPR;         /** Long ints per row */
+ 
+ 
+ /* Arithmetic */
+@@ -125,6 +128,9 @@ int FfSetNoc(int noc);
+ void FfAddMulRow(PTR dest, PTR src, FEL f);
+ PTR FfAddRow(PTR dest, PTR src);
+ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len);
++PTR FfSubRow(PTR dest, PTR src);
++PTR FfSubRowPartial(PTR dest, PTR src, int first, int len);
++PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len);
+ PTR FfAlloc(int nor);
+ int FfCmpRows(PTR p1, PTR p2);
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv);
+@@ -519,6 +525,8 @@ int MatIsValid(const Matrix_t *m);
+ Matrix_t *MatLoad(const char *fn);
+ Matrix_t *MatMul(Matrix_t *dest, const Matrix_t *src);
+ Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff);
++Matrix_t *MatMulStrassen(Matrix_t *dest, const Matrix_t *A, const Matrix_t *B);
++void StrassenSetCutoff(size_t size);
+ long MatNullity(const Matrix_t *mat);
+ long MatNullity__(Matrix_t *mat);
+ Matrix_t *MatNullSpace(const Matrix_t *mat);
+diff --git a/src/window.c b/src/window.c
+new file mode 100644
+index 0000000..f374028
+--- /dev/null
++++ b/src/window.c
+@@ -0,0 +1,944 @@
++/* ========================== C MeatAxe =============================
++   window.c -  Matrix window operations and Strassen-Winograd multiplication
++
++   (C) Copyright 2015 Simon King, Institut fuer Mathematik,
++   FSU Jena, Germany  <simon.king@uni-jena.de>
++   This program is free software; see the file COPYING for details.
++   ================================================================== */
++
++#include <string.h>
++#include <stdlib.h>
++#include <inttypes.h>
++#include "meataxe.h"
++
++/* --------------------------------------------------------------------------
++   Local data
++   -------------------------------------------------------------------------- */
++
++MTX_DEFINE_FILE_INFO
++
++typedef unsigned char BYTE;
++
++typedef struct
++{
++  int Nor;                      /* #rows of the window */
++  size_t RowSize;               /* size of window rows in long integers */
++  Matrix_t *Matrix;             /* ambient matrix containing the window */
++  PTR ULCorner;                 /* Pointer to the upper left window corner */
++}
++    MatrixWindow_t;
++
++size_t cutoff = sizeof(long)/2;
++
++/** The divide-and-conquer approach is only done for
++ * matrices with at least "cutoff*MPB*sizeof(long)" rows which
++ * are formed by at least "cutoff" longs.
++ *
++ * The above rule means that the "critical matrices" are square.
++ **/
++void StrassenSetCutoff(size_t size)
++{   if (size)
++        cutoff = size;
++    else
++        cutoff = sizeof(long)/2;
++}
++
++/* ------------------------------------------------------------------
++
++   Allocation and deallocation of a matrix window
++
++   ------------------------------------------------------------------ */
++/**
++ * Note that the rowsize is given in longs, not in bytes. The reason is
++ * that functions such as FfAddRowPartial or FfAddMapRowWindow internally
++ * operate on longs. Consequently, in the Strassen-Winograd
++ * multiplication algorithm, we have to divide our matrix rows
++ * into longs, not into bytes.
++ **/
++
++/* Allocation with initialisation */
++/* Create an empty matrix that is identical with the window. */
++/* fl is the field size, nor is the number of rows. rowsize is */
++/* the size of a row in longs. */
++MatrixWindow_t *WindowAlloc(int fl, int nor, size_t rowsize)
++{
++    MatrixWindow_t *out;
++    out = ALLOC(MatrixWindow_t);
++    if (out == NULL)
++    {
++        MTX_ERROR1("%E",MTX_ERR_NOMEM);
++        return NULL;
++    }
++    FfSetField(fl);
++    out->Matrix = MatAlloc(fl, nor, rowsize*sizeof(long)*MPB);
++    if (out->Matrix == NULL)
++    {
++        free(out);
++        MTX_ERROR1("%E",MTX_ERR_NOMEM);
++        return NULL;
++    }
++    out->ULCorner = out->Matrix->Data;
++    out->Nor = nor;
++    out->RowSize = rowsize;
++    return out;
++}
++
++/** WARNING: Only to be used if the surrounding matrix can be destroyed
++    Otherwise, just do free(m)! **/
++void WindowFree(MatrixWindow_t *m)
++{
++    if (m->Matrix != NULL)
++      {
++    MatFree(m->Matrix);
++      }
++    free(m);
++}
++
++/* ------------------------------------------------------------------
++ * Auxiliary / Debugging
++ ----------------------------------------------------------------- */
++
++void WindowShow(MatrixWindow_t *A)
++{
++long i,j;
++PTR p = A->ULCorner;
++FfSetNoc(A->Matrix->Noc);
++for (i=A->Nor; i>0; i--, FfStepPtr(&p))
++  {
++  for (j=0; j< (A->RowSize)*sizeof(long); j++)
++    printf("%3.3d ", (unsigned char)p[j]);
++  printf("\n");
++  }
++}
++
++/**
++ ** Overwrite the window with zeroes, but leave the
++ ** rest of the ambient matrix untouched.
++ **/
++
++void WindowClear(MatrixWindow_t *A)
++{
++register long i;
++register size_t rowsize = A->RowSize*sizeof(long);
++PTR p = A->ULCorner;
++FfSetNoc(A->Matrix->Noc);
++for (i=A->Nor; i>0; i--, FfStepPtr(&p))
++{ memset(p, FF_ZERO, rowsize); }
++}
++
++/**
++ ** Multiply a vector by a matrix window.
++ ** This function multiplies the vector @em row from the right by the matrix window
++ ** @em matrix and adds the result into @em result.
++ ** The number of columns in both @em matrix and @em result is determined by @em rowsize.
++ ** @attention @em result and @em row must not overlap. Otherwise the result is
++ ** undefined.
++ ** @param row The source vector (nor columns).
++ ** @param matrix A matrix window (nor by (rowsize*sizeof(long)*MPB)) of a matrix whose rowsize is FfCurrentRowSize.
++ ** @param nor number of rows in the matrix window.
++ ** @param[out] result The resulting vector ((rowsize*sizeof(long)*MPB) columns).
++ ** @param rowsize number of longs forming a row of @em matrix.
++ **/
++
++void FfAddMapRowWindow(PTR row, PTR matrix, int nor, PTR result, size_t rowsize)
++
++{
++    register int i;
++    register FEL f;
++    BYTE *m = (BYTE *) matrix;
++
++#ifdef DEBUG
++    if (result >= row && result < row + FfRowSize(nor))
++    MTX_ERROR("row and result overlap: undefined result!");
++    if (row >= result && row < result + (rowsize*sizeof(long)))
++    MTX_ERROR("row and result overlap: undefined result!");
++#endif
++
++    if (FfOrder == 2)       /* GF(2) is a special case */
++    {
++        register long *x1 = (long *) matrix;
++        register BYTE *r = (BYTE *) row;
++
++        for (i = nor; i > 0; ++r)
++        {
++            register BYTE mask;
++            if (*r == 0)
++            {
++                i -= 8;
++                x1 += 8 * LPR;  /* Skip 8 rows of the matrix window in the ambient matrix*/
++                continue;
++            }
++            for (mask = 0x80; mask != 0 && i > 0; mask >>= 1, --i)
++            {
++                if ((mask & *r) == 0)
++                {
++                    x1 += LPR;  /* Skip a single row */
++                    continue;
++                }
++
++#ifdef ASM_MMX
++__asm__("    pushl %ebx\n");
++__asm__("    movl %0,%%ebx" : : "g" (x1) );
++__asm__("    pushl %ecx\n"
++    "    pushl %edx\n"
++    "    movl 20(%ebp),%ecx\n"  /* result */
++    );
++__asm__ (
++        "    movl 24(%ebp),%edx\n"   /* this time, it is rowsize, not LPR */
++        "    sarl $1,%edx\n"
++        "    je .FASTXOR_1\n"
++        "    .align 16\n"
++    ".FASTXOR_2:\n"
++        "    movq (%ebx),%mm0\n"
++        "    addl $8,%ebx\n"
++        "    pxor (%ecx),%mm0\n"
++        "    movq %mm0,(%ecx)\n"
++        "    addl $8,%ecx\n"
++        "    decl %edx\n"
++        "    jne .FASTXOR_2\n"
++    ".FASTXOR_1:\n"
++    "    popl %edx\n"
++    "    popl %ecx\n");
++__asm__("    movl %%ebx,%0" : : "g" (x1) );
++__asm__("    popl %ebx\n"
++    );
++#else
++                {
++                    register long *x2 = (long *)result;
++                    register int k;
++                    for (k = rowsize; k; --k)
++                        *x2++ ^= *x1++;
++                    /* Now, x1 points to the first item
++                     * after the current line of the window.
++                     * We need to move it to the first position
++                     * of the next line of the window.
++                     */
++                    x1 += (LPR-rowsize);
++                }
++#endif
++            }
++        }
++    }
++    else                /* Any other field */
++    {
++        register BYTE *brow = (BYTE *) row;
++        register int pos = 0;
++        size_t l_rowsize = rowsize*sizeof(long);
++        for (i = nor; i > 0; --i)
++        {
++            f = mtx_textract[pos][*brow];
++            if (++pos == (int) MPB)
++            {
++                pos = 0;
++                ++brow;
++            }
++            if (f != FF_ZERO)
++            {
++                register BYTE *v = m;
++                register BYTE *r = result;
++                if (f == FF_ONE)
++                {
++                    register size_t k = l_rowsize;
++                    for (; k != 0; --k)
++                    {
++                        *r = mtx_tadd[*r][*v++];
++                        ++r;
++                    }
++                }
++                else
++                {
++                    register BYTE *multab = mtx_tmult[f];
++                    register size_t k = l_rowsize;
++                    for (; k != 0; --k)
++                    {
++                        if (*v != 0)
++                            *r = mtx_tadd[multab[*v]][*r];
++                        ++v;
++                        ++r;
++                    }
++                }
++            }
++            m += FfCurrentRowSize;  /* next row of window in the ambient matrix */
++        }
++    }
++}
++
++/** dest := left+right. Returns dest, or NULL if left/right differ in field, Nor or RowSize.
++   left and right must be distinct, but one of them may coincide with dest -- under the assumption
++   that, in that case, the ambient matrices coincide as well. The shape of dest is not checked. **/
++MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++  PTR x, result, tmp;
++  int i;
++
++  int lnoc, rnoc, dnoc;
++
++  FfSetField(left->Matrix->Field);
++  if (left->Matrix->Field != right->Matrix->Field || (left->Nor != right->Nor) || (left->RowSize != right->RowSize))
++    {
++      MTX_ERROR1("Windows cannot be added: %E", MTX_ERR_INCOMPAT);
++      return NULL;
++    }
++  size_t rowsize = left->RowSize;
++
++  lnoc = left->Matrix->Noc;
++  rnoc = right->Matrix->Noc;
++  dnoc = dest->Matrix->Noc;
++  /* Aliasing is detected by comparing upper-left corner pointers:
++     dest may be the same window as left, as right, or neither. */
++  result = dest->ULCorner;
++  if (left->ULCorner == dest->ULCorner)
++    {   /* dest aliases left: add right into it in place */
++        x = right->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);      /* select dest's ambient matrix before stepping to its next row */
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);      /* likewise for the ambient matrix of right */
++            FfStepPtr(&x);
++        }
++    }
++  else if (right->ULCorner == dest->ULCorner)
++    {   /* dest aliases right: add left into it in place */
++        x = left->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(lnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else
++    {   /* no aliasing: copy each row of left into dest, then add the row of right */
++        x = right->ULCorner;
++        tmp = left->ULCorner;
++        size_t l_rowsize = rowsize * sizeof(long);   /* window row length in bytes */
++        for (i = left->Nor; i != 0; --i)
++        {
++            memcpy(result, tmp, l_rowsize);
++            FfSetNoc(lnoc);
++            FfStepPtr(&tmp);
++            FfAddRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  return dest;
++}
++
++/** dest := left-right. Returns dest, or NULL if left/right differ in field, Nor or RowSize.
++   left and right must be distinct, but one of them may coincide with dest -- under the assumption
++   that, in that case, the ambient matrices coincide as well. The shape of dest is not checked.
++**/
++MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++  PTR x, result, tmp;
++  int i;
++  int lnoc, rnoc, dnoc;
++
++  FfSetField(left->Matrix->Field);
++  if (left->Matrix->Field != right->Matrix->Field || (left->Nor != right->Nor) || (left->RowSize != right->RowSize))
++    {
++      MTX_ERROR1("Windows cannot be subtracted: %E", MTX_ERR_INCOMPAT);
++      return NULL;
++    }
++  size_t rowsize = left->RowSize;
++
++  lnoc = left->Matrix->Noc;
++  rnoc = right->Matrix->Noc;
++  dnoc = dest->Matrix->Noc;
++  /* Aliasing is detected by comparing upper-left corner pointers:
++     dest may be the same window as left, as right, or neither. */
++  result = dest->ULCorner;
++  if (left->ULCorner == dest->ULCorner)
++    {   /* dest aliases left: subtract right from it in place */
++        x = right->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfSubRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);      /* select dest's ambient matrix before stepping to its next row */
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);      /* likewise for the ambient matrix of right */
++            FfStepPtr(&x);
++        }
++    }
++  else if (right->ULCorner == dest->ULCorner)
++    {   /* dest aliases right, i.e. the subtrahend is overwritten */
++        x = left->ULCorner;
++        for (i = left->Nor; i != 0; --i)
++        {
++            FfSubRowPartialReverse(result, x, 0, rowsize);   /* presumably result := x - result; confirm against its definition */
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(lnoc);
++            FfStepPtr(&x);
++        }
++    }
++  else
++    {   /* no aliasing: copy each row of left into dest, then subtract the row of right */
++        x = right->ULCorner;
++        tmp = left->ULCorner;
++        size_t l_rowsize = rowsize * sizeof(long);   /* window row length in bytes */
++        for (i = left->Nor; i != 0; --i)
++        {
++            memcpy(result, tmp, l_rowsize);
++            FfSetNoc(lnoc);
++            FfStepPtr(&tmp);
++            FfSubRowPartial(result, x, 0, rowsize);
++            FfSetNoc(dnoc);
++            FfStepPtr(&result);
++            FfSetNoc(rnoc);
++            FfStepPtr(&x);
++        }
++    }
++  return dest;
++}
++
++/**
++   Add left*right to dest (dest is accumulated into, not overwritten) and return dest.
++
++   It is assumed that "dest->Matrix" is allocated (with the correct field and dimensions as well), so that we
++   can write the result into it. Moreover, the chunk of memory pointed at by dest MUST be disjoint
++   from the chunks for left and right!
++
++   Dimensions are not tested! The loop runs over dest->Nor rows.
++**/
++MatrixWindow_t *WindowAddMul(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
++{
++    PTR x, y, result;
++    long i;
++
++    FfSetField(left->Matrix->Field);
++    x = left->ULCorner;
++    y = right->ULCorner;
++    result = dest->ULCorner;
++
++    for (i = dest->Nor; i != 0; --i)
++    {
++      /* Set the noc of the surrounding matrix of the right factor,
++     which is assumed by FfAddMapRowWindow */
++      FfSetNoc(right->Matrix->Noc);
++      FfAddMapRowWindow(x, y, right->Nor, result, right->RowSize);
++      /* We want to step to the next line of the left factor */
++      FfSetNoc(left->Matrix->Noc);
++      FfStepPtr(&x);
++      /* We want to step to the next line of the result */
++      FfSetNoc(dest->Matrix->Noc);
++      FfStepPtr(&result);
++    }
++    /* Deliberately disabled: the caller's dest->RowSize and dest->Nor are left untouched.
++      dest->RowSize = right->RowSize;
++      dest->Nor = left->Nor;
++    */
++    return dest;
++}
++
++inline void MatrixToWindow (MatrixWindow_t *out, const Matrix_t *M, long nor, long rowsize, PTR p)
++/* Fill the caller-provided window 'out' so that it describes nor rows of rowsize longs each,
++   starting at p inside M. Nothing is allocated or copied here: 'out' merely refers to M.
++*/
++{
++  out->Matrix = M;
++  out->Nor = nor;
++  out->RowSize = rowsize;
++  out->ULCorner = p;
++}
++
++/**
++ ** Multiply matrix windows
++ ** This function multiplies @em A_win from the right by @em B_win and writes
++ ** the result into @em dest_win.
++ ** The matrix windows must be compatible for multiplication, i.e. they must be over
++ ** the same field, and the number of columns of @em A_win must be equal to the
++ ** number of rows of @em B_win.
++ ** Moreover, it is assumed that @em dest_win is allocated in the right dimensions.
++ ** Since parts of @em dest_win are used to store temporary results, it is essential
++ ** that @em dest_win initially is zero!
++ ** @param[out] dest_win Result.
++ ** @param A_win Left factor.
++ ** @param B_win Right factor
++ ** @return 0 on success, nonzero on error; the scratch windows are released on every exit path.
++ **/
++
++int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t *B_win)
++{
++  FfSetField(A_win->Matrix->Field);
++  int MPL = MPB*sizeof(long);
++  int full_nrow_cutoff = cutoff*MPL;
++  /* Determine the size of submatrices in divide-and-conquer */
++  /**
++   * Note that the rowsize is given in the unit "long".
++   * Generally we have trailing padding empty bytes. We have to cut
++   * so that two full blocks fit into the non-padded area. This is what we do:
++   * - We halve the number of rows of A (rounded down).
++   * - We halve the rowsize of B (rounded down) , since padding doesn't matter here.
++   * - We determine how many FULL longs fit into a *row* (of A) of B->Nor items.
++   *   Half of it (rounded down) gives the rowsize of A's submatrices.
++   * - From that rowsize, we obtain the corresponding number of rows of
++   *   B's submatrices.
++   **/
++  /*
++  printf("we start with A_win\n");
++  WindowShow(A_win);
++  */
++  int A_sub_nrows = A_win->Nor/2;
++  size_t B_sub_rowsize = B_win->RowSize/2;
++  size_t A_sub_rowsize = (B_win->Nor/MPL)/2;
++  int B_sub_nrows = A_sub_rowsize*MPL;
++  /*printf("A_sub_nrows %d\nA_subrowsize %d\nB_sub_nrows %d\nB_sub_rowsize %d\n", A_sub_nrows,A_sub_rowsize,B_sub_nrows,B_sub_rowsize);*/
++
++  /* If the submatrices were too small, we use school book multiplication */
++  if ((A_sub_nrows < full_nrow_cutoff) ||
++      (B_sub_nrows < full_nrow_cutoff) ||
++      (A_sub_rowsize < cutoff) ||
++      (B_sub_rowsize < cutoff))
++    {
++      /* The ambient matrix of dest_win is supposed to be empty. Thus, we add rather than overwrite */
++      /* printf("Classical for %d x %d and %d x %d\n", A_win->Nor, A_win->RowSize*MPB*sizeof(long), B_win->Nor, B_win->RowSize*MPB*sizeof(long));*/
++      WindowAddMul(dest_win, A_win, B_win);
++      return 0;
++    }
++  /* printf("Strassen step for %d x %d and %d x %d\n", A_win->Nor, A_win->RowSize*MPB*sizeof(long), B_win->Nor, B_win->RowSize*MPB*sizeof(long));*/
++  size_t B_sub_rowsize2 = B_sub_rowsize + B_sub_rowsize;
++  size_t A_sub_rowsize2 = A_sub_rowsize + A_sub_rowsize;
++  size_t B_sub_rowsize2b = B_sub_rowsize2*sizeof(long); /* size in byte */
++  size_t A_sub_rowsize2b = A_sub_rowsize2*sizeof(long);
++  int B_sub_nrows2 = B_sub_nrows + B_sub_nrows;
++  int A_sub_nrows2 = A_sub_nrows + A_sub_nrows;
++
++  Matrix_t *A, *B, *dest;
++  A = A_win->Matrix;
++  B = B_win->Matrix;
++  dest = dest_win->Matrix;
++
++  /* Because of rounding, there are stripes on the right
++   * and the lower boundary that are not part of the
++   * clean divide-and-conquer algorithm.
++   * */
++  int A_nrows_rem = A_win->Nor - A_sub_nrows2;
++  size_t A_rowsize_rem = A_win->RowSize - A_sub_rowsize2;
++
++  int B_nrows_rem = B_win->Nor - B_sub_nrows2;
++  size_t B_rowsize_rem = B_win->RowSize - B_sub_rowsize2;
++
++  /* ----------------------------------------------------
++   * Allocate temporary space.
++   * We use a schedule introduced by Douglas-Heroux-Slishman-Smith
++   * (see also Boyer-Pernet-Zhou, "Memory efficient scheduling of
++   * Strassen-Winograd's matrix multiplication algorithm", Table 1).
++     ---------------------------------------------------- */
++
++  MatrixWindow_t *X, *Y;
++  if (A_sub_rowsize>B_sub_rowsize)
++    {
++      X = WindowAlloc(A->Field, A_sub_nrows, A_sub_rowsize); }
++  else
++    {
++      X = WindowAlloc(A->Field, A_sub_nrows, B_sub_rowsize); }
++  if (X == NULL)
++  {  MTX_ERROR1("Error allocating a temporary window: %E",MTX_ERR_NOMEM);
++     return 1;
++  }
++  Y = WindowAlloc(A->Field, B_sub_nrows, B_sub_rowsize);
++  if (Y == NULL)
++    {
++      WindowFree(X);
++      MTX_ERROR1("Error allocating a temporary window: %E",MTX_ERR_NOMEM);
++      return 1;
++    }
++
++  /* Define the sub-windows of A, B and dest */
++  /*
++  printf("original windows\n");
++  printf("A\n");
++  WindowShow(A_win);
++  printf("B\n");
++  WindowShow(B_win);
++  printf("dest\n");
++  WindowShow(dest_win);
++  printf("scratch X\n");
++  WindowShow(X);
++  printf("scratch Y\n");
++  WindowShow(Y);
++  */
++  FfSetNoc(A->Noc);
++  MatrixWindow_t A00[1], A01[1], A10[1], A11[1], B00[1], B01[1], B10[1], B11[1];
++  MatrixWindow_t A_last_col[1], A_last_row[1];
++  MatrixWindow_t B_last_col[1], B_last_row[1], B_bulk[1];
++  MatrixWindow_t dest_last_col[1], dest_last_row[1], dest_bulk[1];
++  MatrixToWindow(A00, A, A_sub_nrows, A_sub_rowsize, A_win->ULCorner);
++  MatrixToWindow(A01, A, A_sub_nrows, A_sub_rowsize, (PTR)((char*)(A_win->ULCorner)+A_sub_rowsize*sizeof(long)));
++  MatrixToWindow(A10, A, A_sub_nrows, A_sub_rowsize, FfGetPtr(A_win->ULCorner, A_sub_nrows));
++  MatrixToWindow(A11, A, A_sub_nrows, A_sub_rowsize,
++          (PTR)((char*)(A_win->ULCorner)+(A_sub_nrows*FfCurrentRowSize+A_sub_rowsize*sizeof(long))));
++  /*
++  printf("A00\n");
++  WindowShow(A00);
++  printf("A01\n");
++  WindowShow(A01);
++  printf("A10\n");
++  WindowShow(A10);
++  printf("A11\n");
++  WindowShow(A11);
++  */
++  FfSetNoc(B->Noc);
++  MatrixToWindow(B00, B, B_sub_nrows, B_sub_rowsize, B_win->ULCorner);
++  MatrixToWindow(B01, B, B_sub_nrows, B_sub_rowsize, (PTR)((char*)(B_win->ULCorner)+B_sub_rowsize*sizeof(long)));
++  MatrixToWindow(B10, B, B_sub_nrows, B_sub_rowsize, FfGetPtr(B_win->ULCorner, B_sub_nrows));
++  MatrixToWindow(B11, B, B_sub_nrows, B_sub_rowsize,
++          (PTR)((char*)(B_win->ULCorner)+(B_sub_nrows*FfCurrentRowSize+B_sub_rowsize*sizeof(long))));
++  /*
++  printf("B00\n");
++  WindowShow(B00);
++  printf("B01\n");
++  WindowShow(B01);
++  printf("B10\n");
++  WindowShow(B10);
++  printf("B11\n");
++  WindowShow(B11);
++  */
++  FfSetNoc(dest->Noc);  // since we may multiply into X, the size is not necessarily the same as for B.
++  PTR dest00 = dest_win->ULCorner;
++  PTR dest01 = (PTR)((char*)(dest_win->ULCorner)+B_sub_rowsize*sizeof(long));
++  PTR dest10 = FfGetPtr(dest_win->ULCorner,A_sub_nrows);
++  PTR dest11 = (PTR)((char*)(dest_win->ULCorner)+(A_sub_nrows*FfCurrentRowSize)+B_sub_rowsize*sizeof(long));
++
++  /* Matrix windows containing temporary results */
++  MatrixWindow_t S0[1], S1[1], S2[1], S3[1], T0[1], T1[1], T2[1], T3[1], P0[1], P1[1], P2[1], P3[1], P4[1], P5[1], P6[1], U0[1], U1[1], U2[1], U3[1], U4[1], U5[1], U6[1];
++
++  /* 1.  S2 = A00-A10 in X */
++  S2->Nor = A_sub_nrows;
++  S2->RowSize = A_sub_rowsize;
++  S2->Matrix = X->Matrix;
++  S2->ULCorner = X->ULCorner;
++  WindowDif(S2, A00, A10);
++  /*
++  printf("1.  S2 = A00-A10 in X\n");
++  WindowShow(X);
++  printf("resp.\n");
++  WindowShow(S2);
++  */
++
++  /* 2.  T2 = B11-B01 in Y */
++  T2->Nor = B_sub_nrows;
++  T2->RowSize = B_sub_rowsize;
++  T2->Matrix = Y->Matrix;
++  T2->ULCorner = Y->ULCorner;
++  WindowDif(T2, B11, B01);
++  /*
++  printf("2.  T2 = B11-B01 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 3.  P6 = S2*T2 in dest10 */
++  P6->Nor = A_sub_nrows;
++  P6->RowSize = B_sub_rowsize;
++  P6->Matrix = dest;
++  P6->ULCorner = dest10;
++  /* dest is supposed to be empty; on failure release the scratch windows before bailing out */
++  if (StrassenStep(P6, S2, T2)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("3.  P6 = S2*T2 in dest10\n");
++  WindowShow(dest_win);
++  */
++
++  /* 4.  S0 = A10+A11 in X */
++  S0->Nor = A_sub_nrows;
++  S0->RowSize = A_sub_rowsize;
++  S0->Matrix = X->Matrix;
++  S0->ULCorner = X->ULCorner;
++  WindowSum(S0, A10, A11);
++  /*
++  printf("4.  S0 = A10+A11 in X\n");
++  WindowShow(X);
++  */
++
++  /* 5.  T0 = B01-B00 in Y */
++  T0->Nor = B_sub_nrows;
++  T0->RowSize = B_sub_rowsize;
++  T0->Matrix = Y->Matrix;
++  T0->ULCorner = Y->ULCorner;
++  WindowDif(T0, B01, B00);
++  /*
++  printf("5.  T0 = B01-B00 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 6.  P4 = S0*T0 in dest11 */
++  P4->Nor = A_sub_nrows;
++  P4->RowSize = B_sub_rowsize;
++  P4->Matrix = dest;
++  P4->ULCorner = dest11;
++  /* dest is supposed to be empty */
++  if (StrassenStep(P4, S0, T0)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("6.  P4 = S0*T0 in dest11\n");
++  WindowShow(dest_win);
++  */
++
++  /* 7.  S1 = S0-A00 in X */
++  S1->Nor = A_sub_nrows;
++  S1->RowSize = A_sub_rowsize;
++  S1->Matrix = X->Matrix;
++  S1->ULCorner = X->ULCorner;
++  WindowDif(S1, S0, A00);
++  /*
++  printf("7.  S1 = S0-A00 in X\n");
++  WindowShow(X);
++  */
++
++  /* 8.  T1 = B11-T0 in Y */
++  T1->Nor = B_sub_nrows;
++  T1->RowSize = B_sub_rowsize;
++  T1->Matrix = Y->Matrix;
++  T1->ULCorner = Y->ULCorner;
++  WindowDif(T1, B11, T0);
++  /*
++  printf("8.  T1 = B11-T0 in Y\n");
++  WindowShow(Y);
++  */
++
++  /* 9.  P5 = S1*T1 in dest01 */
++  P5->Nor = A_sub_nrows;
++  P5->RowSize = B_sub_rowsize;
++  P5->Matrix = dest;
++  P5->ULCorner = dest01;
++  /* dest is supposed to be empty */
++  if (StrassenStep(P5, S1, T1)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("9.  P5 = S1*T1 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*10.  S3 = A01-S1 in X */
++  S3->Nor = A_sub_nrows;
++  S3->RowSize = A_sub_rowsize;
++  S3->Matrix = X->Matrix;
++  S3->ULCorner = X->ULCorner;
++  WindowDif(S3, A01, S1);
++  /*
++  printf("10.  S3 = A01-S1 in X\n");
++  WindowShow(X);
++  */
++
++  /*11.  P2 = S3*B11 in dest00 */
++  P2->Nor = A_sub_nrows;
++  P2->RowSize = B_sub_rowsize;
++  P2->Matrix = dest;
++  P2->ULCorner = dest00;
++  /* That part of dest is still supposed to be empty */
++  if (StrassenStep(P2, S3, B11)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("11.  P2 = S3*B11 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*12.  P0 = A00*B00 in X */
++  P0->Nor = A_sub_nrows;
++  P0->RowSize = B_sub_rowsize;
++  P0->Matrix = X->Matrix;
++  P0->ULCorner = X->ULCorner;
++  /*
++     This time, the matrix we write our product to may be non-empty.
++     Hence, we clear the destination first.
++  */
++  WindowClear(P0);
++  if (StrassenStep(P0, A00, B00)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("12. P0 = A00*B00 in X\n");
++  WindowShow(X);
++  */
++
++  /*13.  U1 = P0+P5 in dest01 */
++  U1->Nor = A_sub_nrows;
++  U1->RowSize = B_sub_rowsize;
++  U1->Matrix = dest;
++  U1->ULCorner = dest01;
++  WindowSum(U1, P0, P5);
++  /*
++  printf("13.  U1 = P0+P5 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*14.  U2 = U1+P6 in dest10 */
++  U2->Nor = A_sub_nrows;
++  U2->RowSize = B_sub_rowsize;
++  U2->Matrix = dest;
++  U2->ULCorner = dest10;
++  WindowSum(U2, U1, P6);
++  /*
++  printf("14.  U2 = U1+P6 in dest10\n");
++  WindowShow(dest_win);
++  */
++
++  /*15.  U3 = U1+P4 in dest01 */
++  U3->Nor = A_sub_nrows;
++  U3->RowSize = B_sub_rowsize;
++  U3->Matrix = dest;
++  U3->ULCorner = dest01;
++  WindowSum(U3, U1, P4);
++  /*
++  printf("15.  U3 = U1+P4 in dest01\n");
++  WindowShow(dest_win);
++  */
++
++  /*16.  U6 = U2+P4 in dest11 (final) */
++  U6->Nor = A_sub_nrows;
++  U6->RowSize = B_sub_rowsize;
++  U6->Matrix = dest;
++  U6->ULCorner = dest11;
++  WindowSum(U6, U2, P4);
++  /*
++  printf("16.  U6 = U2+P4 in dest11 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*17.  U4 = U3+P2 in dest01 (final) */
++  U4->Nor = A_sub_nrows;
++  U4->RowSize = B_sub_rowsize;
++  U4->Matrix = dest;
++  U4->ULCorner = dest01;
++  WindowSum(U4, U3, P2);
++  /*
++  printf("17.  U4 = U3+P2 in dest01 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*18.  T3 = T1-B10 in Y */
++  T3->Nor = B_sub_nrows;
++  T3->RowSize = B_sub_rowsize;
++  T3->Matrix = Y->Matrix;
++  T3->ULCorner = Y->ULCorner;
++  WindowDif(T3, T1, B10);
++  /*
++  printf("18.  T3 = T1-B10 in Y\n");
++  WindowShow(Y);
++  */
++
++  /*19.  P3 = A11*T3 in dest00 */
++  P3->Nor = A_sub_nrows;
++  P3->RowSize = B_sub_rowsize;
++  P3->Matrix = dest;
++  P3->ULCorner = dest00;
++  /* Meanwhile dest00 is non-empty. Hence, overwrite */
++  WindowClear(P3);
++  if (StrassenStep(P3, A11, T3)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("19.  P3 = A11*T3 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*20.  U5 = U2-P3 in dest10 (final) */
++  U5->Nor = A_sub_nrows;
++  U5->RowSize = B_sub_rowsize;
++  U5->Matrix = dest;
++  U5->ULCorner = dest10;
++  WindowDif(U5, U2, P3);
++  /*
++  printf("20.  U5 = U2-P3 in dest10 (final)\n");
++  WindowShow(dest_win);
++  */
++
++  /*21.  P1 = A01*B10 in dest00 */
++  P1->Nor = A_sub_nrows;
++  P1->RowSize = B_sub_rowsize;
++  P1->Matrix = dest;
++  P1->ULCorner = dest00;
++  /* Again, we need to overwrite */
++  WindowClear(P1);
++  if (StrassenStep(P1, A01, B10)) { WindowFree(X); WindowFree(Y); return 1; }
++  /*
++  printf("21.  P1 = A01*B10 in dest00\n");
++  WindowShow(dest_win);
++  */
++
++  /*22.  U0 = P0+P1 in dest00 (final) */
++  U0->Nor = A_sub_nrows;
++  U0->RowSize = B_sub_rowsize;
++  U0->Matrix = dest;
++  U0->ULCorner = dest00;
++  WindowSum(U0, P0, P1);
++  /*
++  printf("22.  U0 = P0+P1 in dest00 (final)\n");
++  WindowShow(dest_win);
++  */
++  WindowFree(X);
++  WindowFree(Y);
++
++  /* ---------------------------------------------------------
++     Deal with the leftovers on the bottom and the right wing
++     --------------------------------------------------------- */
++
++  if (B_rowsize_rem)
++    {
++      MatrixToWindow(B_last_col, B, B_win->Nor, B_rowsize_rem, (PTR)((char*)(B_win->ULCorner) + B_sub_rowsize2b));
++      MatrixToWindow(dest_last_col, dest, A_win->Nor, B_rowsize_rem, (PTR)((char*)(dest_win->ULCorner) + B_sub_rowsize2b));
++      /* that part of dest is still supposed to be empty, so we can add the product */
++      WindowAddMul(dest_last_col, A_win, B_last_col);
++    }
++  if (A_nrows_rem)
++    {
++      FfSetNoc(A->Noc);
++      MatrixToWindow(A_last_row, A, A_nrows_rem, A_win->RowSize, (PTR)((char*)(A_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++      if (B_rowsize_rem) /* We have already considered the lower right corner in the previous if-clause */
++      {
++          MatrixToWindow(B_bulk, B, B_win->Nor, B_sub_rowsize2, B_win->ULCorner);
++          FfSetNoc(dest->Noc);
++          MatrixToWindow(dest_last_row, dest, A_nrows_rem, B_sub_rowsize2, (PTR)((char*)(dest_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++          /* that part of dest is still supposed to be empty, so we can add the product */
++          WindowAddMul(dest_last_row, A_last_row, B_bulk);
++      }
++      else
++      {
++          FfSetNoc(dest->Noc);
++          MatrixToWindow(dest_last_row, dest, A_nrows_rem, B_win->RowSize, (PTR)((char*)(dest_win->ULCorner) + (A_sub_nrows2*FfCurrentRowSize)));
++          /* that part of dest is still supposed to be empty, so we can add the product */
++          WindowAddMul(dest_last_row, A_last_row, B_win);
++      }
++    }
++  if (A_rowsize_rem)
++    { /* By the above operations, we don't need to consider the lower right corner of either A or B. */
++      MatrixToWindow(A_last_col, A, A_sub_nrows2, A_rowsize_rem, (PTR)((char*)(A_win->ULCorner) + A_sub_rowsize2b));
++      FfSetNoc(B->Noc);
++      MatrixToWindow(B_last_row, B, B_nrows_rem, B_sub_rowsize2, (PTR)((char*)(B_win->ULCorner) + (B_sub_nrows2*FfCurrentRowSize)));
++      FfSetNoc(dest->Noc);
++      MatrixToWindow(dest_bulk, dest, A_sub_nrows2, B_sub_rowsize2, dest_win->ULCorner);
++      /* now we are supposed to add the product to the result obtained so far */
++      WindowAddMul(dest_bulk, A_last_col, B_last_row);
++    }
++  return 0;
++}
++
++/**
++ ** Multiply matrices
++ ** This function multiplies @em A from the right by @em B and writes
++ ** the result into @em dest.
++ ** The matrices must be compatible for multiplication, i.e. they must be over
++ ** the same field, and the number of columns of @em A must be equal to the
++ ** number of rows of @em B. None of this is verified here.
++ ** Moreover, it is assumed that @em dest is allocated in the right dimensions.
++ ** Since parts of @em dest are used to store temporary results, it is essential
++ ** that @em dest initially is zero!
++ ** @param[out] dest Result.
++ ** @param A Left factor.
++ ** @param B Right factor
++ ** @return The function returns @em dest, or NULL on error.
++ **/
++Matrix_t *MatMulStrassen(Matrix_t *dest, const Matrix_t *A, const Matrix_t *B)
++{
++  FfSetField(A->Field);
++  MatrixWindow_t A_win[1], B_win[1], dest_win[1];   /* stack windows spanning the whole matrices */
++  FfSetNoc(A->Noc);   /* LPR is read right after each FfSetNoc; presumably it is the row length in longs for that Noc -- confirm in meataxe.h */
++  MatrixToWindow(A_win, A, A->Nor, LPR, A->Data);
++  FfSetNoc(B->Noc);
++  MatrixToWindow(B_win, B, B->Nor, LPR, B->Data);
++  FfSetNoc(dest->Noc);
++  MatrixToWindow(dest_win, dest, A->Nor, LPR, dest->Data);   /* the result has as many rows as A */
++  if (StrassenStep(dest_win, A_win, B_win)) return NULL;   /* nonzero return signals an error */
++  return dest;
++}
diff --git a/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch b/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch
new file mode 100644
index 00000000000..1959336c3fe
--- /dev/null
+++ b/build/pkgs/meataxe/patches/StrassenWinogradUsage.patch
@@ -0,0 +1,359 @@
+Use Strassen-Winograd multiplication in some MeatAxe functions.
+
+AUTHOR: Simon King 2015-09-18, simon.king@uni-jena.de 
+
+diff --git a/src/chbasis.c b/src/chbasis.c
+index 1ea9c9f..34cf886 100644
+--- a/src/chbasis.c
++++ b/src/chbasis.c
+@@ -9,6 +9,8 @@
+ 
+ 
+ #include "meataxe.h"
++#include <stdlib.h>
++#include <string.h>
+ 
+ MTX_DEFINE_FILE_INFO
+ 
+@@ -36,9 +38,6 @@ MTX_DEFINE_FILE_INFO
+ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 
+ {
+-    Matrix_t *bi;
+-    int i;
+-
+     /* Check arguments
+        --------------- */
+     if (!MrIsValid(rep))
+@@ -46,11 +45,6 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 	MTX_ERROR1("rep: %E",MTX_ERR_BADARG);
+ 	return -1;
+     }
+-    if (!MatIsValid(trans))
+-    {
+-	MTX_ERROR1("trans: %E",MTX_ERR_BADARG);
+-	return -1;
+-    }
+     if (rep->NGen <= 0)
+ 	return 0;
+     if (trans->Field != rep->Gen[0]->Field || 
+@@ -60,54 +54,50 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ 	MTX_ERROR1("%E",MTX_ERR_INCOMPAT);
+ 	return -1;
+     }
+-
+-
+-    /* Basis transformation
+-       -------------------- */
+-    if ((bi = MatInverse(trans)) == NULL) 
+-    {
+-	MTX_ERROR("Basis transformation is singular");
+-	return -1;
+-    }
+-    for (i = 0; i < rep->NGen; ++i)
+-    {
+-	Matrix_t *tmp = MatDup(trans);
+-	MatMul(tmp,rep->Gen[i]);
+-	MatMul(tmp,bi);
+-        MatFree(rep->Gen[i]);
+-	rep->Gen[i] = tmp;
+-    }
+-    MatFree(bi);
+-    return 0;
++    return ChangeBasis(trans, rep->NGen, (const Matrix_t **)(rep->Gen), rep->Gen);
+ }
+ 
+ 
+-
+-int ChangeBasisOLD(const Matrix_t *M, int ngen, const Matrix_t *gen[],
++/** Conjugate a list @em gen of @em ngen square matrices over the same
++ *  field and of the same dimensions by a matrix @em trans
++ *  and write the result into @em newgen. If @em gen == @em newgen, then
++ *  the previous content of @em newgen will be overwritten. **/
++int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[])
+ 
+ {
+-    Matrix_t *bi, *tmp;
++    Matrix_t *bi;
+     int i;
+ 
+     MTX_VERIFY(ngen >= 0);
+-    if (!MatIsValid(M))
++    if (!MatIsValid(trans))
++    {
++	MTX_ERROR1("trans: %E",MTX_ERR_BADARG);
+ 	return -1;
+-    if ((bi = MatInverse(M)) == NULL) 
++    }
++
++    if ((bi = MatInverse(trans)) == NULL)
+     {
+-	MTX_ERROR("Matrix is singular");
++	MTX_ERROR("Basis transformation is singular");
+ 	return -1;
+     }
++
++    Matrix_t *tmp = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++    size_t tmpsize = FfCurrentRowSize*trans->Nor;
+     for (i = 0; i < ngen; ++i)
+     {
+-	tmp = MatDup(M);
+-	MatMul(tmp,gen[i]);
+-	MatMul(tmp,bi);
+-	if ((const Matrix_t **)newgen == gen)
+-	    MatFree(newgen[i]);
+-	newgen[i] = tmp;
++        MTX_VERIFY(gen[i]->Nor==trans->Nor);
++        MTX_VERIFY(gen[i]->Noc==trans->Noc);
++        memset(tmp->Data, FF_ZERO, tmpsize);
++        MatMulStrassen(tmp, trans, gen[i]);
++        if ((const Matrix_t **)newgen == gen)
++            memset(newgen[i]->Data, FF_ZERO, tmpsize);
++        else
++            newgen[i] = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++        MatMulStrassen(newgen[i], tmp, bi);
+     }
+     MatFree(bi);
++    MatFree(tmp);
+     return 0;
+ }
+ 
+diff --git a/src/chop.c b/src/chop.c
+index 65a2a98..0f3f38f 100644
+--- a/src/chop.c
++++ b/src/chop.c
+@@ -538,7 +538,7 @@ static int checkspl(const MatRep_t *rep, Matrix_t *nsp)
+        ------------------------------------------------------------ */
+     sb1 = SpinUp(nsp,rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+     MTX_VERIFY(sb1 != NULL && sb1->Nor == sb1->Noc);
+-    ChangeBasisOLD(sb1,LI.NGen,(const Matrix_t **)rep->Gen,g1);
++    ChangeBasis(sb1,LI.NGen,(const Matrix_t **)rep->Gen,g1);
+     endo = MrAlloc(0,NULL,0);
+ 
+     sb2 = NULL;	/* Mark as unused */
+@@ -576,7 +576,7 @@ static int checkspl(const MatRep_t *rep, Matrix_t *nsp)
+ 	sb2 = SpinUp(v2,rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+ 	MTX_VERIFY(sb2 != NULL && sb2->Nor == sb2->Noc);
+ 	MatFree(v2);
+-	ChangeBasisOLD(sb2,rep->NGen,(const Matrix_t **)rep->Gen,g2);
++	ChangeBasis(sb2,rep->NGen,(const Matrix_t **)rep->Gen,g2);
+ 
+ 	/* Compare the two representations. If they are different,
+ 	   we know that the splitting field degree must be smaller
+@@ -762,7 +762,7 @@ static void newirred(node_t *n)
+     LI.Cf[i].spl = n->spl = n->nsp->Nor;
+     b = SpinUp(n->nsp,n->Rep,SF_FIRST|SF_CYCLIC|SF_STD,NULL,NULL);
+     MTX_VERIFY(b != NULL && b->Nor == b->Noc);
+-    ChangeBasisOLD(b,LI.NGen,(const Matrix_t **)n->Rep->Gen,n->Rep->Gen);
++    ChangeBasis(b,LI.NGen,(const Matrix_t **)n->Rep->Gen,n->Rep->Gen);
+     MatFree(b);
+ 
+     /* Write out the generators
+diff --git a/src/homcomp.c b/src/homcomp.c
+index a808089..351af2b 100644
+--- a/src/homcomp.c
++++ b/src/homcomp.c
+@@ -112,10 +112,10 @@ Matrix_t *HomogeneousPart(MatRep_t *m, MatRep_t *s, Matrix_t *npw,
+ 	{
+ 	    PTR matptr = MatGetPtr(A,j);
+ 	    int u;
+-	    a = MatDup(V[j]);
+-	    b = MatDup(s->Gen[i]);
+-	    MatMul(a,m->Gen[i]);		/* the equations that describe  */
+-	    MatMul(b,V[j]);			/* that a vector in the null-   */
++	    a = MatAlloc(V[j]->Field, V[j]->Nor, m->Gen[i]->Noc);
++	    b = MatAlloc(s->Gen[i]->Field, s->Gen[i]->Nor, V[j]->Noc);
++	    MatMulStrassen(a, V[j], m->Gen[i]);		/* the equations that describe  */
++	    MatMulStrassen(b,s->Gen[i], V[j]);			/* that a vector in the null-   */
+ 	    MatMulScalar(b,FfNeg(FF_ONE));	/* space is the first element   */
+ 	    MatAdd(a, b);			/* of a standard basis of a     */ 
+ 					/* module isomorphic to S       */
+diff --git a/src/isisom.c b/src/isisom.c
+index 790d2b0..e2b7f07 100644
+--- a/src/isisom.c
++++ b/src/isisom.c
+@@ -9,7 +9,7 @@
+ 
+ 
+ #include "meataxe.h"
+-
++#include <string.h>
+ 
+ MTX_DEFINE_FILE_INFO
+ 
+@@ -114,7 +114,7 @@ int IsIsomorphic(const MatRep_t *rep1, const CfInfo *info1,
+ {
+     int j;
+     WgData_t *wg;
+-    Matrix_t  *word, *m, *seed, *b, *bi;
++    Matrix_t  *word, *m, *seed, *b, *g1, *g2;
+     int result;
+ 
+     if (CheckArgs(rep1->NGen,rep1->Gen,info1,rep2->Gen,use_pw) != 0)
+@@ -148,27 +148,35 @@ int IsIsomorphic(const MatRep_t *rep1, const CfInfo *info1,
+ 	MatFree(b);
+ 	return 0;
+     }
+-    bi = MatInverse(b);
+ 
+     /* Compare generators
+        ------------------ */
++    /**
++     * We test whether b*rep2_j*b^-1 == rep1_j
++     * by testing whether b*rep2_j == rep1_j*b
++     * */
++    g1 = MatAlloc(b->Field, b->Nor, b->Noc);
++    g2 = MatAlloc(b->Field, b->Nor, b->Noc);
++    size_t memsize = FfCurrentRowSize*b->Nor;
+     for (j = 0, result = 0; result == 0 && j < rep2->NGen; ++j)
+     {
+-	Matrix_t *g = MatDup(b);
+-	MatMul(g,rep2->Gen[j]);
+-	MatMul(g,bi);
+-	if (MatCompare(g,rep1->Gen[j]) != 0)
+-	    result = 1;
+-	MatFree(g);
++	MatMulStrassen(g2, b, rep2->Gen[j]);
++	MatMulStrassen(g1, rep1->Gen[j], b);
++	if (MatCompare(g1, g2) != 0)
++	    {   result = 1;
++            break;
++        }
++	memset(g1->Data, FF_ZERO, memsize);
++    memset(g2->Data, FF_ZERO, memsize);
+     }
+ 
+     /* Clean up 
+        -------- */
+     if (trans != NULL && result == 0)
+-	*trans = b;
++        *trans = b;
+     else
+-	MatFree(b);
+-    MatFree(bi);
+-
++        MatFree(b);
++    MatFree(g1);
++    MatFree(g2);
+     return (result == 0);
+ }
+diff --git a/src/meataxe.h b/src/meataxe.h
+index e2f5a84..5123f1c 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -1096,11 +1096,7 @@ int LdFree(LdLattice_t *l);
+ int LdAddIncidence(LdLattice_t *lat, int sub, int sup);
+ int LdSetPositions(LdLattice_t *l);
+ 
+-
+-
+-
+-/* OLD STUFF */
+-int ChangeBasisOLD(const Matrix_t *M, int ngen, const Matrix_t *gen[],
++int ChangeBasis(const Matrix_t *M, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[]);
+ 
+ 
+diff --git a/src/mktree.c b/src/mktree.c
+index ede7881..3e99489 100644
+--- a/src/mktree.c
++++ b/src/mktree.c
+@@ -213,8 +213,8 @@ static int MakeTree()
+ 	{
+ 	    /* Calculate next element
+ 	       ---------------------- */
+-	    Matrix_t *newelem = MatDup(Elms[src].Matrix);
+-	    MatMul(newelem,Rep->Gen[g]);
++	    Matrix_t *newelem = MatAlloc(Elms[src].Matrix->Field, Elms[src].Matrix->Nor, Rep->Gen[g]->Noc);
++	    MatMulStrassen(newelem, Elms[src].Matrix, Rep->Gen[g]);
+ 
+ 	    /* If it is new, add to tree, else discard
+ 	       --------------------------------------- */
+diff --git a/src/precond.c b/src/precond.c
+index f144716..efc2f3d 100644
+--- a/src/precond.c
++++ b/src/precond.c
+@@ -391,8 +391,8 @@ static void MakePQ(int n, int mj, int nj)
+ 	for (k = 0; k < spl; ++k)
+ 	{
+ 	    FEL f;
+-	    Matrix_t *x = MatDup(endo[i]);  
+-	    MatMul(x,endo[k]);
++	    Matrix_t *x = MatAlloc(endo[i]->Field, endo[i]->Nor, endo[k]->Noc);  
++	    MatMulStrassen(x,endo[i],endo[k]);
+ 	    f = MatTrace(x);
+ 	    FfInsert(pptr,k,f);
+ 	    MatFree(x);
+diff --git a/src/pseudochop.c b/src/pseudochop.c
+index 68cadae..3f1fa97 100644
+--- a/src/pseudochop.c
++++ b/src/pseudochop.c
+@@ -105,8 +105,8 @@ int main(int argc, const char *argv[])
+         {
+ 	    Matrix_t *newmat;
+             oldnul = newnul;
+-            newmat = MatDup(old);
+-            MatMul(newmat,old);
++            newmat = MatAlloc(old->Field, old->Nor, old->Noc);
++            MatMulStrassen(newmat, old, old);
+             MatFree(old);
+             MatFree(nulsp);
+             old = MatDup(newmat);
+diff --git a/src/pwkond.c b/src/pwkond.c
+index c14c20e..5eaa5de 100644
+--- a/src/pwkond.c
++++ b/src/pwkond.c
+@@ -309,8 +309,8 @@ static void gkond(const Lat_Info *li, int i, Matrix_t *b, Matrix_t *k,
+     char fn[LAT_MAXBASENAME+10];
+     Matrix_t *x1, *x2;
+ 
+-    x1 = MatDup(k);
+-    MatMul(x1,w);
++    x1 = MatAlloc(k->Field, k->Nor, w->Noc);
++    MatMulStrassen(x1, k, w);
+     x2 = QProjection(b,x1);
+     sprintf(fn,"%s%s.%s",li->BaseName,Lat_CfName(li,i),name);
+     MatSave(x2,fn);
+@@ -340,7 +340,7 @@ static void Standardize(int cf)
+     MESSAGE(0,("  Transforming to standard basis\n"));
+     sb = SpinUp(CfList[cf].PWNullSpace,CfList[cf].Gen,
+ 	SF_FIRST|SF_CYCLIC|SF_STD,&script,NULL);
+-    ChangeBasisOLD(sb,CfList[cf].Gen->NGen,
++    ChangeBasis(sb,CfList[cf].Gen->NGen,
+ 	(const Matrix_t **)CfList[cf].Gen->Gen,std);
+     MatFree(sb);
+ 
+@@ -782,7 +782,7 @@ static int try2(long w, FEL f)
+ 	    	MESSAGE(3,("failed\n"));
+ 		return -1;  /* Nullity should be 0 */
+ 	    }
+-	    nul = MatNullity__(MatMul(MatDup(word),word));
++	    nul = MatNullity__(MatMulStrassen(MatAlloc(word->Field, word->Nor, word->Noc), word, word));
+ 	    if (nul != CfList[i].Info->spl)
+ 	    {
+ 		MatFree(word);
+@@ -915,7 +915,7 @@ static int try_p(long w)
+ 	       /* Check if the nullity is stable
+ 	          ------------------------------ */
+ 	       wp = MatInsert(word,mp->Factor[k]);
+-	       wp2 = MatMul(MatDup(wp),wp);
++	       wp2 = MatMulStrassen(MatAlloc(wp->Field, wp->Nor, wp->Noc), wp, wp);
+ 	       MatFree(wp);
+ 	       nul = MatNullity__(wp2);
+ 	       if (nul != CfList[i].Info->spl) 
+diff --git a/src/soc.c b/src/soc.c
+index 789a02b..199a2e0 100644
+--- a/src/soc.c
++++ b/src/soc.c
+@@ -294,8 +294,8 @@ static int NextLayer()
+ 	Matrix_t *mat, *stgen;
+ 
+ 	mat = MatCutRows(basis,basis->Nor - Dimension,Dimension);
+-	stgen = MatDup(bas);
+-	MatMul(stgen, mat);
++	stgen = MatAlloc(bas->Field, bas->Nor, mat->Noc);
++	MatMulStrassen(stgen, bas, mat);
+ 	MatCopyRegion(basis,basis->Nor - Dimension,0,stgen,0,0,Dimension,-1);
+ 	MatFree(mat);
+ 	MatFree(stgen);

From 17266791b4e1d8b79cbabccce7fa48a8f44bcacb Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 13 Sep 2015 10:56:13 +0200
Subject: [PATCH 14/23] A very basic MeatAxe Cython wrapper

---
 src/module_list.py                    |    5 +
 src/sage/libs/meataxe.pxd             |  128 +++
 src/sage/matrix/matrix_gfpn_dense.pxd |   31 +
 src/sage/matrix/matrix_gfpn_dense.pyx | 1101 +++++++++++++++++++++++++
 src/sage/matrix/matrix_space.py       |   30 +-
 5 files changed, 1286 insertions(+), 9 deletions(-)
 create mode 100644 src/sage/libs/meataxe.pxd
 create mode 100644 src/sage/matrix/matrix_gfpn_dense.pxd
 create mode 100644 src/sage/matrix/matrix_gfpn_dense.pyx

diff --git a/src/module_list.py b/src/module_list.py
index 66e432267ee..f49165d4392 100644
--- a/src/module_list.py
+++ b/src/module_list.py
@@ -903,6 +903,11 @@ def uname_specific(name, value, alternative):
     Extension('sage.matrix.matrix_window',
               sources = ['sage/matrix/matrix_window.pyx']),
 
+    OptionalExtension("sage.matrix.matrix_gfpn_dense",
+              sources = ['sage/matrix/matrix_gfpn_dense.pyx'],
+              libraries = ['mtx'],
+              package = 'meataxe'),
+
     Extension('sage.matrix.misc',
               sources = ['sage/matrix/misc.pyx'],
               libraries=['mpfr']),
diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
new file mode 100644
index 00000000000..2b413533c4a
--- /dev/null
+++ b/src/sage/libs/meataxe.pxd
@@ -0,0 +1,128 @@
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+#
+# Import SOME features from meataxe.h
+# (most types are not needed, but listed here
+# in the comments, for completeness)
+#
+cdef extern from "meataxe.h":
+    # general ctype emulations
+    # ctypedef int size_t   # size_t should be a standard type!
+    ctypedef unsigned long Ulong
+    ctypedef unsigned short Ushort
+    ctypedef unsigned char Uchar
+    ctypedef unsigned char FEL
+    ctypedef FEL *PTR
+
+    # global constants
+    cdef extern int FfOrder             # Current field order
+    cdef extern int FfChar              # Current characteristic
+    cdef extern FEL FfGen               # Generator
+    cdef extern int FfNoc               # Number of columns for row ops
+    cdef extern size_t FfCurrentRowSize # The byte size of a single row in memory,
+                                        # always a multiple of sizeof(long)
+    cdef extern size_t FfCurrentRowSizeIo # The number of bytes actually used in a row.
+    cdef extern char MtxLibDir[250]     # Where to search/create multiplication tables
+
+    # we only wrap MeatAxe for small fields (size < 255)
+    cdef extern FEL mtx_tmult[256][256]
+    cdef extern FEL mtx_tadd[256][256]
+    cdef extern FEL mtx_taddinv[256]
+    cdef extern FEL mtx_tmultinv[256]
+    cdef extern FEL mtx_tinsert[8][256]
+    cdef extern FEL mtx_textract[8][256]
+    cdef extern FEL FF_ONE, FF_ZERO
+
+#########################################
+# function prototypes
+    ## global parameters
+    size_t FfRowSize(int noc)
+    size_t FfTrueRowSize(int noc) # Difference to FfRowSize: Doesn't count padding bytes
+    int FfSetField(int field)
+    int FfSetNoc(int ncols)
+
+    ## Finite Fields
+    # FEL FfAdd(FEL a,FEL b)
+    # FEL FfSub(FEL a, FEL b)
+    # FEL FfNeg(FEL a)
+    # FEL FfMul(FEL a, FEL b)
+    # FEL FfDiv(FEL a, FEL b)
+    # FEL FfInv(FEL a)
+    # FEL FfEmbed(FEL a, int subfield)
+    # FEL FfRestrict(FEL a, int subfield)
+    FEL FfFromInt(int l)
+    int FfToInt(FEL f)
+
+    ## Rows
+    void FfMulRow(PTR row, FEL mark)
+    # void FfAddMulRow(PTR dest, PTR src, FEL f)
+    PTR FfAddRow(PTR dest, PTR src)
+    FEL FfExtract(PTR row, int col)
+    void FfInsert(PTR row, int col, FEL mark)
+    int FfFindPivot(PTR row, FEL *mark)
+    # FEL FfScalarProduct(PTR a, PTR b)
+    # void FfSwapRows(PTR dest, PTR src)
+    # void FfPermRow(PTR row, long *perm, PTR result)
+    # int FfCmpRows(PTR p1, PTR p2)
+
+    ## multiple rows
+    PTR FfAlloc(int nor)
+    void FfExtractColumn(PTR mat,int nor,int col,PTR result)
+    int FfStepPtr(PTR *x)  # Advance to next row
+    PTR FfGetPtr(PTR base, int row)  # Advance to "row" rows after base
+    void FfInsert(PTR row, int col, FEL mark)
+    void FfMapRow(PTR row, PTR matrix, int nor, PTR result)
+
+    ############
+    ## Skip: Application, error handling, i/o
+
+    ############
+    ## Matrices
+    ############
+    ctypedef struct Matrix_t:
+        unsigned long Magic         #/* Used internally */
+        int Field, Nor, Noc     #/* Field, #rows, #columns */
+        PTR Data            #/* Pointer to data area */
+        int RowSize                     # Size (in bytes) of one row
+        int *PivotTable                 # Pivot table (if matrix is in echelon form)
+    ## Basic memory operations
+    Matrix_t *MatAlloc(int field, int nor, int noc)
+    int MatFree(Matrix_t *mat)
+    PTR MatGetPtr(Matrix_t *mat, int row)
+    int MatCompare(Matrix_t *a, Matrix_t *b)
+    # int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols)
+    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols)
+    # Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows)
+    Matrix_t *MatDup(Matrix_t *src)
+    Matrix_t *MatId(int fl, int nor)
+    Matrix_t *MatLoad(char *fn)
+    int MatSave(Matrix_t *mat, char *fn)
+
+
+    ## Basic Arithmetic  ## general rule: dest is changed, src/mat are unchanged!
+    Matrix_t *MatTransposed(Matrix_t *src)
+    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src)
+    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff)
+    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src)
+    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
+    Matrix_t *MatPower(Matrix_t *mat, long n)
+    FEL MatTrace(Matrix_t *mat)
+    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B)
+    void StrassenSetCutoff(size_t size)
+
+    ## "Higher" Arithmetic ## all arguments are unchanged
+    # int MatClean(Matrix_t *mat, Matrix_t *sub)
+    int MatEchelonize(Matrix_t *mat)
+    int MatOrder(Matrix_t *mat)
+    long MatNullity(Matrix_t *mat)
+    Matrix_t *MatInverse(Matrix_t *src)
+    Matrix_t *MatNullSpace(Matrix_t *mat)
+# that's all of meataxe.h!
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
new file mode 100644
index 00000000000..61025adf6a3
--- /dev/null
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -0,0 +1,31 @@
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+cdef class FieldConverter_class:
+    cdef object field  # that's a function converting an int to a field element
+    cdef object int_to_field(self, int x)
+    cdef int field_to_int(self, x)
+
+from sage.matrix.matrix_dense cimport Matrix_dense
+from sage.structure.element cimport Matrix
+from sage.libs.meataxe cimport *
+
+cdef class Matrix_gfpn_dense(Matrix_dense):
+    cdef Matrix_t *Data
+    cdef FieldConverter_class _converter
+    #cpdef Matrix_gfpn_dense normalized(Matrix_gfpn_dense self)
+    #cpdef Matrix_gfpn_dense semi_echelon(Matrix_gfpn_dense self)
+    #cpdef int nullity(Matrix_gfpn_dense self)
+    #cpdef tuple lead(self)
+    cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
+    cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
+    cdef Matrix _matrix_times_matrix_(self, Matrix right)
+    cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right)
+    cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=*)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
new file mode 100644
index 00000000000..6021c554828
--- /dev/null
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -0,0 +1,1101 @@
+r"""
+Dense Matrices over `\mathbb F_q`, with `q<255` odd and not prime
+
+This module is a wrapper for version 2.4.24 of the Aachen
+`C-MeatAxe <http://www.math.rwth-aachen.de/homes/MTX/download.html>`_,
+improved by an implementation of the Winograd-Strassen multiplication
+algorithm. It provides matrices over the finite field `\mathbb F_q`,
+where `q<255` is odd and not prime.
+
+AUTHORS:
+
+- Simon King (2015-09-18): initial version
+
+"""
+
+#*****************************************************************************
+#       Copyright (C) 2015 Simon King <simon.king@uni-jena.de>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  http://www.gnu.org/licenses/
+#*****************************************************************************
+
+
+## Define an environment variable that enables MeatAxe to find
+## its multiplication tables.
+
+from sage.env import DOT_SAGE
+import os
+cdef extern from "Python.h":
+    object PyString_FromStringAndSize(char *s, Py_ssize_t len)
+    char* PyString_AsString(object string)
+MtxLibDir = PyString_AsString(os.path.join(DOT_SAGE,'meataxe'))
+
+####################
+#
+# import sage types
+#
+####################
+
+from sage.rings.integer import Integer
+from sage.rings.finite_rings.constructor import GF
+from sage.rings.finite_rings.integer_mod import IntegerMod_int
+from sage.matrix.constructor import random_matrix
+from sage.rings.arith import is_prime_power, factor
+from sage.matrix.matrix_space import MatrixSpace
+from sage.misc.randstate import current_randstate
+from sage.misc.cachefunc import cached_method, cached_function
+from sage.structure.element cimport Element, ModuleElement, RingElement, Matrix
+
+include 'sage/ext/stdsage.pxi'
+
+####################
+#
+# auxiliary functions
+#
+####################
+import sys
+from libc.string cimport memcpy
+
+cdef inline int setfield(long n) except -1:
+    # This is a wrapper around FfSetField, but
+    # we guard it against MTX_Error, which would immediately
+    # crash the Sage session.
+    if n == FfOrder:
+        return 0
+    if not (0 < n < 255 and is_prime_power(n)):
+        raise ValueError("Only finite fields of order at most 255 are supported")
+    return FfSetField(n)
+
+# Fast conversion from field to int and int to field
+cdef class FieldConverter_class:
+    """
+    An auxiliary class, used to convert between <int> and finite field element
+
+    This class is for non-prime fields only. The method
+    :meth:`int_to_field` exists for speed. The method
+    :meth:`field_to_int` exists in order to have a common interface
+    for elements of prime and non-prime fields; see
+    :class:`PrimeFieldConverter_class`.
+
+    EXAMPLE::
+
+        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+        sage: F.<y> = GF(125)
+        sage: C = FieldConverter_class(F)
+        sage: C.int_to_field(15)
+        3*y
+        sage: F.fetch_int(15)
+        3*y
+        sage: %timeit C.int_to_field(15)    #not tested
+        625 loops, best of 3: 1.04 µs per loop
+        sage: %timeit F.fetch_int(15)       #not tested
+        625 loops, best of 3: 3.97 µs per loop
+        sage: C.field_to_int(y)
+        5
+        sage: y.integer_representation()
+        5
+
+    """
+    def __init__(self, field):
+        """
+        INPUT:
+
+        A finite *non-prime* field. This assumption is not tested.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.int_to_field(15)
+            3*y
+            sage: F.fetch_int(15)
+            3*y
+            sage: C.field_to_int(y)
+            5
+            sage: y.integer_representation()
+            5
+
+        """
+        self.field = field._cache.fetch_int
+    cdef object int_to_field(self, int x):
+        """
+        Fetch a python int into the field.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.int_to_field(15)
+            3*y
+            sage: F.fetch_int(15)
+            3*y
+
+        """
+        return self.field(x)
+    cdef int field_to_int(self, x):
+        """
+        Represent a field element by a python int.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: F.<y> = GF(125)
+            sage: C = FieldConverter_class(F)
+            sage: C.field_to_int(y)
+            5
+            sage: y.integer_representation()
+            5
+
+        """
+        return x.integer_representation()
+
+cdef class PrimeFieldConverter_class(FieldConverter_class):
+    """
+    An auxiliary class, used to convert between <int> and finite field element
+
+    This class is for prime fields only. The methods
+    :meth:`int_to_field` and :meth:`field_to_int` exist in order to
+    have a common interface for elements of prime and non-prime fields;
+    see :class:`FieldConverter_class`.
+
+    EXAMPLE::
+
+        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+        sage: F = GF(5)
+        sage: C = PrimeFieldConverter_class(F)
+        sage: C.int_to_field(int(2))
+        2
+        sage: F(2)
+        2
+        sage: C.field_to_int(F(2))
+        2
+        sage: int(F(2))
+        2
+
+    """
+    def __init__(self, field):
+        """
+        INPUT:
+
+        A finite *prime* field. This assumption is not tested.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.int_to_field(int(2))
+            2
+            sage: F(2)
+            2
+            sage: C.field_to_int(F(2))
+            2
+            sage: int(F(2))
+            2
+
+        """
+        self.field = field
+    cdef object int_to_field(self, int x):
+        """
+        Fetch a python int into the field.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.int_to_field(int(2))
+            2
+            sage: F(2)
+            2
+
+        """
+        return IntegerMod_int(self.field, x)
+    cdef int field_to_int(self, x):
+        """
+        Represent a field element by a python int.
+
+        EXAMPLE::
+
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: F = GF(5)
+            sage: C = PrimeFieldConverter_class(F)
+            sage: C.field_to_int(F(2))
+            2
+            sage: int(F(2))
+            2
+
+        """
+        return int(x)
+
+cdef dict _converter_cache = {}
+cdef FieldConverter_class FieldConverter(field):
+    """
+    Return a :class:`FieldConverter_class` or :class:`PrimeFieldConverter_class` instance,
+    depending on whether the field is prime or not.
+
+    EXAMPLE::
+
+        sage: MS = MatrixSpace(GF(5^3,'y'),2)
+        sage: A = MS.random_element()
+        sage: A*2 == A+A    # indirect doctest
+        True
+        sage: A = MS.random_element()
+        sage: A*2 == A+A
+        True
+
+    """
+    try:
+        return _converter_cache[field]
+    except KeyError:
+        if field.is_prime_field():
+            return _converter_cache.setdefault(field, PrimeFieldConverter_class(field))
+        return _converter_cache.setdefault(field, FieldConverter_class(field))
+
+
+
+cdef class Matrix_gfpn_dense(Matrix_dense):
+    r"""
+    Dense matrices over `\mathbb F_q`, `q<255` odd and not prime.
+
+    NOTE:
+
+    This class uses a major modification of the Aachen C-MeatAxe
+    as backend. In principle, it would also work for prime fields
+    and in characteristic two. However, other matrices in Sage,
+    relying on linbox, m4ri or m4rie, are more efficient in these
+    cases.
+
+    EXAMPLES::
+
+        sage: M = MatrixSpace(GF(25,'z'),2,3)([1,2,3,4,5,6])
+        sage: print M
+        [1 2 3]
+        [4 0 1]
+        sage: type(M)
+        <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+
+    The documentation of the ``__init__`` methods shows further
+    ways of creating a :class:`Matrix_gfpn_dense` instance.
+    However, these should only be of internal use.
+
+    """
+##################
+## Init, Dealloc, Copy
+    def __cinit__(self, parent=None, entries=None, *args, **kwds):
+        """
+        TESTS::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # indirect doctest
+            []
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)
+            [0 0 0 0]
+            [0 0 0 0]
+            [0 0 0 0]
+            [0 0 0 0]
+
+        """
+        if parent is None:  # this makes Matrix_gfpn_dense.__new__(Matrix_gfpn_dense) work,
+                            # returning a non-initialised matrix
+            return
+        if isinstance(parent, basestring): # this allows to provide a file when initialising a matrix
+            return
+        cdef int f = parent.base_ring().order()
+        cdef int nrows = parent.nrows()
+        cdef int ncols = parent.ncols()
+        self.Data = MatAlloc(f, nrows, ncols)
+
+    def __init__(self, parent, data=None, mutable=True, copy=False, coerce=False):
+        """
+        Matrix extension class using libmeataxe as backend
+
+        INPUT:
+
+        Instances of this class can be created by providing one of
+        the following input data, where ``q<255`` is a prime power,
+        ``m,n`` are non-negative integers, and `a_{11},...,a_{mn}`
+        can be coerced into ``GF(q)``. Note that a user should
+        create these instances via the matrix constructors; what
+        we explain here is for internal use only!
+
+        - None => empty matrix over an unspecified field (used for unpickling)
+        - a string ``f`` ==> load matrix from the file named ``f``
+        - A matrix space of `m\\times n` matrices over GF(q) and either
+
+          - a list `[a_{11},a_{12},...,a_{1n},a_{21},...,a_{m1},...,a_{mn}]`,
+            which results in a matrix with the given marks
+          - ``None``, which is the fastest way to create a zero matrix.
+          - an element of GF(q), which results in a diagonal matrix with the
+            given element on the diagonal.
+
+        If the optional parameter ``mutable`` is ``False`` (by default,
+        it is ``True``), the resulting matrix can not be changed, and
+        it can be used as key in a Python dictionary.
+
+        The arguments ``copy`` and ``coerce`` are ignored, they are only
+        here for a common interface with :class:`~sage.matrix.matrix.Matrix`.
+
+        EXAMPLES::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+
+        1. Creating an empty matrix::
+
+            sage: Matrix_gfpn_dense(None)
+            []
+
+        2. Creating a zero (3x2)-matrix::
+
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))
+            [0 0]
+            [0 0]
+            [0 0]
+
+        3. Creating a matrix from a list or list of lists::
+
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])
+            [1 2 3]
+            [4 0 1]
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])  # indirect doctest
+            [1 2 3]
+            [4 0 1]
+
+        4. Creating a diagonal matrix::
+
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M
+            [2 0 0 0 0]
+            [0 2 0 0 0]
+            [0 0 2 0 0]
+            [0 0 0 2 0]
+            [0 0 0 0 2]
+
+        5. Creating a matrix from a file in MeatAxe format.
+
+           First, we have to create that file; we use a temporary file,
+           that will be removed when leaving Sage. Note that the method
+           :meth:`msave` must be used, which does not use Python pickling
+           but relies on the intrinsic C--MeatAxe way of saving.
+           ::
+
+            sage: f = tmp_filename()
+            sage: M.msave(f)
+            sage: Matrix_gfpn_dense(f)
+            [2 0 0 0 0]
+            [0 2 0 0 0]
+            [0 0 2 0 0]
+            [0 0 0 2 0]
+            [0 0 0 0 2]
+
+        TESTS::
+
+            sage: MS = MatrixSpace(GF(125,'y'),2)
+            sage: A = MS(0)
+            sage: A.left_kernel()
+            Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
+            Basis matrix:
+            [1 0]
+            [0 1]
+            sage: A.right_kernel()
+            Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
+            Basis matrix:
+            [1 0]
+            [0 1]
+
+        """
+        if parent is None:
+            self._is_immutable = False
+            self._ncols = 0
+            self._nrows = 0
+            self._cache = {}
+            return
+        if isinstance(parent, basestring): # load from file
+            FILE = os.path.realpath(parent)
+            try:
+                fsock = open(FILE,"rb",0)
+                fsock.close()
+            except (OSError,IOError):
+                return
+            self.Data = MatLoad(FILE)
+            if FfSetField(self.Data.Field):
+                raise ValueError("Invalid data in file {}".format(FILE))
+            B = GF(self.Data.Field, 'z')
+            parent = MatrixSpace(B, self.Data.Nor, self.Data.Noc)
+            self._is_immutable = False
+            self._parent = parent
+            self._base_ring = B
+            self._converter = FieldConverter(B)
+            self._ncols = self.Data.Noc
+            self._nrows = self.Data.Nor
+            self._cache = {}
+            return
+
+        if not self.Data: # should have been initialised by __cinit__
+            raise MemoryError, "Error allocating memory for MeatAxe matrix"
+        Matrix_dense.__init__(self, parent)
+        self._is_immutable = not mutable
+        B = self._base_ring
+        self._converter = FieldConverter(B)
+        if data is None:
+            return
+
+        cdef int i,j
+        cdef FEL f
+        cdef PTR x
+        if not isinstance(data,list):
+            if not data:
+                return
+            if self._nrows != self._ncols:
+                raise ValueError("Cannot initialise non-square matrix from {}".format(data))
+            f = FfFromInt(self._converter.field_to_int(self._coerce_element(data)))
+            x = self.Data.Data
+            for j from 0 <= j < self.Data.Noc:
+                FfInsert(x,j,f)
+                FfStepPtr(&x)
+            return
+
+        x = self.Data.Data
+        cdef int nr = self.Data.Nor
+        cdef int nc = self.Data.Noc
+        assert self._ncols == nc
+        assert self._nrows == nr
+        if nr==0 or nc==0:
+            return
+        if len(data)<nr:
+            raise ValueError, "Expected a list of size at least the number of rows"
+        cdef list dt, dt_i
+        FfSetField(self.Data.Field)
+        FfSetNoc(nc)
+        if isinstance(data[0],list):
+            # The matrix is given by a list of rows
+            dt = data
+            for i from 0 <= i < nr:
+                idx = 0
+                dt_i = dt[i]
+                for j from 0 <= j < nc:
+                    FfInsert(x, j, FfFromInt(self._converter.field_to_int(self._coerce_element(dt_i[j]))))
+                FfStepPtr(&(x))
+        else:
+            # It is supposed to be a flat list of all entries, sorted by rows
+            dtnext = data.__iter__().next
+            for i from 0 <= i < nr:
+                for j from 0 <= j < nc:
+                    bla = self._converter.field_to_int(self._coerce_element(dtnext()))
+                    FfInsert(x, j, FfFromInt(bla))
+                FfStepPtr(&(x))
+
+    def rowsize(self):
+        return self.Data.RowSize
+
+    def __dealloc__(self):
+        if self.Data != NULL:
+            MatFree(self.Data)
+            self.Data = NULL
+
+    def __copy__(self):
+        """
+        Return a copy of this matrix.
+
+        EXAMPLES::
+
+            sage: M=MatrixSpace(GF(25,'x'),3,20)([20*[0],20*[0],[1]+19*[0]])
+            sage: N=copy(M)
+            sage: print N
+            [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+            sage: N==M
+            True
+            sage: N is M
+            False
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: M=Matrix_gfpn_dense('')
+            sage: N=copy(M)
+            sage: N
+            Empty MTX matrix
+            sage: N==M
+            True
+            sage: N is M
+            False
+        """
+        cdef Matrix_gfpn_dense retval = type(self).__new__(type(self))
+        # Do the initialisation "manually"
+        retval._is_immutable = False  # a copy of a matrix is mutable!
+        retval._parent = self._parent
+        retval._base_ring = self._base_ring
+        retval._converter = self._converter
+        retval._ncols = self._ncols
+        retval._nrows = self._nrows
+        retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
+        if self.Data:
+            retval.Data = MatDup(self.Data)
+            if not retval.Data:
+                raise MemoryError, "Error copying a %s instance"%repr(type(self))
+        else:
+            retval.Data = NULL
+        return retval
+
+    ##########################
+    ## Saving should be done via pickling
+    ## However, we keep a method that relies on MeatAxe matsave:
+    def msave(self,f):
+        """
+        M.msave('filename') ==> save matrix into file <filename>
+
+        It can be reloaded with ``Matrix_gfpn_dense('filename')``.
+        """
+        MatSave(self.Data,f)
+
+    ## Pickling and string representation is taken care of by implementing get_unsafe
+    cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
+        """
+        Get an element without checking.
+
+        TEST::
+
+            sage: F.<z> = GF(9)
+            sage: M = MatrixSpace(F,3)(list(F))
+            sage: type(M)
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: M    # indirect doctest
+            [      0     2*z   z + 1]
+            [  z + 2       2       z]
+            [2*z + 2 2*z + 1       1]
+
+        """
+        if self.Data == NULL:
+            raise IndexError, "Matrix is empty"
+        return self._converter.int_to_field(FfToInt(FfExtract(MatGetPtr(self.Data,i), j)))
+
+    cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j):
+        # NOTE:
+        # It is essential that you call FfSetField and FfSetNoc YOURSELF
+        # and that you assert that the matrix is not empty!
+        # This method is here for speed!
+        return FfToInt(FfExtract(FfGetPtr(self.Data.Data,i) ,j))
+
+    cdef set_unsafe(self, Py_ssize_t i, Py_ssize_t j, value):
+        # ASSUMPTION: value's parent is the base ring
+        if self.Data == NULL:
+            raise IndexError, "Matrix is empty"
+        FfInsert(MatGetPtr(self.Data,i), j, FfFromInt(self._converter.field_to_int(value)))
+
+    cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value):
+        # NOTE:
+        # It is essential that you call FfSetField and FfSetNoc YOURSELF
+        # and that you assert that the matrix is not empty!
+        # This method is here for speed!
+        FfInsert(FfGetPtr(self.Data.Data,i), j, FfFromInt(value))
+
+    def randomize(self, density=None, nonzero=False, *args, **kwds):
+        """
+        Fill the matrix with random values.
+
+        INPUT:
+
+        - ``density`` (optional real number between zero and one) --
+          the expected density of the resulting matrix (default: 1)
+        - ``nonzero`` (optional bool, default ``False``) --
+          If true, all inserted marks are non-zero.
+
+        EXAMPLE::
+
+            sage: MS = MatrixSpace(GF(27,'z'),6,6)
+            sage: M = MS.random_element(); M    # indirect doctest
+            [              1           z + 1     z^2 + z + 1             z^2       2*z^2 + z           z + 1]
+            [2*z^2 + 2*z + 2   2*z^2 + z + 2         z^2 + 1 2*z^2 + 2*z + 2         z^2 + z   2*z^2 + z + 1]
+            [        2*z + 2     z^2 + z + 2           z + 2 2*z^2 + 2*z + 2           2*z^2           2*z^2]
+            [  2*z^2 + z + 2             z^2           z + 2         z^2 + z       2*z^2 + 2         z^2 + 2]
+            [      2*z^2 + z             2*z 2*z^2 + 2*z + 1       2*z^2 + 1 2*z^2 + 2*z + 1       2*z^2 + z]
+            [        2*z + 1         z^2 + z             z^2             z^2     2*z^2 + 2*z           z + 1]
+            sage: type(M)
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: MS.random_element(nonzero=True)
+            [            2*z               1   z^2 + 2*z + 1   2*z^2 + z + 1             z^2     z^2 + z + 1]
+            [    2*z^2 + 2*z   2*z^2 + z + 2         2*z + 1       z^2 + 2*z     2*z^2 + 2*z             z^2]
+            [        z^2 + z     z^2 + z + 2 2*z^2 + 2*z + 1         z^2 + 2               1           2*z^2]
+            [              z     2*z^2 + 2*z           2*z^2         2*z + 1           z + 2           z + 2]
+            [        z^2 + z             z^2           z + 2     2*z^2 + 2*z         2*z + 1         z^2 + z]
+            [    z^2 + z + 2       2*z^2 + z             z^2           z + 1     2*z^2 + 2*z   z^2 + 2*z + 1]
+            sage: MS.random_element(density=0.5)
+            [        z^2 + 2               0   z^2 + 2*z + 2       2*z^2 + z               0     z^2 + z + 2]
+            [              0               1               0               0               0               0]
+            [  2*z^2 + z + 1   2*z^2 + z + 2               0     z^2 + z + 2               0     z^2 + z + 1]
+            [              0               0               0               0               0               0]
+            [2*z^2 + 2*z + 2               0               0   2*z^2 + z + 2               0         2*z + 1]
+            [              0       2*z^2 + z               0               1               0   2*z^2 + z + 1]
+
+        """
+        self.check_mutability()
+        cdef int fl = self.Data.Field
+        density = float(1) if density is None else float(density)  # omitted density: fill completely
+        if density <= 0:
+            return
+        if density > 1:
+            density = float(1)
+
+        self.clear_cache()
+
+        cdef PTR x
+        cdef unsigned char *y
+        x = self.Data.Data
+        cdef int nr = self.Data.Nor
+        cdef int nc = self.Data.Noc
+        cdef int i, j, k
+
+        FfSetField(fl)
+        FfSetNoc(nc)
+        cdef int O, MPB, tmp
+        randint = current_randstate().c_random
+        randdouble = current_randstate().c_rand_double
+
+        if not nonzero:
+            if density == 1:
+                MPB = 0
+                tmp = fl
+                while tmp <= 256:
+                    MPB += 1
+                    tmp *= fl
+                O = (fl**MPB)
+                sig_on()
+                if nc%MPB:
+                    for i from 0 <= i < nr:
+                        y = <unsigned char*>x
+                        for j from 0 <= j < FfCurrentRowSizeIo-1:
+                            y[j] = randint()%O
+                        y[FfCurrentRowSizeIo-1] = randint()%(fl**(nc%MPB))
+                        FfStepPtr(&(x))
+                else:
+                    for i from 0 <= i < nr:
+                        y = <unsigned char*>x
+                        for j from 0 <= j < FfCurrentRowSizeIo:
+                            y[j] = randint()%O
+                        FfStepPtr(&(x))
+                sig_off()
+            else:
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        if randdouble() < density:
+                            FfInsert(x, j, FfFromInt( (randint()%fl) ))
+                    FfStepPtr(&(x))
+                sig_off()
+        else:
+            if density == 1:
+                fl -= 1
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        FfInsert(x, j, FfFromInt( (randint()%fl)+1 ))
+                    FfStepPtr(&(x))
+                sig_off()
+            else:
+                fl -= 1
+                sig_on()
+                for i from 0 <= i < nr:
+                    for j from 0 <= j < nc:
+                        if randdouble() < density:
+                            FfInsert(x, j, FfFromInt( (randint()%fl)+1 ))
+                    FfStepPtr(&(x))
+                sig_off()
+
+    def show_contents(self, r=None):
+        FfSetField(self.Data.Field)
+        FfSetNoc(self.Data.Noc)
+        cdef PTR p
+        cdef size_t i, j
+        if r is not None:
+            r_min = r
+            r_max = r+1
+        else:
+            r_min = 0
+            r_max = self.Data.Nor
+        for i in range(r_min, r_max):
+            p = FfGetPtr(self.Data.Data, i)
+            for j from 0<=j<self.Data.RowSize:
+                print "%3.3d"%p[j],
+            print
+
+##################
+## comparison
+    cpdef int _cmp_(left, Element right) except -2:
+        """
+        Compare two Matrix_gfpn_dense matrices
+
+        Of course, '<' and '>' doesn't make much sense for matrices.
+
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(125,'x'),[20*[0],20*[0],[1]+19*[0]])
+            sage: N = copy(M)
+            sage: M == N
+            True
+            sage: M != N
+            False
+            sage: print M < N
+            None
+            sage: N[2,19] = 1
+            sage: M == N
+            False
+            sage: M != N
+            True
+        """
+        cdef Matrix_gfpn_dense self = left
+        cdef Matrix_gfpn_dense N = right
+        cdef char* d1
+        cdef char* d2
+        if self.Data.Field != N.Data.Field:
+            if self.Data.Field > N.Data.Field:
+                return 1
+            return -1
+        if self.Data.Noc != N.Data.Noc:
+            if self.Data.Noc > N.Data.Noc:
+                return 1
+            return -1
+        if self.Data.Nor != N.Data.Nor:
+            if self.Data.Nor > N.Data.Nor:
+                return 1
+            return -1
+        d1 = <char*>(self.Data.Data)
+        d2 = <char*>(N.Data.Data)
+        cdef str s1 = PyString_FromStringAndSize(d1,self.Data.RowSize * self.Data.Nor)
+        cdef str s2 = PyString_FromStringAndSize(d2,N.Data.RowSize * N.Data.Nor)
+        if s1 != s2:
+            if s1 > s2:
+                return 1
+            return -1
+        return 0
+
+    def _rowlist_(self, i, j=-1):
+        "M._rowlist_(i): Return row <i> as a list of python ints"
+        cdef int k
+        if self.Data:
+            FfSetField(self.Data.Field)
+        else:
+            raise ValueError("Matrix is empty")
+        if (i<0) or (i>=self.Data.Nor):
+            raise IndexError("Index {} out of range 0..{}".format(i,self.Data.Nor-1))
+        cdef PTR p
+        p = MatGetPtr(self.Data,i)
+        L = [FfToInt(FfExtract(p,k)) for k in range(self.Data.Noc)]
+        if j!=-1:
+            if not(isinstance(j,int) or isinstance(j,Integer)):
+                raise TypeError, "Second index must be an integer"
+            if j >= self.Data.Nor:
+                raise IndexError, "Index out of range"
+            for k from i < k <= j:
+                FfStepPtr(&(p)) # This is only called after MatGetPtr, hence, after FfSetNoc.
+                L.extend([FfToInt(FfExtract(p,l)) for l in range(self.Data.Noc)])
+        return L
+
+    def _matlist_(self):
+        "M._matlist_(): Return M as a list of lists of python ints"
+        cdef int i
+        if self.Data:
+            FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
+        else:
+            raise IndexError, "Matrix is empty"
+        cdef PTR p
+        p = self.Data.Data
+        l_out=[]
+        for i from 1<=i<self.Data.Nor:
+            l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+            FfStepPtr(&(p))
+        l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+        return l_out
+
+#########################
+## Arithmetics
+    cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
+        if start_col != 0 or self.Data == NULL:
+            raise ValueError
+        cdef PTR = MatGetPtr(self.Data, i)
+        FfMulRow(PTR, FfFromInt(self._converter.field_to_int(s)))
+
+    cpdef ModuleElement _add_(self, ModuleElement right):
+        "add two MTX matrices of equal size"
+        cdef Matrix_gfpn_dense Self = self
+        cdef Matrix_gfpn_dense Right = right
+        assert Self is not None
+        assert Right is not None
+        if Self.Data == NULL or Right.Data == NULL:
+            raise NotImplementedError, "The matrices must not be empty"
+        cdef Matrix_gfpn_dense Left = Self.__copy__()
+        if MatAdd(Left.Data, Right.Data) != NULL:
+            return Left
+        else:
+            raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cpdef ModuleElement _sub_(self, ModuleElement right):
+        "subtract two MTX matrices of equal size"
+        cdef Matrix_gfpn_dense Self = self
+        cdef Matrix_gfpn_dense Right = right
+        assert Self is not None
+        assert Right is not None
+        if Self.Data == NULL or Right.Data == NULL:
+            raise NotImplementedError, "The matrices must not be empty"
+        cdef Matrix_gfpn_dense Left = Self.__copy__()
+        Left._is_immutable = False
+        if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
+            return Left
+        else:
+            raise ArithmeticError, "Matrix sizes or fields not compatible"
+
+    def __neg__(self):
+        "return negation of a MTX matrix: -M == M.__neg__()"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        return self._rmul_(self._base_ring(-1))
+
+    cpdef ModuleElement _rmul_(self, RingElement left):
+        "Scalar multiplication"
+        if self.Data == NULL:
+            return self.__copy__()
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cpdef ModuleElement _lmul_(self, RingElement right):
+        "Scalar multiplication"
+        if self.Data == NULL:
+            return self.__copy__()
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    cdef Matrix _matrix_times_matrix_(self, Matrix right):
+        # Surprisingly, Winograd-Strassen can compete with school book
+        # multiplication for smallish matrices, and of course it is
+        # asymptotically faster. So, we used it by default.
+        return self._multiply_strassen(right)
+
+    cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
+        "multiply two meataxe matrices by the school book algorithm"
+        if self.Data == NULL or right.Data == NULL:
+            raise ValueError("The matrices must not be empty")
+        if self._ncols != right._nrows:
+            raise ArithmeticError("left ncols must match right nrows")
+        MS = self.matrix_space(self._nrows, right._ncols, False)
+        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+        sig_on()
+        OUT.Data = MatDup(self.Data)
+        if OUT.Data == NULL:
+            sig_off()
+            raise MemoryError
+        if not MatMul(OUT.Data,right.Data):
+            sig_off()
+            raise ArithmeticError("Matrix sizes or fields not compatible")
+        sig_off()
+        OUT._nrows = OUT.Data.Nor
+        OUT._ncols = OUT.Data.Noc
+        OUT._is_immutable = False
+        OUT._parent = MS
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        return OUT
+
+    cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=0):
+        """
+        cutoff is NOT the number of rows/columns, but the rowsize expressed in bytes.
+        If `cutoff==0` then the default ``sizeof(long)^2/2`` is chosen.
+        """
+        if self.Data == NULL or right.Data == NULL:
+            raise ValueError("The matrices must not be empty")
+        if self._ncols != right._nrows:
+            raise ArithmeticError("left ncols must match right nrows")
+        MS = self.matrix_space(self._nrows, right._ncols, False)
+        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense(MS, None)
+        # Now, OUT.Data is initialised, which is neede for MatrixMulStrassen to work.
+        cutoff = cutoff//sizeof(long)
+        StrassenSetCutoff(cutoff)
+        sig_on()
+        if MatMulStrassen(OUT.Data, self.Data, right.Data) == NULL:
+            sig_off(); raise ArithmeticError("Error multiplying matrices by Strassen-Winograd algorithm")  # balance sig_on() before raising
+        sig_off()
+        return OUT
+
+    cdef ModuleElement _mul_long(self, long n):
+        "multiply an MTX matrix with a field element represented by an integer"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense left = self.__copy__()
+        cdef FEL r
+        FfSetField(self.Data.Field)  # select this matrix's field before FfFromInt, as _rmul_/_lmul_ do
+        if n < 0:
+            r = mtx_taddinv[FfFromInt(-n)]
+        else:
+            r = FfFromInt(n)
+        if MatMulScalar(left.Data, r) != NULL:
+            return left
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    def __div__(Matrix_gfpn_dense self, p):
+        "divide an MTX matrix by a field element represented by an integer"
+        if self.Data == NULL:
+            return self.__copy__()
+        if not p:
+            raise ZeroDivisionError
+        if p not in self._base_ring:
+            raise ValueError("{} is not a scalar".format(p))
+        FfSetField(self.Data.Field)
+        cdef Matrix_gfpn_dense OUT = self.__copy__()
+        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
+        if MatMulScalar(OUT.Data, r) != NULL:
+            return OUT
+        raise ArithmeticError("Matrix sizes or fields not compatible")
+
+    def __pow__(Matrix_gfpn_dense self, n, ignored):
+        "M.__pow__(n): return M^n"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if not self.is_square():
+            raise ArithmeticError("self must be a square matrix")
+        if ignored is not None:
+            raise RuntimeError("__pow__ third argument not used")
+        cdef Matrix_gfpn_dense OUT
+        cdef Matrix_gfpn_dense SELFINV
+        OUT = type(self).__new__(type(self))
+        OUT._nrows = self._nrows
+        OUT._ncols = self._ncols
+        OUT._is_immutable = False
+        OUT._parent = self._parent
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        if n>=0:
+            OUT.Data = MatPower(self.Data,n)
+        else:
+            SELFINV = self.__invert__()
+            OUT.Data = MatPower(SELFINV.Data,-n)
+        if OUT.Data != NULL:
+            return OUT
+        raise ArithmeticError("Failure in exponentiating a matrix")
+
+    def __invert__(Matrix_gfpn_dense self):
+        "M__invert__(): return M^(-1)"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if not self.is_square():
+            raise ArithmeticError("self must be a square matrix")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT._nrows = self._nrows
+        OUT._ncols = self._ncols
+        OUT._is_immutable = False
+        OUT._parent = self._parent
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        OUT.Data = MatInverse(self.Data)
+        if OUT.Data != NULL:
+            return OUT
+        raise ArithmeticError("This matrix is not invertible")
+
+    def transpose(Matrix_gfpn_dense self):
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT._nrows = self._ncols
+        OUT._ncols = self._nrows
+        OUT._is_immutable = False
+        OUT._parent = self.matrix_space(self._ncols, self._nrows, False)
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        OUT.Data = MatTransposed(self.Data)
+        return OUT
+
+    def order(self):
+        "M.order(): return multiplicative order of M"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        if (self.Data.Nor <> self.Data.Noc):
+            raise ValueError("only defined for square matrices")
+        o = MatOrder(self.Data)
+        if o==-1:
+            raise ArithmeticError("order too large")
+        else:
+            return o
+
+###################
+## Gauss algorithm
+
+    def nullity(self):
+        "M.nullity(): return the nullity of M"
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        return MatNullity(self.Data)
+
+    def left_kernel_matrix(self):
+        """M.left_kernel_matrix(): return the null space of M
+
+        M.left_kernel_matrix()*M is a null matrix
+        """
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT.Data = MatNullSpace(self.Data)
+        if OUT.Data == NULL:
+            return OUT
+        OUT._nrows = OUT.Data.Nor
+        OUT._ncols = OUT.Data.Noc
+        OUT._is_immutable = False
+        OUT._parent = self.matrix_space(OUT._nrows, OUT._ncols, False)
+        OUT._base_ring = self._base_ring
+        OUT._converter = self._converter
+        OUT._cache = {}
+        return OUT
+
+    def lead(self):
+        """
+(f,i) = M.lead() <=> f=M[0,i] is the first non-zero coefficient in the first row of M
+
+If the first row of M has no non-zero entry then f==0
+        """
+        cdef int i
+        cdef int fe
+        if self.Data == NULL:
+            raise ValueError("The matrix must not be empty")
+        FfSetField(self.Data.Field)
+        for i from 0 <= i < self.Data.Noc:
+            fe = FfToInt(FfExtract(self.Data.Data,i))
+            if fe:
+                return fe, i
+        return 0, self.Data.Noc
+
+########################
+### String representations
+#    def __repr__(self):
+#        "return a short description of an MTX matrix"
+#        if self.Data == NULL:
+#            return 'Empty MTX matrix'
+#        return '(%s x %s) MTX matrix over GF(%s)'%(self.Data.Nor, self.Data.Noc, self.Data.Field)
+#
+#    def __str__(self):
+#        "return a string showing the contents of an MTX matrix"
+#        # cdef long i,j
+#        if self.Data == NULL:
+#            return '[]'
+#        nc = self.Data.Noc
+#        nr = self.Data.Nor
+#        setfield(self.Data.Field)
+#        fln = len(str(FfOrder))
+#        matL = self._matlist_()
+#        return "\n".join(["["+" ".join([str(el).rjust(fln) for el in matL[i]])+"]" \
+#                                   for i in range(nr)])
+
+###############################################################################
+# Further features may be added later
+###############################################################################
+
diff --git a/src/sage/matrix/matrix_space.py b/src/sage/matrix/matrix_space.py
index 6336b8e1c25..22a4e591216 100644
--- a/src/sage/matrix/matrix_space.py
+++ b/src/sage/matrix/matrix_space.py
@@ -56,7 +56,6 @@
 
 import matrix_mpolynomial_dense
 
-
 # Sage imports
 from sage.misc.superseded import deprecation
 import sage.structure.coerce
@@ -986,6 +985,12 @@ def _get_matrix_class(self):
             <type 'sage.matrix.matrix_modn_dense_float.Matrix_modn_dense_float'>
             sage: type(matrix(GF(16007), 2, range(4)))
             <type 'sage.matrix.matrix_modn_dense_double.Matrix_modn_dense_double'>
+            sage: type(matrix(GF(2), 2, range(4)))
+            <type 'sage.matrix.matrix_mod2_dense.Matrix_mod2_dense'>
+            sage: type(matrix(GF(64,'z'), 2, range(4)))
+            <type 'sage.matrix.matrix_mod2e_dense.Matrix_mod2e_dense'>
+            sage: type(matrix(GF(125,'z'), 2, range(4)))
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
         """
         R = self.base_ring()
         if self.is_dense():
@@ -1013,19 +1018,26 @@ def _get_matrix_class(self):
                 elif R.order() < matrix_modn_dense_double.MAX_MODULUS:
                     return matrix_modn_dense_double.Matrix_modn_dense_double
                 return matrix_generic_dense.Matrix_generic_dense
-            elif sage.rings.finite_rings.constructor.is_FiniteField(R) and R.characteristic() == 2 and R.order() <= 65536:
-                return matrix_gf2e_dense.Matrix_gf2e_dense
+            elif sage.rings.finite_rings.constructor.is_FiniteField(R):
+                if R.characteristic() == 2:
+                    if R.order() <= 65536:
+                        return matrix_gf2e_dense.Matrix_gf2e_dense
+                elif R.order() <= 255:
+                    try:
+                        import matrix_gfpn_dense
+                        return matrix_gfpn_dense.Matrix_gfpn_dense
+                    except ImportError:
+                        pass
             elif sage.rings.polynomial.multi_polynomial_ring_generic.is_MPolynomialRing(R) and R.base_ring() in _Fields:
                 return matrix_mpolynomial_dense.Matrix_mpolynomial_dense
             #elif isinstance(R, sage.rings.padics.padic_ring_capped_relative.pAdicRingCappedRelative):
             #    return padics.matrix_padic_capped_relative_dense
             # the default
-            else:
-                from sage.symbolic.ring import SR   # causes circular imports
-                if R is SR:
-                    import matrix_symbolic_dense
-                    return matrix_symbolic_dense.Matrix_symbolic_dense
-                return matrix_generic_dense.Matrix_generic_dense
+            from sage.symbolic.ring import SR   # causes circular imports
+            if R is SR:
+                import matrix_symbolic_dense
+                return matrix_symbolic_dense.Matrix_symbolic_dense
+            return matrix_generic_dense.Matrix_generic_dense
 
         else:
             if sage.rings.finite_rings.integer_mod_ring.is_IntegerModRing(R) and R.order() < matrix_modn_sparse.MAX_MODULUS:

From c2e6fe5cd34ae01fe4096312c6d1f9133dd267ee Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 20 Sep 2015 16:36:58 +0200
Subject: [PATCH 15/23] A full wrapper for MeatAxe matrices

---
 src/sage/libs/meataxe.pxd             |   4 +-
 src/sage/matrix/matrix_gfpn_dense.pxd |   5 +-
 src/sage/matrix/matrix_gfpn_dense.pyx | 216 ++++++++++++++++----------
 3 files changed, 136 insertions(+), 89 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index 2b413533c4a..cfca8ca40bc 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -63,13 +63,13 @@ cdef extern from "meataxe.h":
 
     ## Rows
     void FfMulRow(PTR row, FEL mark)
-    # void FfAddMulRow(PTR dest, PTR src, FEL f)
+    void FfAddMulRow(PTR dest, PTR src, FEL f)
     PTR FfAddRow(PTR dest, PTR src)
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
     # FEL FfScalarProduct(PTR a, PTR b)
-    # void FfSwapRows(PTR dest, PTR src)
+    void FfSwapRows(PTR dest, PTR src)
     # void FfPermRow(PTR row, long *perm, PTR result)
     # int FfCmpRows(PTR p1, PTR p2)
 
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
index 61025adf6a3..300118918ad 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pxd
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -20,10 +20,7 @@ from sage.libs.meataxe cimport *
 cdef class Matrix_gfpn_dense(Matrix_dense):
     cdef Matrix_t *Data
     cdef FieldConverter_class _converter
-    #cpdef Matrix_gfpn_dense normalized(Matrix_gfpn_dense self)
-    #cpdef Matrix_gfpn_dense semi_echelon(Matrix_gfpn_dense self)
-    #cpdef int nullity(Matrix_gfpn_dense self)
-    #cpdef tuple lead(self)
+    cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols)
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
     cdef Matrix _matrix_times_matrix_(self, Matrix right)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 6021c554828..7996917c3a7 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1,15 +1,19 @@
 r"""
-Dense Matrices over `\mathbb F_q`, with `q<255` odd and not prime
+Dense Matrices over `\mathbb F_q`, with `q<255`
 
 This module is a wrapper for version 2.4.24 of the Aachen
 `C-MeatAxe <http://www.math.rwth-aachen.de/homes/MTX/download.html>`_,
 improved by an implementation of the Winograd-Strassen multiplication
 algorithm. It provides matrices over the finite field `\mathbb F_q`,
-where `q\le 255` is odd and not prime.
+where `q\le 255`.
+
+By default, it is only used when `q` is odd and not prime, because other
+matrix implementations in SageMath perform better for prime fields or in
+characteristic two.
 
 AUTHORS:
 
-- Simon King (2015-09-18): initial version
+- Simon King (2015-09): initial version
 
 """
 
@@ -50,6 +54,12 @@ from sage.misc.randstate import current_randstate
 from sage.misc.cachefunc import cached_method, cached_function
 from sage.structure.element cimport Element, ModuleElement, RingElement, Matrix
 
+from libc.stdlib cimport free
+from sage.ext.memory cimport check_realloc
+from libc.string cimport memset, memcpy
+
+cimport sage.matrix.matrix0
+
 include 'sage/ext/stdsage.pxi'
 
 ####################
@@ -312,6 +322,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef int ncols = parent.ncols()
         self.Data = MatAlloc(f, nrows, ncols)
 
+    def __dealloc__(self):
+        if self.Data != NULL:
+            MatFree(self.Data)
+            self.Data = NULL
+
     def __init__(self, parent, data=None, mutable=True, copy=False, coerce=False):
         """
         Matrix extension class using libmeataxe as backend
@@ -490,13 +505,22 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     FfInsert(x, j, FfFromInt(bla))
                 FfStepPtr(&(x))
 
-    def rowsize(self):
-        return self.Data.RowSize
+    cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols):
+        r"""
+        Return a new matrix with no entries set.
+        """
+        cdef Matrix_gfpn_dense res
+        res = self.__class__.__new__(self.__class__)
 
-    def __dealloc__(self):
-        if self.Data != NULL:
-            MatFree(self.Data)
-            self.Data = NULL
+        if nrows == self._nrows and ncols == self._ncols:
+            res._parent = self._parent
+        else:
+            res._parent = self.matrix_space(nrows, ncols)
+        res._ncols  = ncols
+        res._nrows  = nrows
+        res._base_ring = self._base_ring
+        res._converter = self._converter
+        return res
 
     def __copy__(self):
         """
@@ -524,14 +548,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             sage: N is M
             False
         """
-        cdef Matrix_gfpn_dense retval = type(self).__new__(type(self))
-        # Do the initialisation "manually"
+        cdef Matrix_gfpn_dense retval = self._new(self._nrows, self._ncols)
         retval._is_immutable = False  # a copy of a matrix is mutable!
-        retval._parent = self._parent
-        retval._base_ring = self._base_ring
-        retval._converter = self._converter
-        retval._ncols = self._ncols
-        retval._nrows = self._nrows
         retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
         if self.Data:
             retval.Data = MatDup(self.Data)
@@ -794,8 +812,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                 L.extend([FfToInt(FfExtract(p,l)) for l in range(self.Data.Noc)])
         return L
 
-    def _matlist_(self):
-        "M._matlist_(): Return M as a list of lists of python ints"
+    def _list(self):
+        cdef list x = self.fetch('list')
+        if not x is None:
+            return x
+        x = []
         cdef int i
         if self.Data:
             FfSetField(self.Data.Field)
@@ -804,23 +825,47 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise IndexError, "Matrix is empty"
         cdef PTR p
         p = self.Data.Data
-        l_out=[]
         for i from 1<=i<self.Data.Nor:
-            l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
+            x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
             FfStepPtr(&(p))
-        l_out.append([FfToInt(FfExtract(p,j)) for j in range(self.Data.Noc)])
-        return l_out
+        x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
+        self.cache('list', x)
+        return x
 
 #########################
 ## Arithmetics
     cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
         if start_col != 0 or self.Data == NULL:
-            raise ValueError
-        cdef PTR = MatGetPtr(self.Data, i)
-        FfMulRow(PTR, FfFromInt(self._converter.field_to_int(s)))
+            raise ValueError("We can only rescale a full row of a non-empty matrix")
+        FfMulRow(MatGetPtr(self.Data, i), FfFromInt(self._converter.field_to_int(self._base_ring(s))))
+
+    cdef add_multiple_of_row_c(self,  Py_ssize_t row_to, Py_ssize_t row_from, multiple, Py_ssize_t start_col):
+        if start_col != 0 or self.Data == NULL:
+            raise ValueError("We can only rescale a full row of a non-empty matrix")
+        FfAddMulRow(MatGetPtr(self.Data, row_to), MatGetPtr(self.Data, row_from), FfFromInt(self._converter.field_to_int(self._base_ring(multiple))))
+
+    cdef swap_rows_c(self, Py_ssize_t row1, Py_ssize_t row2):
+        FfSwapRows(MatGetPtr(self.Data, row1), MatGetPtr(self.Data, row2))
+
+    def trace(self):
+        if self._nrows != self._ncols:
+            raise ValueError, "self must be a square matrix"
+        return self._converter.int_to_field(FfToInt(MatTrace(self.Data)))
+
+    def stack(self, Matrix_gfpn_dense other):
+        if self._ncols != other._ncols:
+            raise TypeError("Both numbers of columns must match.")
+        if self._nrows == 0 or self.Data == NULL:
+            return other.__copy__()
+        if other._nrows == 0 or other.Data == NULL:
+            return self.__copy__()
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows+other._nrows, self._ncols)
+        OUT.Data = MatAlloc(self.Data.Field, self.Data.Nor+other.Data.Nor, self.Data.Noc)
+        memcpy(OUT.Data.Data, self.Data.Data, FfCurrentRowSize*self.Data.Nor)
+        memcpy(MatGetPtr(OUT.Data, self.Data.Nor), other.Data.Data, FfCurrentRowSize*other.Data.Nor)
+        return OUT
 
     cpdef ModuleElement _add_(self, ModuleElement right):
-        "add two MTX matrices of equal size"
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -834,7 +879,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
-        "subtract two MTX matrices of equal size"
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -849,13 +893,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError, "Matrix sizes or fields not compatible"
 
     def __neg__(self):
-        "return negation of a MTX matrix: -M == M.__neg__()"
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         return self._rmul_(self._base_ring(-1))
 
     cpdef ModuleElement _rmul_(self, RingElement left):
-        "Scalar multiplication"
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
@@ -865,7 +907,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _lmul_(self, RingElement right):
-        "Scalar multiplication"
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
@@ -874,11 +915,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
-    cdef Matrix _matrix_times_matrix_(self, Matrix right):
+    cdef int _strassen_default_cutoff(self, sage.matrix.matrix0.Matrix right) except -2:
         # Surprisingly, Winograd-Strassen can compete with school book
         # multiplication for smallish matrices, and of course it is
         # asymptotically faster. So, we used it by default.
-        return self._multiply_strassen(right)
+        return 0
 
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
         "multiply two meataxe matrices by the school book algorithm"
@@ -886,8 +927,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrices must not be empty")
         if self._ncols != right._nrows:
             raise ArithmeticError("left ncols must match right nrows")
-        MS = self.matrix_space(self._nrows, right._ncols, False)
-        cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, right._ncols)
         sig_on()
         OUT.Data = MatDup(self.Data)
         if OUT.Data == NULL:
@@ -897,12 +937,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             sig_off()
             raise ArithmeticError("Matrix sizes or fields not compatible")
         sig_off()
-        OUT._nrows = OUT.Data.Nor
-        OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
-        OUT._parent = MS
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         return OUT
 
@@ -917,7 +952,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("left ncols must match right nrows")
         MS = self.matrix_space(self._nrows, right._ncols, False)
         cdef Matrix_gfpn_dense OUT = Matrix_gfpn_dense(MS, None)
-        # Now, OUT.Data is initialised, which is neede for MatrixMulStrassen to work.
+        # Now, OUT.Data is initialised, which is needed for MatMulStrassen to work.
         cutoff = cutoff//sizeof(long)
         StrassenSetCutoff(cutoff)
         sig_on()
@@ -964,15 +999,9 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ArithmeticError("self must be a square matrix")
         if ignored is not None:
             raise RuntimeError("__pow__ third argument not used")
-        cdef Matrix_gfpn_dense OUT
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         cdef Matrix_gfpn_dense SELFINV
-        OUT = type(self).__new__(type(self))
-        OUT._nrows = self._nrows
-        OUT._ncols = self._ncols
         OUT._is_immutable = False
-        OUT._parent = self._parent
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         if n>=0:
             OUT.Data = MatPower(self.Data,n)
@@ -989,13 +1018,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrix must not be empty")
         if not self.is_square():
             raise ArithmeticError("self must be a square matrix")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
-        OUT._nrows = self._nrows
-        OUT._ncols = self._ncols
+        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         OUT._is_immutable = False
-        OUT._parent = self._parent
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         OUT.Data = MatInverse(self.Data)
         if OUT.Data != NULL:
@@ -1005,13 +1029,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
     def transpose(Matrix_gfpn_dense self):
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
-        OUT._nrows = self._ncols
-        OUT._ncols = self._nrows
+        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._nrows)  # transposed shape: ncols x nrows
         OUT._is_immutable = False
-        OUT._parent = self.matrix_space(self._ncols, self._nrows, False)
-        OUT._base_ring = self._base_ring
-        OUT._converter = self._converter
         OUT._cache = {}
         OUT.Data = MatTransposed(self.Data)
         return OUT
@@ -1074,28 +1093,59 @@ If the first row of M has no non-zero entry then f==0
                 return fe, i
         return 0, self.Data.Noc
 
-########################
-### String representations
-#    def __repr__(self):
-#        "return a short description of an MTX matrix"
-#        if self.Data == NULL:
-#            return 'Empty MTX matrix'
-#        return '(%s x %s) MTX matrix over GF(%s)'%(self.Data.Nor, self.Data.Noc, self.Data.Field)
-#
-#    def __str__(self):
-#        "return a string showing the contents of an MTX matrix"
-#        # cdef long i,j
-#        if self.Data == NULL:
-#            return '[]'
-#        nc = self.Data.Noc
-#        nr = self.Data.Nor
-#        setfield(self.Data.Field)
-#        fln = len(str(FfOrder))
-#        matL = self._matlist_()
-#        return "\n".join(["["+" ".join([str(el).rjust(fln) for el in matL[i]])+"]" \
-#                                   for i in range(nr)])
-
-###############################################################################
-# Further features may be added later
-###############################################################################
+    def _echelon_in_place_classical(self, reduced=True):
+        if self._nrows == 0 or self._ncols == 0:
+            self.cache('in_echelon_form',True)
+            self.cache('rank', 0)
+            self.cache('pivots', ())
+            return self
+        if MatEchelonize(self.Data) == -1:
+            raise ArithmeticError("Error echelonizing this matrix")
+        self._cache = {}
+        # Now, self.Data is in semi-echelon form.
+        r = self.Data.Nor
+        cdef size_t i, j, pos
+        cdef PTR old, dest, src
+        cdef FEL piv
+        self.cache('rank', r)
+        # Next, if requested, we permute, scale and clear rows to achieve
+        # the reduced echelon form.
+        if reduced:
+            pivs = [(self.Data.PivotTable[i],i) for i in range(r)]
+            pivs.sort()
+            if pivs != [(self.Data.PivotTable[i],i) for i in range(r)] or self.Data.Nor < self._nrows:
+                # We copy the rows one by one, sorting their pivot positions.
+                old = self.Data.Data
+                self.Data.Data = FfAlloc(self._nrows)
+                for i, (pos,j) in enumerate(pivs):
+                    # We have to move row j to row i
+                    dest = self.Data.Data+FfCurrentRowSize*i
+                    memcpy(dest, old+FfCurrentRowSize*j, FfCurrentRowSize)
+                    self.Data.PivotTable[i] = pos
+                free(old)
+                self.Data.Nor = self._nrows
+            # Now the pivot positions are increasing. Whether or not rows
+            # had to be moved, we scale each pivot to one and annihilate
+            # everything above it (so far, we only know that the matrix
+            # is zero below the pivots).
+            for i from 0 <= i < r:
+                src = MatGetPtr(self.Data, i)
+                piv = FfExtract(src, self.Data.PivotTable[i])
+                assert piv!=FF_ZERO
+                if piv != FF_ONE:
+                    FfMulRow(src, mtx_tmultinv[piv])
+                for j from 0 <= j < i:
+                    dest = MatGetPtr(self.Data, j)
+                    piv = FfExtract(dest, self.Data.PivotTable[i])
+                    if piv != FF_ZERO:
+                        FfAddMulRow(dest, src, mtx_taddinv[piv])
+        elif self.Data.Nor < self._nrows:
+            # Some rows may have vanished. In SageMath, we
+            # want that the number of rows does not change,
+            # thus, we have to append zero rows.
+            self.Data.Data = <PTR>check_realloc(self.Data.Data, FfCurrentRowSize*self._nrows)
+            memset(self.Data.Data + FfCurrentRowSize*self.Data.Nor, FF_ZERO, FfCurrentRowSize*(self._nrows-self.Data.Nor))
+            self.Data.Nor = self._nrows
+        self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
+        self.cache('in_echelon_form',True)
 

From 395aa9a75fd03bb87ffbfe09f8c2ed556132569e Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Mon, 21 Sep 2015 12:17:06 +0200
Subject: [PATCH 16/23] Improve echelon computation in MeatAxe, and fix some
 compiler warnings

---
 build/pkgs/meataxe/dependencies               |   1 +
 build/pkgs/meataxe/patches/TweakEchelon.patch | 229 ++++++++++++++++++
 build/pkgs/meataxe/spkg-install               |  24 ++
 3 files changed, 254 insertions(+)
 create mode 100644 build/pkgs/meataxe/dependencies
 create mode 100644 build/pkgs/meataxe/patches/TweakEchelon.patch

diff --git a/build/pkgs/meataxe/dependencies b/build/pkgs/meataxe/dependencies
new file mode 100644
index 00000000000..2f9f3849682
--- /dev/null
+++ b/build/pkgs/meataxe/dependencies
@@ -0,0 +1 @@
+# no dependencies
diff --git a/build/pkgs/meataxe/patches/TweakEchelon.patch b/build/pkgs/meataxe/patches/TweakEchelon.patch
new file mode 100644
index 00000000000..eeee5e4d8f6
--- /dev/null
+++ b/build/pkgs/meataxe/patches/TweakEchelon.patch
@@ -0,0 +1,229 @@
+Improve echelon computation by restricting FfAddMulRow to the
+nonzero part of the to-be-added row.
+
+Also remove some compiler warnings.
+
+AUTHOR:
+
+- Simon King, 2015-09-22
+diff --git a/src/c-kernel.c b/src/c-kernel.c
+index f74e97e..d4355bc 100644
+--- a/src/c-kernel.c
++++ b/src/c-kernel.c
+@@ -311,13 +311,10 @@ void TestFelToInt(unsigned flags)
+ static void TestSubfield1(int fld, int sub)
+ 
+ {
+-    FEL tabfld[256], tabsub[256];
++    FEL tabsub[256];
+     FEL tabemb[256];
+     int i;
+ 
+-    FfSetField(fld);
+-    for (i = 0; i < fld; ++i) 
+-	tabfld[i] = FfFromInt(i);
+     FfSetField(sub);
+     for (i = 0; i < sub; ++i) 
+ 	tabsub[i] = FfFromInt(i);
+diff --git a/src/cfcomp.c b/src/cfcomp.c
+index 7434549..fa739d6 100644
+--- a/src/cfcomp.c
++++ b/src/cfcomp.c
+@@ -131,7 +131,7 @@ static void Compare(const char *name)
+ {
+     ReadGens(name);
+     FindEquiv(name);
+-    FreeGens(name);
++    FreeGens();
+ }
+ 
+ 
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 178b6cb..6ef2f72 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -919,6 +919,54 @@ PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len)
+     return dest;
+ }
+ 
++/**
++ ** Add a multiple of a part of a row.
++ ** This function adds a multiple of @em src to @em dest.
++ ** This works like FfAddMulRow(), but the operation is performed only on a given range of
++ ** bytes.
++ ** @param dest The row to add to.
++ ** @param src The row to add.
++ ** @param first Number of bytes to skip.
++ ** @param len Number of bytes to add.
++**/
++/* Warning!! Let L be the long integer to which the first byte of the given
++ * part of a row belongs. It is assumed that all previous bytes in L are zero!
++ * Moreover, it is assumed that the part of the rows either ends at the
++ * end of the row, or ends with a full long.
++ */
++void FfAddMulRowPartial(PTR dest, PTR src, FEL f, int first, int len)
++{
++    register int i;
++    register BYTE *p1, *p2, *multab;
++
++    CHECKFEL(f);
++    if (f == FF_ZERO)
++    return;
++    int lfirst;
++    if (f == FF_ONE)
++    {
++        lfirst = first/sizeof(long);
++        if (first+len>=FfCurrentRowSizeIo)
++        {
++            FfAddRowPartial(dest,src,lfirst,FfCurrentRowSize/sizeof(long)-lfirst);
++            return;
++        }
++        FfAddRowPartial(dest,src,lfirst,(first+len)/sizeof(long)-lfirst);
++        return;
++    }
++    multab = mtx_tmult[f];
++    p1 = dest + first;
++    p2 = src + first;
++    int rem = FfCurrentRowSizeIo - first;
++    if (rem > len) rem = len;
++    for (i = rem; i != 0; --i)
++    {
++        register BYTE x = *p2++;
++        if (x!=0)
++            *p1 = mtx_tadd[*p1][multab[x]];
++        ++p1;
++    }
++}
+ 
+ /**
+  ** Multiply a row by a coefficient.
+@@ -977,10 +1025,12 @@ void FfAddMulRow(PTR dest, PTR src, FEL f)
+     multab = mtx_tmult[f];
+     p1 = dest;
+     p2 = src;
+-    for (i = FfTrueRowSize(FfNoc); i != 0; --i)
++    for (i = FfCurrentRowSizeIo; i != 0; --i)
+     {
+-	*p1 = mtx_tadd[*p1][multab[*p2++]];
+-	++p1;
++        register BYTE x = *p2++;
++        if (x!=0)
++            *p1 = mtx_tadd[*p1][multab[x]];
++        ++p1;
+     }
+ }
+ 
+@@ -1131,7 +1181,9 @@ __asm__("    popl %ebx\n"
+                 {
+                     for (; k != 0; --k)
+                     {
+-                        *r = mtx_tadd[*r][*v++];
++                        register BYTE x = *v++;
++                        if (x!=0)
++                            *r = mtx_tadd[*r][x];
+                         ++r;
+                     }
+                 }
+@@ -1140,9 +1192,9 @@ __asm__("    popl %ebx\n"
+                     register BYTE *multab = mtx_tmult[f];
+                     for (; k != 0; --k)
+                     {
+-		 	if (*v != 0)
+-			    *r = mtx_tadd[multab[*v]][*r];
+-			++v;
++                        if (*v != 0)
++                            *r = mtx_tadd[multab[*v]][*r];
++                        ++v;
+                         ++r;
+                     }
+                 }
+diff --git a/src/matcopy.c b/src/matcopy.c
+index 75b29c0..457dfeb 100644
+--- a/src/matcopy.c
++++ b/src/matcopy.c
+@@ -57,7 +57,10 @@ int MatCopyRegion(Matrix_t *dest, int destrow, int destcol,
+     if (!MatIsValid(src) || !MatIsValid(dest))
+ 	return -1;
+     if (src->Field != dest->Field)
+-	return MTX_ERROR1("%E",MTX_ERR_INCOMPAT), -1;
++    {
++        MTX_ERROR1("%E",MTX_ERR_INCOMPAT);
++        return -1;
++    }
+     if (nrows == -1)
+ 	nrows = src->Nor - row1;
+     if (ncols == -1)
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 5123f1c..368b37b 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -126,6 +126,7 @@ int FfSetNoc(int noc);
+ 
+ 
+ void FfAddMulRow(PTR dest, PTR src, FEL f);
++void FfAddMulRowPartial(PTR dest, PTR src, FEL f, int first, int len);
+ PTR FfAddRow(PTR dest, PTR src);
+ PTR FfAddRowPartial(PTR dest, PTR src, int first, int len);
+ PTR FfSubRow(PTR dest, PTR src);
+diff --git a/src/window.c b/src/window.c
+index f374028..9c87694 100644
+--- a/src/window.c
++++ b/src/window.c
+@@ -236,12 +236,14 @@ __asm__("    popl %ebx\n"
+             {
+                 register BYTE *v = m;
+                 register BYTE *r = result;
++                register BYTE x;
+                 if (f == FF_ONE)
+                 {
+                     register size_t k = l_rowsize;
+                     for (; k != 0; --k)
+                     {
+-                        *r = mtx_tadd[*r][*v++];
++                        x=*v++;
++                        if (x) *r = mtx_tadd[*r][x];
+                         ++r;
+                     }
+                 }
+@@ -251,9 +253,8 @@ __asm__("    popl %ebx\n"
+                     register size_t k = l_rowsize;
+                     for (; k != 0; --k)
+                     {
+-                        if (*v != 0)
+-                            *r = mtx_tadd[multab[*v]][*r];
+-                        ++v;
++                        x=*v++;
++                        if (x) *r = mtx_tadd[multab[x]][*r];
+                         ++r;
+                     }
+                 }
+diff --git a/src/zcleanrow.c b/src/zcleanrow.c
+index 649e551..b4dcb30 100644
+--- a/src/zcleanrow.c
++++ b/src/zcleanrow.c
+@@ -35,18 +35,21 @@ MTX_DEFINE_FILE_INFO
+ 
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv)
+ {
+-    int i;
++    register int i, pivi, first;
+     PTR x;
+ 
+     for (i=0, x=matrix; i < nor; ++i, FfStepPtr(&x))
+     {
+-        FEL f = FfExtract(row,piv[i]);
++        pivi = piv[i];
++        FEL f = FfExtract(row,pivi);
+         if (f != FF_ZERO)
+-	    FfAddMulRow(row,x,FfNeg(FfDiv(f,FfExtract(x,piv[i]))));
++        {
++            first = pivi/MPB;
++            FfAddMulRowPartial(row,x,FfNeg(FfDiv(f,FfExtract(x,pivi))),first,FfCurrentRowSizeIo-first);
++        }
+     }
+ }
+ 
+-
+ /**
+  ** Clean Row and Record Operations.
+  ** This function works like FfCleanRow(), but it stores a record of the operations performed
diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
index 7733e9e44cb..163f180e50f 100755
--- a/build/pkgs/meataxe/spkg-install
+++ b/build/pkgs/meataxe/spkg-install
@@ -45,6 +45,30 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
+# Just to be sure, we also create other folders, although
+# they are standard SageMath folders
+
+mkdir -p "$MTXBIN"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating directory for meataxe binaries."
+    exit 1
+fi
+
+mkdir -p "$SAGE_LOCAL/include"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating SageMath's include directory."
+    exit 1
+fi
+
+mkdir -p "$SAGE_LOCAL/lib"
+
+if [ $? -ne 0 ]; then
+    echo >&2 "Error creating SageMath's lib folder."
+    exit 1
+fi
+
 ## Install! Aparently MeatAxe would rebuild everything when
 ## testing, and "make check" also installs. So, if a test
 ## is requested then we do it in one go.

From c5f328c5cc83e5cc4c67d626f9ba1368d12cab23 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Tue, 22 Sep 2015 18:08:36 +0200
Subject: [PATCH 17/23] Doctests and error handling for MeatAxe

---
 src/sage/libs/meataxe.pxd             |  79 +--
 src/sage/matrix/matrix_gfpn_dense.pxd |   5 +-
 src/sage/matrix/matrix_gfpn_dense.pyx | 786 ++++++++++++++++++++------
 3 files changed, 659 insertions(+), 211 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index cfca8ca40bc..79a8d03448d 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -16,9 +16,9 @@
 cdef extern from "meataxe.h":
     # general ctype emulations
     # ctypedef int size_t   # size_t should be a standard type!
-    ctypedef unsigned long Ulong
-    ctypedef unsigned short Ushort
-    ctypedef unsigned char Uchar
+    # ctypedef unsigned long Ulong
+    # ctypedef unsigned short Ushort
+    # ctypedef unsigned char Uchar
     ctypedef unsigned char FEL
     ctypedef FEL *PTR
 
@@ -46,8 +46,8 @@ cdef extern from "meataxe.h":
     ## global parameters
     size_t FfRowSize(int noc)
     size_t FfTrueRowSize(int noc) # Difference to FfRowSize: Doesn't count padding bytes
-    int FfSetField(int field)
-    int FfSetNoc(int ncols)
+    int FfSetField(int field) except -1
+    int FfSetNoc(int ncols) except -1
 
     ## Finite Fields
     # FEL FfAdd(FEL a,FEL b)
@@ -68,13 +68,13 @@ cdef extern from "meataxe.h":
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
-    # FEL FfScalarProduct(PTR a, PTR b)
+    FEL FfScalarProduct(PTR a, PTR b)
     void FfSwapRows(PTR dest, PTR src)
-    # void FfPermRow(PTR row, long *perm, PTR result)
-    # int FfCmpRows(PTR p1, PTR p2)
+    void FfPermRow(PTR row, long *perm, PTR result)
+    int FfCmpRows(PTR p1, PTR p2)
 
     ## multiple rows
-    PTR FfAlloc(int nor)
+    PTR FfAlloc(int nor) except NULL
     void FfExtractColumn(PTR mat,int nor,int col,PTR result)
     int FfStepPtr(PTR *x)  # Advance to next row
     PTR FfGetPtr(PTR base, int row)  # Advance to "row" rows after base
@@ -94,35 +94,50 @@ cdef extern from "meataxe.h":
         int RowSize                     # Size (in bytes) of one row
         int *PivotTable                 # Pivot table (if matrix is in echelon form
     ## Basic memory operations
-    Matrix_t *MatAlloc(int field, int nor, int noc)
+    Matrix_t *MatAlloc(int field, int nor, int noc) except NULL
     int MatFree(Matrix_t *mat)
     PTR MatGetPtr(Matrix_t *mat, int row)
-    int MatCompare(Matrix_t *a, Matrix_t *b)
-    # int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols)
-    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols)
-    # Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows)
-    Matrix_t *MatDup(Matrix_t *src)
-    Matrix_t *MatId(int fl, int nor)
-    Matrix_t *MatLoad(char *fn)
-    int MatSave(Matrix_t *mat, char *fn)
+    int MatCompare(Matrix_t *a, Matrix_t *b) except? -1
+    int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols) except -1
+    Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols) except NULL
+    Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows) except NULL
+    Matrix_t *MatDup(Matrix_t *src) except NULL
+    Matrix_t *MatId(int fl, int nor) except NULL
+    Matrix_t *MatLoad(char *fn) except NULL
+    int MatSave(Matrix_t *mat, char *fn) except -1
 
 
     ## Basic Arithmetic  ## general rule: dest is changed, src/mat are unchanged!
-    Matrix_t *MatTransposed(Matrix_t *src)
-    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src)
-    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff)
-    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src)
-    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
-    Matrix_t *MatPower(Matrix_t *mat, long n)
+    Matrix_t *MatTransposed(Matrix_t *src) except NULL
+    Matrix_t *MatAdd(Matrix_t *dest, Matrix_t *src) except NULL
+    Matrix_t *MatAddMul(Matrix_t *dest, Matrix_t *src, FEL coeff) except NULL
+    Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src) except NULL
+    Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff) except NULL
+    Matrix_t *MatPower(Matrix_t *mat, long n) except NULL
     FEL MatTrace(Matrix_t *mat)
-    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B)
+    Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B) except NULL
     void StrassenSetCutoff(size_t size)
 
-    ## "Higher" Arithmetic ## all arguments are unchanged
-    # int MatClean(Matrix_t *mat, Matrix_t *sub)
-    int MatEchelonize(Matrix_t *mat)
-    int MatOrder(Matrix_t *mat)
+    ## "Higher" Arithmetic
+    int MatClean(Matrix_t *mat, Matrix_t *sub) except -1
+    int MatEchelonize(Matrix_t *mat) except -1
+    int MatOrder(Matrix_t *mat) except? -1
     long MatNullity(Matrix_t *mat)
-    Matrix_t *MatInverse(Matrix_t *src)
-    Matrix_t *MatNullSpace(Matrix_t *mat)
-# thats's all of meataxe.h !
+    Matrix_t *MatInverse(Matrix_t *src) except NULL
+    Matrix_t *MatNullSpace(Matrix_t *mat) except NULL
+
+    ## Error handling
+    cdef extern int MTX_ERR_NOMEM, MTX_ERR_GAME_OVER, MTX_ERR_DIV0, MTX_ERR_FILEFMT, MTX_ERR_BADARG
+    cdef extern int MTX_ERR_RANGE, MTX_ERR_NOTECH, MTX_ERR_NOTSQUARE, MTX_ERR_INCOMPAT
+    cdef extern int MTX_ERR_BADUSAGE, MTX_ERR_OPTION, MTX_ERR_NARGS, MTX_ERR_NOTMATRIX, MTX_ERR_NOTPERM
+    ctypedef struct MtxFileInfo_t:
+        char *Name
+        char *BaseName
+
+    ctypedef struct MtxErrorRecord_t:
+        MtxFileInfo_t *FileInfo
+        int LineNo
+        char *Text
+
+    ctypedef void MtxErrorHandler_t(MtxErrorRecord_t*)
+    MtxErrorHandler_t *MtxSetErrorHandler(MtxErrorHandler_t *h)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pxd b/src/sage/matrix/matrix_gfpn_dense.pxd
index 300118918ad..34487536b14 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pxd
+++ b/src/sage/matrix/matrix_gfpn_dense.pxd
@@ -10,8 +10,8 @@
 
 cdef class FieldConverter_class:
     cdef object field  # that's a function converting an int to a field element
-    cdef object int_to_field(self, int x)
-    cdef int field_to_int(self, x)
+    cpdef object int_to_field(self, int x)
+    cpdef int field_to_int(self, x)
 
 from sage.matrix.matrix_dense cimport Matrix_dense
 from sage.structure.element cimport Matrix
@@ -23,6 +23,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
     cdef Matrix_gfpn_dense _new(self, Py_ssize_t nrows, Py_ssize_t ncols)
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value)
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j)
+    cdef list _rowlist_(self, i, j=*)
     cdef Matrix _matrix_times_matrix_(self, Matrix right)
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right)
     cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=*)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 7996917c3a7..dc0c978c8ef 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -70,16 +70,6 @@ include 'sage/ext/stdsage.pxi'
 import sys
 from libc.string cimport memcpy
 
-cdef inline int setfield(long n) except -1:
-    # This is a wrapper around FfSetField, but
-    # we guard it against MTX_Error, which would immediately
-    # crash the Sage session.
-    if n == FfOrder:
-        return 0
-    if not (0 < n < 255 and is_prime_power(n)):
-        raise ValueError("Only finite fields of order at most 255 are supported")
-    return FfSetField(n)
-
 # Fast conversion from field to int and int to field
 cdef class FieldConverter_class:
     """
@@ -93,18 +83,18 @@ cdef class FieldConverter_class:
 
     EXAMPLE::
 
-        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+        sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
         sage: F.<y> = GF(125)
-        sage: C = FieldConverter_class(F)
-        sage: C.int_to_field(15)
+        sage: C = FieldConverter_class(F)               # optional: meataxe
+        sage: C.int_to_field(15)                        # optional: meataxe
         3*y
-        sage: F.fetch_int(15)
+        sage: F.fetch_int(15)                           # optional: meataxe
         3*y
-        sage: %timeit C.int_to_field(15)    #not tested
+        sage: %timeit C.int_to_field(15)    # not tested
         625 loops, best of 3: 1.04 µs per loop
-        sage: %timeit F.fetch_int(15)       #not tested
+        sage: %timeit F.fetch_int(15)       # not tested
         625 loops, best of 3: 3.97 µs per loop
-        sage: C.field_to_int(y)
+        sage: C.field_to_int(y)                         # optional: meataxe
         5
         sage: y.integer_representation()
         5
@@ -118,46 +108,46 @@ cdef class FieldConverter_class:
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.int_to_field(15)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.int_to_field(15)                    # optional: meataxe
             3*y
             sage: F.fetch_int(15)
             3*y
-            sage: C.field_to_int(y)
+            sage: C.field_to_int(y)                     # optional: meataxe
             5
             sage: y.integer_representation()
             5
 
         """
         self.field = field._cache.fetch_int
-    cdef object int_to_field(self, int x):
+    cpdef object int_to_field(self, int x):
         """
         Fetch a python int into the field.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.int_to_field(15)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.int_to_field(15)                    # optional: meataxe
             3*y
             sage: F.fetch_int(15)
             3*y
 
         """
         return self.field(x)
-    cdef int field_to_int(self, x):
+    cpdef int field_to_int(self, x):
         """
         Represent a field element by a python int.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import FieldConverter_class  # optional: meataxe
             sage: F.<y> = GF(125)
-            sage: C = FieldConverter_class(F)
-            sage: C.field_to_int(y)
+            sage: C = FieldConverter_class(F)           # optional: meataxe
+            sage: C.field_to_int(y)                     # optional: meataxe
             5
             sage: y.integer_representation()
             5
@@ -176,14 +166,14 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
     EXAMPLE::
 
-        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+        sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class # optional: meataxe
         sage: F = GF(5)
-        sage: C = PrimeFieldConverter_class(F)
-        sage: C.int_to_field(int(2))
+        sage: C = PrimeFieldConverter_class(F)      # optional: meataxe
+        sage: C.int_to_field(int(2))                # optional: meataxe
         2
         sage: F(2)
         2
-        sage: C.field_to_int(F(2))
+        sage: C.field_to_int(F(2))                  # optional: meataxe
         2
         sage: int(F(2))
         2
@@ -197,29 +187,29 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
-            sage: C.int_to_field(int(2))
+            sage: C = PrimeFieldConverter_class(F)  # optional: meataxe
+            sage: C.int_to_field(int(2))            # optional: meataxe
             2
             sage: F(2)
             2
-            sage: C.field_to_int(F(2))
+            sage: C.field_to_int(F(2))              # optional: meataxe
             2
             sage: int(F(2))
             2
 
         """
         self.field = field
-    cdef object int_to_field(self, int x):
+    cpdef object int_to_field(self, int x):
         """
         Fetch a python int into the field.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
+            sage: C = PrimeFieldConverter_class(F)  # optional: meataxe
             sage: C.int_to_field(int(2))
             2
             sage: F(2)
@@ -227,16 +217,16 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
 
         """
         return IntegerMod_int(self.field, x)
-    cdef int field_to_int(self, x):
+    cpdef int field_to_int(self, x):
         """
         Represent a field element by a python int.
 
         EXAMPLE::
 
-            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class
+            sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
-            sage: C = PrimeFieldConverter_class(F)
-            sage: C.field_to_int(F(2))
+            sage: C = PrimeFieldConverter_class(F)      # optional: meataxe
+            sage: C.field_to_int(F(2))                  # optional: meataxe
             2
             sage: int(F(2))
             2
@@ -268,7 +258,41 @@ cdef FieldConverter_class FieldConverter(field):
             return _converter_cache.setdefault(field, PrimeFieldConverter_class(field))
         return _converter_cache.setdefault(field, FieldConverter_class(field))
 
+######################################
+## Error handling for MeatAxe, to prevent immediate exit of the program
+
+cdef dict ErrMsg = {
+    "Not enough memory": MemoryError,
+    "Time limit exceeded": RuntimeError,
+    "Division by zero": ZeroDivisionError,
+    "Bad file format": IOError,
+    "Bad argument": ValueError,
+    "Argument out of range": IndexError,
+
+    "Matrix not in echelon form": ValueError,
+    "Matrix not square": ArithmeticError,
+    "Incompatible objects": TypeError,
+
+    "Bad syntax, try `-help'": SyntaxError,
+    "Bad usage of option, try `-help'": ValueError,
+    "Bad number of arguments, try `-help'": ValueError,
+
+    "Not a matrix": TypeError,
+    "Not a permutation": TypeError
+}
 
+from cpython.exc cimport PyErr_SetObject
+
+cdef void ErrorHandler(MtxErrorRecord_t *err):
+    PyErr_SetObject(ErrMsg.get(err.Text, SystemError), "{} in file {} (line {})".format(err.Text, err.FileInfo.BaseName, err.LineNo))
+
+MtxSetErrorHandler(ErrorHandler)
+
+######################################
+##
+## Wrapper for MeatAxe matrices
+##
+######################################
 
 cdef class Matrix_gfpn_dense(Matrix_dense):
     r"""
@@ -288,7 +312,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         sage: print M
         [1 2 3]
         [4 0 1]
-        sage: type(M)
+        sage: type(M)     # optional: meataxe
         <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
 
     The documentation of the ``__init__`` methods shows further
@@ -302,10 +326,10 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         """
         TESTS::
 
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
-            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # indirect doctest
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # optional: meataxe
             []
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)  # optional: meataxe
             [0 0 0 0]
             [0 0 0 0]
             [0 0 0 0]
@@ -323,6 +347,16 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.Data = MatAlloc(f, nrows, ncols)
 
     def __dealloc__(self):
+        """
+        TESTS::
+
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)   # optional: meataxe
+            []
+            sage: M = None
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(64,'z'),4), None)  # optional: meataxe
+            sage: del M    # indirect doctest
+        """
         if self.Data != NULL:
             MatFree(self.Data)
             self.Data = NULL
@@ -359,32 +393,32 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         EXAMPLES::
 
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
 
         1. Creating an empty matrix::
 
-            sage: Matrix_gfpn_dense(None)
+            sage: Matrix_gfpn_dense(None)  # optional: meataxe
             []
 
         2. Creating a zero (3x2)-matrix::
 
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(4,'z'),3,2))  # optional: meataxe
             [0 0]
             [0 0]
             [0 0]
 
         3. Creating a matrix from a list or list of lists::
 
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[1,2,3,4,5,6])  # optional: meataxe
             [1 2 3]
             [4 0 1]
-            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])  # indirect doctest
+            sage: Matrix_gfpn_dense(MatrixSpace(GF(5),2,3),[[1,2,3],[4,5,6]])    # optional: meataxe
             [1 2 3]
             [4 0 1]
 
         4. Creating a diagonal matrix::
 
-            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M
+            sage: M = Matrix_gfpn_dense(MatrixSpace(GF(7),5),2); M  # optional: meataxe
             [2 0 0 0 0]
             [0 2 0 0 0]
             [0 0 2 0 0]
@@ -393,24 +427,11 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         5. Creating a matrix from a file in MeatAxe format.
 
-           First, we have to create that file; we use a temporary file,
-           that will be removed when leaving Sage. Note that the method
-           :meth:`msave` must be used, which does not use Python pickling
-           but relies on the intrinsic C--MeatAxe way of saving.
-           ::
-
-            sage: f = tmp_filename()
-            sage: M.msave(f)
-            sage: Matrix_gfpn_dense(f)
-            [2 0 0 0 0]
-            [0 2 0 0 0]
-            [0 0 2 0 0]
-            [0 0 0 2 0]
-            [0 0 0 0 2]
+           This is not tested.
 
         TESTS::
 
-            sage: MS = MatrixSpace(GF(125,'y'),2)
+            sage: MS = MatrixSpace(GF(125,'y'),2)  # indirect doctest
             sage: A = MS(0)
             sage: A.left_kernel()
             Vector space of degree 2 and dimension 2 over Finite Field in y of size 5^3
@@ -528,22 +549,22 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
         EXAMPLES::
 
-            sage: M=MatrixSpace(GF(25,'x')([20*[0],20*[0],[1]+19*[0]])
-            sage: N=copy(M)
+            sage: M = MatrixSpace(GF(25,'x'), 3, 20)([20*[0],20*[0],[1]+19*[0]])
+            sage: N = copy(M)   # indirect doctest
             sage: print N
             [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
             [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-            sage: N==M
+            sage: N == M
             True
             sage: N is M
             False
-            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense
-            sage: M=Matrix_gfpn_dense('')
-            sage: N=copy(M)
-            sage: N
-            Empty MTX matrix
-            sage: N==M
+            sage: from sage.matrix.matrix_gfpn_dense import Matrix_gfpn_dense  # optional: meataxe
+            sage: M = Matrix_gfpn_dense('')   # optional: meataxe
+            sage: N = copy(M)
+            sage: N                         # optional: meataxe
+            []
+            sage: N == M
             True
             sage: N is M
             False
@@ -559,17 +580,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             retval.Data = NULL
         return retval
 
-    ##########################
-    ## Saving should be done via pickling
-    ## However, we keep a method that relies on MeatAxe matsave:
-    def msave(self,f):
-        """
-        M.msave('filename') ==> save matrix into file <filename>
-
-        It can be reloaded with ``Matrix_gfpn_dense('filename')``.
-        """
-        MatSave(self.Data,f)
-
     ## Pickling and string representation is taken care of by implementing get_unsafe
     cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
         """
@@ -578,17 +588,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         TEST::
 
             sage: F.<z> = GF(9)
-            sage: M = MatrixSpace(F,3)(list(F))
-            sage: type(M)
+            sage: M = MatrixSpace(F,3)(sorted(list(F)))
+            sage: type(M)               # optional: meataxe
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
-            sage: M    # indirect doctest
-            [      0     2*z   z + 1]
-            [  z + 2       2       z]
-            [2*z + 2 2*z + 1       1]
+            sage: M                     # indirect doctest
+            [      0       1       2]
+            [      z   z + 1   z + 2]
+            [    2*z 2*z + 1 2*z + 2]
 
         """
         if self.Data == NULL:
             raise IndexError, "Matrix is empty"
+        FfSetField(self.Data.Field)
         return self._converter.int_to_field(FfToInt(FfExtract(MatGetPtr(self.Data,i), j)))
 
     cdef inline int get_unsafe_int(self, Py_ssize_t i, Py_ssize_t j):
@@ -596,12 +607,37 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         # It is essential that you call FfSetField and FfSetNoc YOURSELF
         # and that you assert that the matrix is not empty!
         # This method is here for speed!
-        return FfToInt(FfExtract(FfGetPtr(self.Data.Data,i) ,j))
+        return FfToInt(FfExtract(MatGetPtr(self.Data,i), j))
 
     cdef set_unsafe(self, Py_ssize_t i, Py_ssize_t j, value):
+        """
+        Set values without bound checking.
+
+        TESTS:
+
+        The following test would have failed in a preliminary version
+        of this MeatAxe wrapper::
+
+            sage: K.<x> = GF(125)
+            sage: M = MatrixSpace(K,9,9)()
+            sage: N = MatrixSpace(GF(9,'x'),20).random_element()
+            sage: M[2,2] = x
+            sage: M
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 x 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+            [0 0 0 0 0 0 0 0 0]
+
+        """
         # ASSUMPTION: value's parent is the base ring
         if self.Data == NULL:
             raise IndexError, "Matrix is empty"
+        FfSetField(self.Data.Field)
         FfInsert(MatGetPtr(self.Data,i), j, FfFromInt(self._converter.field_to_int(value)))
 
     cdef set_unsafe_int(self, Py_ssize_t i, Py_ssize_t j, int value):
@@ -625,23 +661,24 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         EXAMPLE::
 
             sage: MS = MatrixSpace(GF(27,'z'),6,6)
-            sage: M = MS.random_element(); M    # indirect doctest
+            sage: M = MS.random_element()       # indirect doctest
+            sage: M                             # optional: meataxe
             [              1           z + 1     z^2 + z + 1             z^2       2*z^2 + z           z + 1]
             [2*z^2 + 2*z + 2   2*z^2 + z + 2         z^2 + 1 2*z^2 + 2*z + 2         z^2 + z   2*z^2 + z + 1]
             [        2*z + 2     z^2 + z + 2           z + 2 2*z^2 + 2*z + 2           2*z^2           2*z^2]
             [  2*z^2 + z + 2             z^2           z + 2         z^2 + z       2*z^2 + 2         z^2 + 2]
             [      2*z^2 + z             2*z 2*z^2 + 2*z + 1       2*z^2 + 1 2*z^2 + 2*z + 1       2*z^2 + z]
             [        2*z + 1         z^2 + z             z^2             z^2     2*z^2 + 2*z           z + 1]
-            sage: type(M)
+            sage: type(M)                           # optional: meataxe
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
-            sage: MS.random_element(nonzero=True)
+            sage: MS.random_element(nonzero=True)   # optional: meataxe
             [            2*z               1   z^2 + 2*z + 1   2*z^2 + z + 1             z^2     z^2 + z + 1]
             [    2*z^2 + 2*z   2*z^2 + z + 2         2*z + 1       z^2 + 2*z     2*z^2 + 2*z             z^2]
             [        z^2 + z     z^2 + z + 2 2*z^2 + 2*z + 1         z^2 + 2               1           2*z^2]
             [              z     2*z^2 + 2*z           2*z^2         2*z + 1           z + 2           z + 2]
             [        z^2 + z             z^2           z + 2     2*z^2 + 2*z         2*z + 1         z^2 + z]
             [    z^2 + z + 2       2*z^2 + z             z^2           z + 1     2*z^2 + 2*z   z^2 + 2*z + 1]
-            sage: MS.random_element(density=0.5)
+            sage: MS.random_element(density=0.5)    # optional: meataxe
             [        z^2 + 2               0   z^2 + 2*z + 2       2*z^2 + z               0     z^2 + z + 2]
             [              0               1               0               0               0               0]
             [  2*z^2 + z + 1   2*z^2 + z + 2               0     z^2 + z + 2               0     z^2 + z + 1]
@@ -723,41 +760,42 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     FfStepPtr(&(x))
                 sig_off()
 
-    def show_contents(self, r=None):
-        FfSetField(self.Data.Field)
-        FfSetNoc(self.Data.Noc)
-        cdef PTR p
-        cdef size_t i, j
-        if r is not None:
-            r_min = r
-            r_max = r+1
-        else:
-            r_min = 0
-            r_max = self.Data.Nor
-        for i in range(r_min, r_max):
-            p = FfGetPtr(self.Data.Data, i)
-            for j from 0<=j<self.Data.RowSize:
-                print "%3.3d"%p[j],
-            print
+## Debugging
+#    def show_contents(self, r=None):
+#        FfSetField(self.Data.Field)
+#        FfSetNoc(self.Data.Noc)
+#        cdef PTR p
+#        cdef size_t i, j
+#        if r is not None:
+#            r_min = r
+#            r_max = r+1
+#        else:
+#            r_min = 0
+#            r_max = self.Data.Nor
+#        for i in range(r_min, r_max):
+#            p = FfGetPtr(self.Data.Data, i)
+#            for j from 0<=j<self.Data.RowSize:
+#                print "%3.3d"%p[j],
+#            print
 
 ##################
 ## comparison
     cpdef int _cmp_(left, Element right) except -2:
         """
-        Compare two Matrix_gfpn_dense matrices
+        Compare two :class:`Matrix_gfpn_dense` matrices
 
         Of course, '<' and '>' doesn't make much sense for matrices.
 
         EXAMPLES::
 
-            sage: M = MatrixSpace(GF(125,'x'),[20*[0],20*[0],[1]+19*[0]])
+            sage: M = MatrixSpace(GF(125,'x'),3,20)([20*[0],20*[0],[1]+19*[0]])
             sage: N = copy(M)
             sage: M == N
             True
             sage: M != N
             False
-            sage: print M < N
-            None
+            sage: M < N
+            False
             sage: N[2,19] = 1
             sage: M == N
             False
@@ -766,8 +804,17 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         """
         cdef Matrix_gfpn_dense self = left
         cdef Matrix_gfpn_dense N = right
+        if self is None or N is None:
+            return -1
         cdef char* d1
         cdef char* d2
+        if self.Data == NULL:
+            if N.Data == NULL:
+                return 0
+            else:
+                return 1
+        elif N.Data == NULL:
+            return -1
         if self.Data.Field != N.Data.Field:
             if self.Data.Field > N.Data.Field:
                 return 1
@@ -790,11 +837,12 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             return -1
         return 0
 
-    def _rowlist_(self, i, j=-1):
+    cdef list _rowlist_(self, i, j=-1):
         "M._rowlist_(i): Return row <i> as a list of python ints"
         cdef int k
         if self.Data:
             FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
         else:
             raise ValueError("Matrix is empty")
         if (i<0) or (i>=self.Data.Nor):
@@ -813,6 +861,17 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return L
 
     def _list(self):
+        """
+        Return a flat list of all entries of this matrix.
+
+        The result is cached.
+
+        EXAMPLES::
+
+            sage: MatrixSpace(GF(9,'x'),3)(sorted(list(GF(9,'x')))).list()  # indirect doctest
+            [0, 1, 2, x, x + 1, x + 2, 2*x, 2*x + 1, 2*x + 2]
+
+        """
         cdef list x = self.fetch('list')
         if not x is None:
             return x
@@ -825,34 +884,135 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise IndexError, "Matrix is empty"
         cdef PTR p
         p = self.Data.Data
+        sig_on()
         for i from 1<=i<self.Data.Nor:
             x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
             FfStepPtr(&(p))
         x.extend([self._converter.int_to_field(FfToInt(FfExtract(p,j))) for j in range(self.Data.Noc)])
+        sig_off()
         self.cache('list', x)
         return x
 
 #########################
 ## Arithmetics
     cdef rescale_row_c(self, Py_ssize_t i, s, Py_ssize_t start_col):
+        """
+        Rescale row number `i` in-place by multiplication with the scalar `s`.
+
+        Only full rows can be rescaled: ``start_col`` must be zero, otherwise
+        a ``ValueError`` is raised. The scalar `s` is converted into the base ring.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.rescale_row(1, 3)   # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 3 3*x + 1 3*x + 4 3*x + 2]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.rescale_row(4, x)
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 3 3*x + 1 3*x + 4 3*x + 2]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [4*x + 2       2   x + 2 2*x + 2 3*x + 2]
+
+        """
         if start_col != 0 or self.Data == NULL:
             raise ValueError("We can only rescale a full row of a non-empty matrix")
         FfMulRow(MatGetPtr(self.Data, i), FfFromInt(self._converter.field_to_int(self._base_ring(s))))
 
     cdef add_multiple_of_row_c(self,  Py_ssize_t row_to, Py_ssize_t row_from, multiple, Py_ssize_t start_col):
+        """
+        Add the ``multiple``-fold of row ``row_from`` in-place to row ``row_to``.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.add_multiple_of_row(2, 4, x)  # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [  x + 2 2*x + 3 3*x + 4     4*x       1]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+
+        """
         if start_col != 0 or self.Data == NULL:
             raise ValueError("We can only rescale a full row of a non-empty matrix")
         FfAddMulRow(MatGetPtr(self.Data, row_to), MatGetPtr(self.Data, row_from), FfFromInt(self._converter.field_to_int(self._base_ring(multiple))))
 
     cdef swap_rows_c(self, Py_ssize_t row1, Py_ssize_t row2):
+        """
+        Swap the rows ``row1`` and ``row2`` in-place.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K,5,5)(sorted(list(K)))
+            sage: M
+            [      0       1       2       3       4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+            sage: M.swap_rows(1, 3)    # indirect doctest
+            sage: M
+            [      0       1       2       3       4]
+            [    3*x 3*x + 1 3*x + 2 3*x + 3 3*x + 4]
+            [    2*x 2*x + 1 2*x + 2 2*x + 3 2*x + 4]
+            [      x   x + 1   x + 2   x + 3   x + 4]
+            [    4*x 4*x + 1 4*x + 2 4*x + 3 4*x + 4]
+
+        """
         FfSwapRows(MatGetPtr(self.Data, row1), MatGetPtr(self.Data, row2))
 
     def trace(self):
+        """
+        Trace of this matrix, i.e., the sum of diagonal elements.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(125)
+            sage: MatrixSpace(K,7,7)(x).trace()
+            2*x
+
+        """
         if self._nrows != self._ncols:
             raise ValueError, "self must be a square matrix"
         return self._converter.int_to_field(FfToInt(MatTrace(self.Data)))
 
     def stack(self, Matrix_gfpn_dense other):
+        """
+        Stack two matrices with the same number of columns.
+
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),1,9)(sorted(list(GF(9,'x'))))
+            sage: M
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+            sage: M.stack(M)
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+            [      0       1       2       x   x + 1   x + 2     2*x 2*x + 1 2*x + 2]
+
+        """
         if self._ncols != other._ncols:
             raise TypeError("Both numbers of columns must match.")
         if self._nrows == 0 or self.Data == NULL:
@@ -866,6 +1026,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return OUT
 
     cpdef ModuleElement _add_(self, ModuleElement right):
+        """
+        TESTS::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: N = MatrixSpace(K,3,3)(2*x)
+            sage: M+N           # indirect doctest
+            [    2*x       1       2]
+            [      x       1   x + 2]
+            [    2*x 2*x + 1   x + 2]
+
+        """
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -873,12 +1045,25 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         if Self.Data == NULL or Right.Data == NULL:
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
+        Left._cache = {}
         if MatAdd(Left.Data, Right.Data) != NULL:
             return Left
         else:
             raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
+        """
+        TESTS::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: N = MatrixSpace(K,3,3)(2*x)
+            sage: M-N    # indirect doctest
+            [      x       1       2]
+            [      x 2*x + 1   x + 2]
+            [    2*x 2*x + 1       2]
+
+        """
         cdef Matrix_gfpn_dense Self = self
         cdef Matrix_gfpn_dense Right = right
         assert Self is not None
@@ -887,30 +1072,78 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._is_immutable = False
+        Left._cache = {}
         if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
             return Left
         else:
             raise ArithmeticError, "Matrix sizes or fields not compatible"
 
     def __neg__(self):
+        """
+        TESTS::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: -M
+            [      0       2       1]
+            [    2*x 2*x + 2 2*x + 1]
+            [      x   x + 2   x + 1]
+
+        ::
+
+            sage: M = MatrixSpace(GF(125,'x'),10,30).random_element()
+            sage: N = MatrixSpace(GF(125,'x'),10,30).random_element()
+            sage: M + (-N) == M - N == -(N - M)
+            True
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         return self._rmul_(self._base_ring(-1))
 
     cpdef ModuleElement _rmul_(self, RingElement left):
+        """
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(list(K))
+            sage: x*M    # indirect doctest
+            [      0   x + 1 2*x + 1]
+            [      2     2*x 2*x + 2]
+            [  x + 2       1       x]
+            sage: -M == (-1)*M
+            True
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
+        OUT._cache = {}
         if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     cpdef ModuleElement _lmul_(self, RingElement right):
+        """
+        EXAMPLES::
+
+            sage: M = MatrixSpace(GF(9,'x'),3,3)(sorted(list(GF(9,'x'))))
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: M*x    # indirect doctest
+            [      0       x     2*x]
+            [  x + 1 2*x + 1       1]
+            [2*x + 2       2   x + 2]
+            sage: -M == (-1)*M
+            True
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
+        OUT._cache = {}
         if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
             return OUT
         raise ArithmeticError("Matrix sizes or fields not compatible")
@@ -922,6 +1155,21 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         return 0
 
     cpdef Matrix_gfpn_dense _multiply_classical(Matrix_gfpn_dense self, Matrix_gfpn_dense right):
+        """
+        Multiplication using the cubic school book multiplication algorithm.
+
+        EXAMPLES:
+
+        Since by default the asymptotically faster Strassen-Winograd
+        multiplication algorithm is used, the following is a valid
+        consistency check::
+
+            sage: M = MatrixSpace(GF(9,'x'),1000,500).random_element()
+            sage: N = MatrixSpace(GF(9,'x'),500,2000).random_element()
+            sage: M*N == M._multiply_classical(N)                       # optional: meataxe
+            True
+
+        """
         "multiply two meataxe matrices by the school book algorithm"
         if self.Data == NULL or right.Data == NULL:
             raise ValueError("The matrices must not be empty")
@@ -943,8 +1191,26 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 
     cpdef Matrix_gfpn_dense _multiply_strassen(Matrix_gfpn_dense self, Matrix_gfpn_dense right, cutoff=0):
         """
-        cutoff is NOT the number of rows/columns, but the rowsize expressed in bytes.
-        If `cutoff==0` then the default ``sizeof(long)^2/2`` is chosen.
+        Matrix multiplication using the asymptotically fast Strassen-Winograd algorithm.
+
+        INPUT:
+
+        - ``right`` -- a matrix of dimensions suitable to do multiplication
+        - ``cutoff`` (optional integer) -- indicates the minimal size of submatrices
+          that will be considered in the divide-and-conquer algorithm. The size is
+          *not* expressed by the number of rows/columns, but by the row size in
+          bytes. Depending on the base field, one byte may represent up to eight
+          entries in a matrix row. The default is ``sizeof(long)^2/2`` bytes.
+
+        EXAMPLES:
+
+        We test that different cutoffs yield the same result::
+
+            sage: M = MatrixSpace(GF(9,'x'),1500,600).random_element()
+            sage: N = MatrixSpace(GF(9,'x'),600,1500).random_element()
+            sage: M._multiply_strassen(N) == M._multiply_strassen(N,80) == M._multiply_strassen(N,2) # optional: meataxe
+            True
+
         """
         if self.Data == NULL or right.Data == NULL:
             raise ValueError("The matrices must not be empty")
@@ -972,48 +1238,76 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         else:
             r = FfFromInt(n)
         left = self.__copy__()
+        left._cache = {}
         if MatMulScalar(left.Data, r) != NULL:
             return left
         raise ArithmeticError("Matrix sizes or fields not compatible")
 
     def __div__(Matrix_gfpn_dense self, p):
-        "divide an MTX matrix by a field element represented by an integer"
+        """
+        Divide a matrix by a scalar.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K,3,3)(sorted(list(K)))
+            sage: M
+            [      0       1       2]
+            [      x   x + 1   x + 2]
+            [    2*x 2*x + 1 2*x + 2]
+            sage: M/2                   # indirect doctest
+            [      0       2       1]
+            [    2*x 2*x + 2 2*x + 1]
+            [      x   x + 2   x + 1]
+            sage: M/x
+            [      0   x + 2 2*x + 1]
+            [      1       x 2*x + 2]
+            [      2   x + 1     2*x]
+
+        """
         if self.Data == NULL:
             return self.__copy__()
         if not p:
             raise ZeroDivisionError
         if p not in self._base_ring:
             raise ValueError("{} is not a scalar".format(p))
+        p = self._base_ring(p)
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
-        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
-        if MatMulScalar(OUT.Data, r) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
-
-    def __pow__(Matrix_gfpn_dense self, n, ignored):
-        "M.__pow__(n): return M^n"
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        if not self.is_square():
-            raise ArithmeticError("self must be a square matrix")
-        if ignored is not None:
-            raise RuntimeError("__pow__ third argument not used")
-        cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
-        cdef Matrix_gfpn_dense SELFINV
-        OUT._is_immutable = False
         OUT._cache = {}
-        if n>=0:
-            OUT.Data = MatPower(self.Data,n)
-        else:
-            SELFINV = self.__invert__()
-            OUT.Data = MatPower(SELFINV.Data,-n)
-        if OUT.Data != NULL:
-            return OUT
-        raise ArithmeticError("Failure in exponentiating a matrix")
+        cdef FEL r = mtx_tmultinv[FfFromInt(self._converter.field_to_int(p))]
+        MatMulScalar(OUT.Data, r)
+        return OUT
 
     def __invert__(Matrix_gfpn_dense self):
-        "M__invert__(): return M^(-1)"
+        """
+        Multiplicative inverse of this matrix (if available)
+
+        TESTS::
+
+            sage: MS = MatrixSpace(GF(9,'x'),500)
+            sage: while 1:
+            ....:     M = MS.random_element()
+            ....:     if M.rank() == 500:
+            ....:         break
+            sage: Minv = ~M    # indirect doctest
+            sage: Minv*M == M*Minv == 1
+            True
+
+        We use the occasion to demonstrate that errors in MeatAxe are
+        correctly handled in Sage::
+
+            sage: MS = MatrixSpace(GF(25,'x'),5)
+            sage: while 1:
+            ....:     M = MS.random_element(density=0.4)
+            ....:     if M.rank() < 5:
+            ....:         break
+            sage: ~M                    # optional: meataxe
+            Traceback (most recent call last):
+            ...
+            ZeroDivisionError: Division by zero in file matinv.c (line 50)
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         if not self.is_square():
@@ -1021,22 +1315,61 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense OUT = self._new(self._nrows, self._ncols)
         OUT._is_immutable = False
         OUT._cache = {}
-        OUT.Data = MatInverse(self.Data)
+        sig_on()
+        try:
+            OUT.Data = MatInverse(self.Data)
+        except:
+            sig_off()
+            raise
+        sig_off()
         if OUT.Data != NULL:
             return OUT
         raise ArithmeticError("This matrix is not invertible")
 
     def transpose(Matrix_gfpn_dense self):
+        """
+        Return the transposed matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(9)
+            sage: M = MatrixSpace(K, 2,4)(sorted(list(K)[1:]))
+            sage: M
+            [      1       2       x   x + 1]
+            [  x + 2     2*x 2*x + 1 2*x + 2]
+            sage: M.transpose()
+            [      1   x + 2]
+            [      2     2*x]
+            [      x 2*x + 1]
+            [  x + 1 2*x + 2]
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._rows)
+        cdef Matrix_gfpn_dense OUT = self._new(self._ncols, self._nrows)
         OUT._is_immutable = False
         OUT._cache = {}
         OUT.Data = MatTransposed(self.Data)
         return OUT
 
     def order(self):
-        "M.order(): return multiplicative order of M"
+        """
+        Return the multiplicative order of this matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(27)
+            sage: M = MatrixSpace(K, 4)([2*x^2 + 2*x, 2*x^2 + x, 2*x^2 + x + 1,
+            ....: x^2 + x + 2, x + 2, x^2, 2*x + 2, 2*x^2 + 2*x, 2*x^2 + 1,
+            ....: 1, 2, x^2 + 2*x + 1, x^2 + x + 2, x + 1, 2*x^2 + 2*x, x^2 + x])
+            sage: M.order()                 # optional: meataxe
+            104
+            sage: M^104 == 1
+            True
+            sage: M^103 == 1
+            False
+
+        """
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
         if (self.Data.Nor <> self.Data.Noc):
@@ -1050,23 +1383,49 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
 ###################
 ## Gauss algorithm
 
-    def nullity(self):
-        "M.nullity(): return the nullity of M"
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        return MatNullity(self.Data)
-
     def left_kernel_matrix(self):
-        """M.left_kernel_matrix(): return the null space of M
+        """
+        Return the null space of this matrix, represented as a matrix.
+
+        NOTE:
+
+        - For a matrix `M`, ``M.left_kernel_matrix()*M`` is a null matrix.
+        - The command ``M.left_kernel()`` uses a generic implementation in Sage
+          that relies on computing the echelon form of the transposed
+          matrix. This method, however, uses a MeatAxe function to compute
+          the left kernel matrix.
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K, 10)()
+            sage: entries = [((0, 2), x), ((0, 4), 3*x + 2),
+            ....: ((0, 8), 2*x), ((1, 1), x + 3), ((1, 5), 3*x),
+            ....: ((1, 6), x + 4), ((2, 3), 2*x), ((2, 5), 4*x + 1),
+            ....: ((2, 6), 4), ((3, 4), x + 4), ((3, 5), x + 1),
+            ....: ((5, 5), 3*x), ((5, 7), x + 3), ((6, 1), x),
+            ....: ((6, 2), x + 1), ((6, 5), x + 1), ((8, 2), 4),
+            ....: ((8, 8), 4), ((8, 9), x + 3), ((9, 8), 4*x + 2)]
+            sage: for (i,j),v in entries: M[i,j] = v
+            sage: M.left_kernel()
+            Vector space of degree 10 and dimension 2 over Finite Field in x of size 5^2
+            Basis matrix:
+            [0 0 0 0 1 0 0 0 0 0]
+            [0 0 0 0 0 0 0 1 0 0]
+            sage: M.left_kernel_matrix()    # optional: meataxe
+            [0 0 0 0 1 0 0 0 0 0]
+            [0 0 0 0 0 0 0 1 0 0]
 
-        M.left_kernel_matrix()*M is a null matrix
         """
+        cdef Matrix_gfpn_dense OUT = self.fetch("left_kernel_matrix")
+        if OUT is not None:
+            return OUT
         if self.Data == NULL:
             raise ValueError("The matrix must not be empty")
-        cdef Matrix_gfpn_dense OUT = type(self).__new__(type(self))
+        OUT = type(self).__new__(type(self))
         OUT.Data = MatNullSpace(self.Data)
         if OUT.Data == NULL:
-            return OUT
+            raise ArithmeticError("Error computing left kernel matrix")
         OUT._nrows = OUT.Data.Nor
         OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
@@ -1074,26 +1433,97 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         OUT._base_ring = self._base_ring
         OUT._converter = self._converter
         OUT._cache = {}
+        self.cache("left_kernel_matrix", OUT)
         return OUT
 
-    def lead(self):
+    def _echelon_in_place_classical(self, reduced=True):
         """
-(f,i) = M.lead() <=> f=M[0,i] is the first non-zero coefficient in the first row of M
+        Change this matrix into echelon form, using classical Gaussian elimination.
 
-If the first row of M has no non-zero entry then f==0
-        """
-        cdef int i
-        cdef int fe
-        if self.Data == NULL:
-            raise ValueError("The matrix must not be empty")
-        FfSetField(self.Data.Field)
-        for i from 0 <= i < self.Data.Noc:
-            fe = FfToInt(FfExtract(self.Data.Data,i))
-            if fe:
-                return fe, i
-        return 0, self.Data.Noc
+        INPUT:
 
-    def _echelon_in_place_classical(self, reduced=True):
+        - ``reduced`` (optional, default ``True``) -- will result
+          in the row-reduced echelon form (otherwise, only a
+          semi-echelon form results).
+
+        EXAMPLES::
+
+            sage: K.<x> = GF(25)
+            sage: M = MatrixSpace(K, 10)()
+            sage: entries = [((0, 2), x), ((0, 4), 3*x + 2),
+            ....: ((0, 8), 2*x), ((1, 1), x + 3), ((1, 5), 3*x),
+            ....: ((1, 6), x + 4), ((2, 3), 2*x), ((2, 5), 4*x + 1),
+            ....: ((2, 6), 4), ((3, 4), x + 4), ((3, 5), x + 1),
+            ....: ((5, 5), 3*x), ((5, 7), x + 3), ((6, 1), x),
+            ....: ((6, 2), x + 1), ((6, 5), x + 1), ((8, 2), 4),
+            ....: ((8, 8), 4), ((8, 9), x + 3), ((9, 8), 4*x + 2)]
+            sage: for (i,j),v in entries: M[i,j] = v
+            sage: M
+            [      0       0       x       0 3*x + 2       0       0       0     2*x       0]
+            [      0   x + 3       0       0       0     3*x   x + 4       0       0       0]
+            [      0       0       0     2*x       0 4*x + 1       4       0       0       0]
+            [      0       0       0       0   x + 4   x + 1       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0     3*x       0   x + 3       0       0]
+            [      0       x   x + 1       0       0   x + 1       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       4       0       0       0       0       0       4   x + 3]
+            [      0       0       0       0       0       0       0       0 4*x + 2       0]
+            sage: M.echelon_form()   # indirect doctest
+            [      0       1       0       0       0       0       0       0       0 4*x + 4]
+            [      0       0       1       0       0       0       0       0       0 4*x + 2]
+            [      0       0       0       1       0       0       0       0       0 3*x + 4]
+            [      0       0       0       0       1       0       0       0       0 3*x + 3]
+            [      0       0       0       0       0       1       0       0       0 2*x + 3]
+            [      0       0       0       0       0       0       1       0       0       x]
+            [      0       0       0       0       0       0       0       1       0 2*x + 2]
+            [      0       0       0       0       0       0       0       0       1       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        A semi-echelon form can be produced by invoking the single-underscore
+        method directly::
+
+            sage: N = copy(M)
+            sage: N._echelon_in_place_classical(reduced=False)      # optional: meataxe
+            sage: N                                                 # optional: meataxe
+            [      0       0       x       0 3*x + 2       0       0       0     2*x       0]
+            [      0   x + 3       0       0       0     3*x   x + 4       0       0       0]
+            [      0       0       0     2*x       0 4*x + 1       4       0       0       0]
+            [      0       0       0       0   x + 4   x + 1       0       0       0       0]
+            [      0       0       0       0       0     3*x       0   x + 3       0       0]
+            [      0       0       0       0       0       0 2*x + 2     4*x 3*x + 3       0]
+            [      0       0       0       0       0       0       0   x + 1       1   x + 3]
+            [      0       0       0       0       0       0       0       0 4*x + 2       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        TESTS:
+
+        We verify that the above echelon form is consistent with Sage's generic
+        implementation of dense matrices::
+
+            sage: type(M)                           # optional: meataxe
+            <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
+            sage: MS = M.parent()
+            sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+            sage: MS._MatrixSpace__matrix_class = Matrix_generic_dense
+            sage: X = MS(M._list())
+            sage: type(X)
+            <type 'sage.matrix.matrix_generic_dense.Matrix_generic_dense'>
+            sage: X.echelon_form()
+            [      0       1       0       0       0       0       0       0       0 4*x + 4]
+            [      0       0       1       0       0       0       0       0       0 4*x + 2]
+            [      0       0       0       1       0       0       0       0       0 3*x + 4]
+            [      0       0       0       0       1       0       0       0       0 3*x + 3]
+            [      0       0       0       0       0       1       0       0       0 2*x + 3]
+            [      0       0       0       0       0       0       1       0       0       x]
+            [      0       0       0       0       0       0       0       1       0 2*x + 2]
+            [      0       0       0       0       0       0       0       0       1       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+            [      0       0       0       0       0       0       0       0       0       0]
+
+        """
         if self._nrows == 0 or self._ncols == 0:
             self.cache('in_echelon_form',True)
             self.cache('rank', 0)
@@ -1110,6 +1540,7 @@ If the first row of M has no non-zero entry then f==0
         self.cache('rank', r)
         # Next, we do permutations to achieve the reduced echelon form,
         # if requested.
+        sig_on()
         if reduced:
             pivs = [(self.Data.PivotTable[i],i) for i in range(r)]
             pivs.sort()
@@ -1146,6 +1577,7 @@ If the first row of M has no non-zero entry then f==0
             self.Data.Data = <PTR>check_realloc(self.Data.Data, FfCurrentRowSize*self._nrows)
             memset(self.Data.Data + FfCurrentRowSize*self.Data.Nor, FF_ZERO, FfCurrentRowSize*(self._nrows-self.Data.Nor))
             self.Data.Nor = self._nrows
+        sig_off()
         self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
         self.cache('in_echelon_form',True)
 

From c29d1f87ad8f397b89e46f20ffdf703994122de4 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Wed, 23 Sep 2015 00:07:33 +0200
Subject: [PATCH 18/23] Fix computation of row-reduced echelon form

---
 src/sage/libs/meataxe.pxd             |  1 +
 src/sage/matrix/matrix_gfpn_dense.pyx | 44 ++++++++++++++++++---------
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index 79a8d03448d..fc76bfc781e 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -65,6 +65,7 @@ cdef extern from "meataxe.h":
     void FfMulRow(PTR row, FEL mark)
     void FfAddMulRow(PTR dest, PTR src, FEL f)
     PTR FfAddRow(PTR dest, PTR src)
+    PTR FfSubRow(PTR dest, PTR src)
     FEL FfExtract(PTR row, int col)
     void FfInsert(PTR row, int col, FEL mark)
     int FfFindPivot(PTR row, FEL *mark)
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index dc0c978c8ef..5ab82de2f83 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1523,6 +1523,18 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             [      0       0       0       0       0       0       0       0       0       0]
             [      0       0       0       0       0       0       0       0       0       0]
 
+        The following was a problem in a preliminary version of the code::
+
+            sage: K.<a> = GF(25)
+            sage: M = MatrixSpace(K, 2, 4)([4, 4, 1, 0, 0, 2*a+1, a+2, 1])
+            sage: M
+            [      4       4       1       0]
+            [      0 2*a + 1   a + 2       1]
+            sage: M.echelonize()
+            sage: M
+            [      1       0 3*a + 4 2*a + 2]
+            [      0       1     2*a 3*a + 3]
+
         """
         if self._nrows == 0 or self._ncols == 0:
             self.cache('in_echelon_form',True)
@@ -1546,7 +1558,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             pivs.sort()
             if pivs != [(self.Data.PivotTable[i],i) for i in range(r)] or self.Data.Nor < self._nrows:
                 # We copy the row one by one, sorting their pivot positions
-                # and scaling the pivot to one.
                 old = self.Data.Data
                 self.Data.Data = FfAlloc(self._nrows)
                 for i, (pos,j) in enumerate(pivs):
@@ -1554,22 +1565,25 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
                     dest = self.Data.Data+FfCurrentRowSize*i
                     memcpy(dest, old+FfCurrentRowSize*j, FfCurrentRowSize)
                     self.Data.PivotTable[i] = pos
-                    piv = FfExtract(dest, pos)
-                    assert piv!=FF_ZERO
-                    if piv != FF_ONE:
-                        FfMulRow(dest, mtx_tmultinv[piv])
                 free(old)
                 self.Data.Nor = self._nrows
-                # Finally, we annulate everything above the pivots
-                # (currently, we only know that the matrix is zero
-                # below the pivots).
-                for i from 1 <= i < r:
-                    src = MatGetPtr(self.Data, i)
-                    for j from 0 <= j < i:
-                        dest = MatGetPtr(self.Data, j)
-                        piv = FfExtract(dest, self.Data.PivotTable[i])
-                        if piv != FF_ZERO:
-                            FfAddMulRow(dest, src, mtx_taddinv[piv])
+            # Now, the pivot columns are strictly increasing.
+            # We now normalize each row, and annulate everything
+            # above the pivot (currently, we only know that the matrix
+            # is zero below the pivots).
+            for i from 0 <= i < r:
+                src = MatGetPtr(self.Data, i)
+                piv = FfExtract(src, self.Data.PivotTable[i])
+                assert piv!=FF_ZERO
+                if piv != FF_ONE:
+                    FfMulRow(src, mtx_tmultinv[piv])
+                for j from 0 <= j < i:
+                    dest = MatGetPtr(self.Data, j)
+                    piv = FfExtract(dest, self.Data.PivotTable[i])
+                    if piv != FF_ONE:
+                        FfAddMulRow(dest, src, mtx_taddinv[piv])
+                    else:
+                        FfSubRow(dest, src)
         elif self.Data.Nor < self._nrows:
             # Some rows may have vanished. In SageMath, we
             # want that the number of rows does not change,

From f816e41dfe0cd77310de4b8010bdfee975b2153e Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Wed, 23 Sep 2015 00:55:03 +0200
Subject: [PATCH 19/23] Fix doctests when meataxe is installed

---
 src/sage/matrix/constructor.py        |  9 +++++++--
 src/sage/matrix/matrix2.pyx           | 22 ++++++++++++++++------
 src/sage/matrix/matrix_gfpn_dense.pyx |  2 +-
 src/sage/matrix/matrix_space.py       |  2 +-
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/sage/matrix/constructor.py b/src/sage/matrix/constructor.py
index b6a93e5abd6..d45f3de2c38 100644
--- a/src/sage/matrix/constructor.py
+++ b/src/sage/matrix/constructor.py
@@ -1018,7 +1018,7 @@ def random_matrix(ring, nrows, ncols=None, algorithm='randomize', *args, **kwds)
          eigenvectors, if computed by hand, will have only integer
          entries.
 
-    -  ``*args, **kwds`` - arguments and keywords to describe additional 
+    -  ``*args, **kwds`` - arguments and keywords to describe additional
        properties. See more detailed documentation below.
 
     .. warning::
@@ -1175,9 +1175,14 @@ def random_matrix(ring, nrows, ncols=None, algorithm='randomize', *args, **kwds)
 
     The default implementation of :meth:`~sage.matrix.matrix2.randomize` relies
     on the ``random_element()`` method for the base ring.  The ``density`` and
-    ``sparse`` keywords behave as described above. ::
+    ``sparse`` keywords behave as described above. Since we have a different
+    randomisation when using the optional meataxe package, we have to make sure
+    that we use the default implementation in this test::
 
         sage: K.<a>=FiniteField(3^2)
+        sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+        sage: MS = MatrixSpace(K, 2, 5)
+        sage: MS._MatrixSpace__matrix_class = Matrix_generic_dense
         sage: random_matrix(K, 2, 5)
         [      1       a       1 2*a + 1       2]
         [    2*a   a + 2       0       2       1]
diff --git a/src/sage/matrix/matrix2.pyx b/src/sage/matrix/matrix2.pyx
index f3623328eb3..294491797b4 100644
--- a/src/sage/matrix/matrix2.pyx
+++ b/src/sage/matrix/matrix2.pyx
@@ -779,7 +779,7 @@ cdef class Matrix(matrix1.Matrix):
             36.0000000000000
 
         The permanent above is directed to the Sloane's sequence :oeis:`A079908`
-        ("The Dancing School Problems") for which the third term is 36: 
+        ("The Dancing School Problems") for which the third term is 36:
 
         ::
 
@@ -3352,8 +3352,6 @@ cdef class Matrix(matrix1.Matrix):
             verbose ...
             verbose 1 (<module>) computing right kernel matrix over an arbitrary field for 3x4 matrix
             ...
-            verbose 1 (<module>) done computing right kernel matrix over an arbitrary field for 3x4 matrix
-            ...
             Vector space of degree 4 and dimension 2 over Finite Field in a of size 5^2
             Basis matrix:
             [      1       0 3*a + 4 2*a + 2]
@@ -3800,13 +3798,25 @@ cdef class Matrix(matrix1.Matrix):
             [      0       1     2*a 3*a + 3]
             sage: A*K.basis_matrix().transpose() == zero_matrix(F, 3, 2)
             True
-            sage: B = copy(A)
+
+        In the following test, we have to force usage of
+        :class:`~sage.matrix.matrix_generic_dense.Matrix_generic_dense`,
+        since the option ``basis = 'pivot'`` would simply yield the same
+        result as the previous test, if the optional meataxe package is
+        installed. ::
+
+            sage: from sage.matrix.matrix_generic_dense import Matrix_generic_dense
+            sage: B = Matrix_generic_dense(A.parent(), A.list(), False, False)
             sage: P = B.right_kernel(basis = 'pivot'); P
             Vector space of degree 4 and dimension 2 over Finite Field in a of size 5^2
             User basis matrix:
             [      4       4       1       0]
             [  a + 2 3*a + 3       0       1]
-            sage: B*P.basis_matrix().transpose() == zero_matrix(F, 3, 2)
+
+        If the optional meataxe package is installed, we again have to make sure
+        to work with a copy of B that has the same type as ``P.basis_matrix()``::
+
+            sage: B.parent()(B.list())*P.basis_matrix().transpose() == zero_matrix(F, 3, 2)
             True
             sage: K == P
             True
@@ -8479,7 +8489,7 @@ explicitly setting the argument to `True` or `False` will avoid this message."""
 
             sage: filename = tmp_filename(ext='.png')
             sage: img.save(filename)
-            sage: open(filename).read().startswith('\x89PNG') 
+            sage: open(filename).read().startswith('\x89PNG')
             True
         """
         cdef int x, y, _x, _y, v, bi, bisq
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 5ab82de2f83..b7ec01a0b14 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -1436,7 +1436,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.cache("left_kernel_matrix", OUT)
         return OUT
 
-    def _echelon_in_place_classical(self, reduced=True):
+    def _echelon_in_place_classical(self, reduced=True, **kwds):
         """
         Change this matrix into echelon form, using classical Gaussian elimination.
 
diff --git a/src/sage/matrix/matrix_space.py b/src/sage/matrix/matrix_space.py
index 22a4e591216..87f0ef311b9 100644
--- a/src/sage/matrix/matrix_space.py
+++ b/src/sage/matrix/matrix_space.py
@@ -988,7 +988,7 @@ def _get_matrix_class(self):
             sage: type(matrix(GF(2), 2, range(4)))
             <type 'sage.matrix.matrix_mod2_dense.Matrix_mod2_dense'>
             sage: type(matrix(GF(64,'z'), 2, range(4)))
-            <type 'sage.matrix.matrix_mod2e_dense.Matrix_mod2e_dense'>
+            <type 'sage.matrix.matrix_gf2e_dense.Matrix_gf2e_dense'>
             sage: type(matrix(GF(125,'z'), 2, range(4)))
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
         """

From 80af75e962f077169a64191d5b9a7db5aa7cc1a7 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sat, 26 Sep 2015 00:48:58 +0200
Subject: [PATCH 20/23] Use and propagate specific return values on error in
 matrix-related MeatAxe functions.

---
 .../meataxe/patches/UseErrorPropagation.patch | 1147 +++++++++++++++++
 src/sage/libs/meataxe.pxd                     |   14 +-
 2 files changed, 1156 insertions(+), 5 deletions(-)
 create mode 100644 build/pkgs/meataxe/patches/UseErrorPropagation.patch

diff --git a/build/pkgs/meataxe/patches/UseErrorPropagation.patch b/build/pkgs/meataxe/patches/UseErrorPropagation.patch
new file mode 100644
index 00000000000..00745e8b967
--- /dev/null
+++ b/build/pkgs/meataxe/patches/UseErrorPropagation.patch
@@ -0,0 +1,1147 @@
+In functions that appear in matrix arithmetic, use specific return values
+on error, and propagate errors. This is *not* done in other parts of
+MeatAxe (e.g., not for greased matrices or polynomials) and not for
+standalone programs.
+
+AUTHOR:
+
+- Simon King, 2015-09-26
+
+diff --git a/src/cfinfo.c b/src/cfinfo.c
+index 293526b..9c1a004 100644
+--- a/src/cfinfo.c
++++ b/src/cfinfo.c
+@@ -215,7 +215,7 @@ int Lat_ReadInfo(Lat_Info *li, const char *basename)
+ 	    }
+ 	    for (i = 0; i < li->NCf; ++i)
+ 	    {
+-		ReadWord(f,&(li->Cf[i].idword),&(li->Cf[i].idpol),fn);
++		if (!ReadWord(f,&(li->Cf[i].idword),&(li->Cf[i].idpol),fn)) return -1;
+ 		if (StfMatch(f,i < li->NCf - 1 ? "," : "];") != 0)
+ 		{
+ 		    MTX_ERROR2("%s: %E",fn,MTX_ERR_FILEFMT);
+@@ -232,7 +232,7 @@ int Lat_ReadInfo(Lat_Info *li, const char *basename)
+ 	    }
+ 	    for (i = 0; i < li->NCf; ++i)
+ 	    {
+-		ReadWord(f,&(li->Cf[i].peakword),&(li->Cf[i].peakpol),fn);
++		if (!ReadWord(f,&(li->Cf[i].peakword),&(li->Cf[i].peakpol),fn)) return -1;
+ 		if (StfMatch(f,i < li->NCf - 1 ? "," : "];") != 0)
+ 		{
+ 		    MTX_ERROR2("%s: %E",fn,MTX_ERR_FILEFMT);
+diff --git a/src/chbasis.c b/src/chbasis.c
+index 34cf886..f1ee2e8 100644
+--- a/src/chbasis.c
++++ b/src/chbasis.c
+@@ -61,7 +61,8 @@ int MrChangeBasis(MatRep_t *rep, const Matrix_t *trans)
+ /** Conjugate a list @em gen of @em ngen square matrices over the same
+  *  field and of the same dimensions by a mattrix @em trans
+  *  and write the result into @em newgen. If @em gen == @em newgen, then
+- *  the previous content of @em newgen will be overridden. **/
++ *  the previous content of @em newgen will be overwritten.
++ *  Return -1 on error and 0 on success. **/
+ int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+ 	Matrix_t *newgen[])
+ 
+@@ -83,18 +84,36 @@ int ChangeBasis(const Matrix_t *trans, int ngen, const Matrix_t *gen[],
+     }
+ 
+     Matrix_t *tmp = MatAlloc(trans->Field, trans->Nor, trans->Noc);
++    if (!tmp) { MatFree(bi); return -1; }  /* bi is already allocated: free it as the later error paths do */
+     size_t tmpsize = FfCurrentRowSize*trans->Nor;
+     for (i = 0; i < ngen; ++i)
+     {
+         MTX_VERIFY(gen[i]->Nor==trans->Nor);
+         MTX_VERIFY(gen[i]->Noc==trans->Noc);
+         memset(tmp->Data, FF_ZERO, tmpsize);
+-        MatMulStrassen(tmp, trans, gen[i]);
++        if (!MatMulStrassen(tmp, trans, gen[i]))
++        {
++            MatFree(tmp); MatFree(bi);  /* release both temporaries on failure */
++            return -1;
++        }
+         if ((const Matrix_t **)newgen == gen)
+             memset(newgen[i]->Data, FF_ZERO, tmpsize);
+         else
++        {
+             newgen[i] = MatAlloc(trans->Field, trans->Nor, trans->Noc);
+-        MatMulStrassen(newgen[i], tmp, bi);
++            if (!newgen[i])
++            {
++                MatFree(tmp);
++                MatFree(bi);
++                return -1;
++            }
++        }
++        if (!MatMulStrassen(newgen[i], tmp, bi))
++        {
++            MatFree(tmp);
++            MatFree(bi);
++            return -1;
++        }
+     }
+     MatFree(bi);
+     MatFree(tmp);
+diff --git a/src/ffio.c b/src/ffio.c
+index 92f9360..d2e3f1c 100644
+--- a/src/ffio.c
++++ b/src/ffio.c
+@@ -71,8 +71,11 @@ int FfReadRows(FILE *f, PTR buf, int n)
+         if (fread(b,FfTrueRowSize(FfNoc),1,f) != 1) break;
+ 	b += FfCurrentRowSize;
+     }
+-    if (ferror(f)) 
+-	MTX_ERROR("Read failed: %S");
++    if (ferror(f))
++    {
++        MTX_ERROR("Read failed: %S");
++        return -1;
++    }
+     return i;
+ }
+ 
+@@ -106,8 +109,11 @@ int FfWriteRows(FILE *f, PTR  buf, int n)
+         if (fwrite(b,FfTrueRowSize(FfNoc),1,f) != 1) break;
+ 	b += FfCurrentRowSize;
+     }
+-    if (ferror(f)) 
+-	MTX_ERROR("Write failed: %S");
++    if (ferror(f))
++    {
++        MTX_ERROR("Write failed: %S");
++        return -1;
++    }
+     return i;
+ }
+ 
+diff --git a/src/kernel-0.c b/src/kernel-0.c
+index 6ef2f72..431f01a 100644
+--- a/src/kernel-0.c
++++ b/src/kernel-0.c
+@@ -304,7 +304,10 @@ static FILE *OpenTableFile(int fl)
+     /* Create the table file.
+        ---------------------- */
+     if (FfMakeTables(fl) != 0)
+-	MTX_ERROR("Unable to build arithmetic tables");
++	{
++        MTX_ERROR("Unable to build arithmetic tables");
++        return NULL;
++    }
+     fd = SysFopen(fn,FM_READ|FM_LIB);
+     return fd;
+ }
+@@ -363,8 +366,7 @@ static int ReadTableFile(FILE *fd, int field)
+ 	return -1;
+     }
+     FfOrder = field;
+-    FfSetNoc(FfOrder);
+-    return 0;
++    return FfSetNoc(FfOrder);
+ }
+ 
+ 
+@@ -471,7 +473,7 @@ size_t FfTrueRowSize(int noc)
+  ** Embed a subfield.
+  ** @param a Element of the subfield field.
+  ** @param subfield Subfield order. Must be a divisor of the current field order.
+- ** @return @em a, embedded into the current field.
++ ** @return @em a, embedded into the current field, or 255 on error.
+  **/ 
+ 
+ FEL FfEmbed(FEL a, int subfield)
+@@ -482,7 +484,9 @@ FEL FfEmbed(FEL a, int subfield)
+ 	return a;
+     for (i = 0; mtx_embedord[i] != subfield && i < 4; ++i);
+     if (i >= 4)
+-	MTX_ERROR2("Cannot embed GF(%d) into GF(%d)",(int)subfield,(int)FfOrder);
++	{ MTX_ERROR2("Cannot embed GF(%d) into GF(%d)",(int)subfield,(int)FfOrder);
++      return (FEL)255;
++    }
+     return mtx_embed[i][a];
+ }
+ 
+@@ -498,6 +502,7 @@ FEL FfEmbed(FEL a, int subfield)
+  ** <tt>FfSetField(subfield)</tt>.
+  ** @param a Element of the current field.
+  ** @param subfield Subfield order. Must be a divisor of the current field order.
+ ** @return The restricted element, or 255 on error.
+  **/
+ 
+ FEL FfRestrict(FEL a, int subfield)
+@@ -511,6 +516,7 @@ FEL FfRestrict(FEL a, int subfield)
+     {
+ 	MTX_ERROR2("Cannot restrict GF(%d) to GF(%d)",(int)FfOrder,
+ 	    (int)subfield);
++        return (FEL)255;
+     }
+     return mtx_restrict[i][a];
+ }
+diff --git a/src/maddmul.c b/src/maddmul.c
+index f5c171d..24ad3a5 100644
+--- a/src/maddmul.c
++++ b/src/maddmul.c
+@@ -59,7 +59,7 @@ Matrix_t *MatAddMul(Matrix_t *dest, const Matrix_t *src, FEL coeff)
+ 	   ------------ */
+ 	PTR dp = dest->Data, sp = src->Data;
+ 	int n;
+-	FfSetField(src->Field);
++	FfSetField(src->Field);  /* No error checking */
+ 	FfSetNoc(src->Noc);
+ 	for (n = src->Nor; n > 0; --n)
+ 	{
+diff --git a/src/maketabF.c b/src/maketabF.c
+index d7af83e..0fa26fb 100644
+--- a/src/maketabF.c
++++ b/src/maketabF.c
+@@ -175,7 +175,7 @@ static void polymod(POLY a, POLY b)
+    testprim() - Test for primitivity.
+    ----------------------------------------------------------------- */
+ 
+-static void testprim()
++static int testprim()
+ {
+     int i, a[256];
+ 
+@@ -187,7 +187,9 @@ static void testprim()
+ 	{
+ 	    fprintf(stderr,"*** a[%d]=%d.",i,a[i]);
+ 	    MTX_ERROR("Polynome is not primitive.");
++        return 1;
+ 	}
++    return 0;
+ }
+ 
+ 
+@@ -195,7 +197,7 @@ static void testprim()
+    initarith() - Initialize index and zech logarithm tables.
+    ----------------------------------------------------------------- */
+ 
+-static void initarith()
++static int initarith()
+ {	int i,elem;
+ 	POLY a;
+ 
+@@ -214,7 +216,7 @@ static void initarith()
+ 		polmultx(a);
+ 		polymod(a,irred);
+         }
+-	testprim();
++	if (testprim()) return 1;
+ 
+ 	/* Calculate zech logarithms
+ 	   ------------------------- */
+@@ -222,6 +224,7 @@ static void initarith()
+ 	{	elem = (int)((i%P)==P-1 ? i+1-P : i+1); /* add 1 */
+ 		zech[indx[i]]=indx[elem]; /* Zech-table=result */
+         }
++    return 0;
+ }
+ 
+ 
+@@ -314,7 +317,7 @@ static BYTE pack(BYTE a[8])
+ 	and initialize tables.
+    ----------------------------------------------------------------- */
+ 
+-static void writeheader()
++static int writeheader()
+ {
+     int i, j;
+ 
+@@ -324,6 +327,7 @@ static void writeheader()
+     {
+ 	perror(filename);
+ 	MTX_ERROR("Cannot open table file");
++    return 1;
+     }
+     for (CPM=1,maxmem=Q; (long)maxmem * Q <= 256L; ++CPM, maxmem *= Q);
+     for (i = 0; irrednrs[i] != (int) Q && irrednrs[i] != 0; ++i);
+@@ -333,7 +337,7 @@ static void writeheader()
+         for (j = 0; j <= MAXGRAD; j++)
+             irred[j] = irreducibles[i][MAXGRAD-j];
+ 	G = P;		/* Generator is X */
+-	initarith();	/* Init index- and Zech-tables */
++	if (initarith()) { fclose(fd); return 1; }	/* Init index- and Zech-tables; close the table file on failure */
+     }
+     else
+     {	
+@@ -357,6 +361,7 @@ static void writeheader()
+     }
+     MESSAGE(1,("Generator   : %ld\n",info[1]));
+     MESSAGE(1,("Packing     : %ld/byte\n",info[3]));
++    return 0;
+ }
+ 
+ 
+@@ -364,14 +369,14 @@ static void writeheader()
+    checkq() - Set Q and N. Verify that Q is a prime power.
+    ----------------------------------------------------------------- */
+ 
+-static void checkq(long l)
++static int checkq(long l)
+ {
+     long q, d;
+ 
+     if (l < 2 || l > 256)
+     {
+-	fprintf(stderr,"Field order out of range (2-256)\n");
+-	exit(EXIT_ERR);
++	MTX_ERROR1("Field order out of range (2-256): %E", MTX_ERR_RANGE);
++	return 1;
+     }
+ 
+     Q = l;
+@@ -381,9 +386,10 @@ static void checkq(long l)
+        	q /= d;
+     if (q != 1)
+     {
+-	fprintf(stderr,"Illegal Field order\n");
+-	exit(EXIT_ERR);
++	MTX_ERROR("Illegal Field order\n");
++	return 1;
+     }
++    return 0;
+ }
+ 
+ 
+@@ -407,7 +413,7 @@ static void inittables()
+    mkembed() - Calculate embeddings of all subfields.
+    ----------------------------------------------------------------- */
+ 
+-static void mkembed()
++static int mkembed()
+ {
+     int n;	/* Degree of subfield over Z_p */
+     long q; /* subfield order */
+@@ -456,6 +462,7 @@ static void mkembed()
+ 	{
+ 	    fprintf(stderr,"*** q=%ld, Q=%ld.",q,Q);
+ 	    MTX_ERROR("Internal error.");
++        return 1;
+ 	}
+ 
+ 	/* Calculate a generator for the subfield
+@@ -502,13 +509,13 @@ static void mkembed()
+ 	    fflush(stdout);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
+ static int Init(int field)
+ {
+-    checkq(field);
+-    return 0;
++    return checkq(field);
+ }
+ 
+ /* -----------------------------------------------------------------
+@@ -526,7 +533,7 @@ int FfMakeTables(int field)
+        ---------- */
+     if (Init(field) != 0)
+ 	return 1;
+-    writeheader();			/* Open file and write header */
++    if (writeheader()) return 1;			/* Open file and write header */
+     inittables();
+ 
+     /* Make insert table
+@@ -618,7 +625,7 @@ int FfMakeTables(int field)
+ 	}
+     }
+ 
+-    mkembed();
++    if (mkembed()) { fclose(fd); return 1; }  /* do not leak the open table file */
+ 
+     MESSAGE(1,("Writing tables to %s\n",filename));
+     if (
+@@ -639,6 +646,7 @@ int FfMakeTables(int field)
+     {
+ 	perror(filename);
+ 	MTX_ERROR("Error writing table file");
++    fclose(fd); return 1;  /* close the table file on the write-error path, too */
+     }
+     fclose(fd);
+     return(0);
+diff --git a/src/matadd.c b/src/matadd.c
+index 54dbcb1..2d86d86 100644
+--- a/src/matadd.c
++++ b/src/matadd.c
+@@ -48,7 +48,7 @@ Matrix_t *MatAdd(Matrix_t *dest, const Matrix_t *src)
+        ------------------- */
+     dp = dest->Data;
+     sp = src->Data;
+-    FfSetField(src->Field);
++    FfSetField(src->Field);   /* No error checking */
+     FfSetNoc(src->Noc);
+     for (n = src->Nor; n > 0; --n)
+     {
+diff --git a/src/matclean.c b/src/matclean.c
+index e7307bf..16f02d6 100644
+--- a/src/matclean.c
++++ b/src/matclean.c
+@@ -53,7 +53,7 @@ int MatClean(Matrix_t *mat, const Matrix_t *sub)
+ 
+     /* Clean
+        ----- */
+-    FfSetNoc(mat->Noc);
++    FfSetNoc(mat->Noc);  /* No error checking */
+     for (i = 0; i < mat->Nor; ++i)
+     {
+ 	PTR m = MatGetPtr(mat,i);
+diff --git a/src/matcmp.c b/src/matcmp.c
+index b778ec4..d503285 100644
+--- a/src/matcmp.c
++++ b/src/matcmp.c
+@@ -38,7 +38,7 @@ MTX_DEFINE_FILE_INFO
+  ** not necessarily mean that an error has occured.
+  ** @param a First matrix.
+  ** @param b Second matrix.
+- ** @return 0 if the matrices are equal, nonzero otherwise (see description).
++ ** @return 0 if the matrices are equal, nonzero otherwise (see description), -2 on error.
+  **/
+ 
+ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+@@ -50,7 +50,7 @@ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+     if (!MatIsValid(a) || !MatIsValid(b))
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return -1;
++	return -2;
+     }
+ 
+     /* Compare fields and dimensions
+@@ -65,7 +65,7 @@ int MatCompare(const Matrix_t *a, const Matrix_t *b)
+     /* Compare the entries row by row. We do not use memcmp on the
+        whole matrix because we must ignore padding bytes.
+        ----------------------------------------------------------- */
+-    FfSetField(a->Field);
++    FfSetField(a->Field);  /* No error checking */
+     FfSetNoc(a->Noc);
+     for (i = 0; i < a->Nor; ++i)
+     {
+diff --git a/src/matcopy.c b/src/matcopy.c
+index 75852dd..c3e7850 100644
+--- a/src/matcopy.c
++++ b/src/matcopy.c
+@@ -105,7 +105,7 @@ int MatCopyRegion(Matrix_t *dest, int destrow, int destcol,
+ 	{
+ #ifdef PARANOID
+ 	    FEL f;
+-	    FfSetNoc(src->Noc);
++	    FfSetNoc(src->Noc);  /* No error checking */
+ 	    f = FfExtract(s,k);
+ 	    FfSetNoc(dest->Noc);
+ 	    FfInsert(d,destcol+k-col1,f);
+diff --git a/src/matcore.c b/src/matcore.c
+index 1f27dfd..0dc9d92 100644
+--- a/src/matcore.c
++++ b/src/matcore.c
+@@ -131,7 +131,7 @@ Matrix_t *MatAlloc(int field, int nor, int noc)
+ 	SysFree(m);
+ 	return NULL;
+     }
+-    FfSetNoc(noc);
++    if (FfSetNoc(noc)) return NULL;
+     m->Magic = MAT_MAGIC;
+     m->Field = field;
+     m->Nor = nor;
+diff --git a/src/matcut.c b/src/matcut.c
+index fde0662..f274311 100644
+--- a/src/matcut.c
++++ b/src/matcut.c
+@@ -79,11 +79,12 @@ Matrix_t *MatCut(const Matrix_t *src, int row1, int col1, int nrows, int ncols)
+     /* Initialize pointers to the source and destination matrix 
+        -------------------------------------------------------- */
+     s = MatGetPtr(src,row1);
++    if (!s) return NULL;
+     d = result->Data;
+ 
+     /* Copy the requested data
+        ----------------------- */
+-    FfSetNoc(ncols);
++    if (FfSetNoc(ncols)) return NULL;
+     for (n = nrows; n > 0; --n)
+     {
+ 	if (col1 == 0)
+@@ -95,9 +96,9 @@ Matrix_t *MatCut(const Matrix_t *src, int row1, int col1, int nrows, int ncols)
+ 	    {
+ #ifdef PARANOID
+ 		FEL f;
+-		FfSetNoc(src->Noc);
++		FfSetNoc(src->Noc);  /* No error checking */
+ 		f = FfExtract(s,col1+k);
+-		FfSetNoc(ncols);
++		FfSetNoc(ncols);  /* error was checked above */
+ 		FfInsert(d,k,f);
+ #else
+ 		FfInsert(d,k,FfExtract(s,col1+k));
+diff --git a/src/matech.c b/src/matech.c
+index ed31cf4..ee52ebe 100644
+--- a/src/matech.c
++++ b/src/matech.c
+@@ -124,7 +124,7 @@ int MatEchelonize(Matrix_t *mat)
+ 
+     /* Build the pivot table
+        --------------------- */
+-    FfSetField(mat->Field);
++    FfSetField(mat->Field);  /* No error checking */
+     FfSetNoc(mat->Noc);
+     rank = zmkechelon(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+ 
+@@ -163,13 +163,14 @@ long MatNullity(const Matrix_t *mat)
+  ** This function calculates the dimension of the null-space of a matrix
+  ** and deletes the matrix.
+  ** @param mat Pointer to the matrix.
+- ** @return Nullity of @em mat, or -$ on error.
++ ** @return Nullity of @em mat, or $-1$ on error.
+  **/
+ 
+ long MatNullity__(Matrix_t *mat)
+ {
+     long nul;
+-    MatEchelonize(mat);
++    if (!mat) return -1;
++    if (MatEchelonize(mat)==-1) return -1;
+     nul = mat->Noc - mat->Nor;
+     MatFree(mat);
+     return nul;
+diff --git a/src/matins.c b/src/matins.c
+index 45c31e4..50fe9c1 100644
+--- a/src/matins.c
++++ b/src/matins.c
+@@ -54,7 +54,7 @@ Matrix_t *MatInsert_(Matrix_t *mat, const Poly_t *pol)
+ 	return NULL;
+     }
+ 
+-    FfSetField(mat->Field);
++    FfSetField(mat->Field);  /* No error checking */
+     FfSetNoc(nor);
+ 
+     /* Special case: p(x) = 0
+@@ -81,7 +81,10 @@ Matrix_t *MatInsert_(Matrix_t *mat, const Poly_t *pol)
+     /* Evaluate p(A)
+        ------------- */
+     if (pol->Degree > 1) 
+-	x = MatDup(mat);
++	{
++		x = MatDup(mat);
++		if (!x) return NULL;
++	}
+     if ((f = pol->Data[pol->Degree]) != FF_ONE)
+     {
+ 	for (l = nor, v = mat->Data; l > 0; --l, FfStepPtr(&v))
+@@ -147,6 +150,7 @@ Matrix_t *MatInsert(const Matrix_t *mat, const Poly_t *pol)
+     if (pol->Degree == 0)
+     {
+ 	x = MatAlloc(mat->Field,nor,nor);
++    if (!x) return NULL;
+ 	for (l = 0, v = x->Data; l < nor; ++l, FfStepPtr(&v))
+ 	    FfInsert(v,l,pol->Data[0]);
+ 	return x;
+@@ -155,6 +159,7 @@ Matrix_t *MatInsert(const Matrix_t *mat, const Poly_t *pol)
+     /* Evaluate p(A)
+        ------------- */
+     x = MatDup(mat);
++    if (!x) return NULL;
+     if ((f = pol->Data[pol->Degree]) != FF_ONE)
+     {
+ 	for (l = nor, v = x->Data; l > 0; --l, FfStepPtr(&v))
+diff --git a/src/matinv.c b/src/matinv.c
+index 217eb0e..990cbe7 100644
+--- a/src/matinv.c
++++ b/src/matinv.c
+@@ -114,6 +114,7 @@ Matrix_t *MatInverse(const Matrix_t *mat)
+     /* Copy matrix into workspace
+        -------------------------- */
+     tmp = FfAlloc(mat->Nor);
++    if (!tmp) return NULL;
+     memcpy(tmp,mat->Data,FfCurrentRowSize * mat->Nor);
+ 
+     /* Inversion
+diff --git a/src/matmul.c b/src/matmul.c
+index 20f5e88..bed30fc 100644
+--- a/src/matmul.c
++++ b/src/matmul.c
+@@ -63,7 +63,7 @@ Matrix_t *MatMul(Matrix_t *dest, const Matrix_t *src)
+ 
+     /* Matrix multiplication
+        --------------------- */
+-    FfSetField(src->Field);
++    FfSetField(src->Field); /* no error checking, since the matrix *exists* */
+     FfSetNoc(src->Noc);
+     result = tmp = FfAlloc(dest->Nor);
+     if (result == NULL)
+diff --git a/src/matnull.c b/src/matnull.c
+index 4f28566..2550b96 100644
+--- a/src/matnull.c
++++ b/src/matnull.c
+@@ -27,6 +27,8 @@ MTX_DEFINE_FILE_INFO
+     - |piv| contains a pivot table for the null space.
+     If |flags| is nonzero, the null-space is not reduced to echelon form,
+     and the contents of |piv| are undefined.
++
++    Return -1 on error, the dimension of the null-space on success.
+  ** @see 
+  **/
+ 
+@@ -40,7 +42,7 @@ static long znullsp(PTR matrix, long nor, int *piv, PTR nsp, int flags)
+ 
+     /* Make the identity matrix in <nsp>.
+        ---------------------------------- */
+-    FfSetNoc(nor);
++    if (FfSetNoc(nor)) return -1;
+     x = nsp;
+     for (i = 0; i < nor; ++i)
+     {
+@@ -61,13 +63,12 @@ static long znullsp(PTR matrix, long nor, int *piv, PTR nsp, int flags)
+ 
+ 	for (k = 0; k < i; ++k)
+ 	{
+-	    FfSetNoc(noc);
++	    FfSetNoc(noc);  /* No error checking, since noc used to be the previously assigned number of columns */
+ 	    if ((p = piv[k]) >= 0 && (f = FfExtract(x,p)) != FF_ZERO)
+ 	    {
+ 		f = FfNeg(FfDiv(f,FfExtract(xx,p)));
+-		FfSetNoc(noc);
+ 		FfAddMulRow(x,xx,f);
+-		FfSetNoc(nor);
++		FfSetNoc(nor);  /* we have asserted above that it doesn't fail */
+ 		FfAddMulRow(y,yy,f);
+ 	    }
+ 	    FfSetNoc(noc);
+@@ -151,11 +152,21 @@ Matrix_t *MatNullSpace_(Matrix_t *mat, int flags)
+     if (nsp == NULL) 
+ 	return NULL;
+     nsp->PivotTable = NREALLOC(nsp->PivotTable,int,mat->Nor);
++    if (!nsp->PivotTable)
++    {
++        MatFree(nsp);
++        return NULL;
++    }
+ 
+     /* Calculate the null-space
+        ------------------------ */
+-    FfSetNoc(mat->Noc);
++    FfSetNoc(mat->Noc);  /* No error checking */
+     dim = znullsp(mat->Data,mat->Nor,nsp->PivotTable,nsp->Data,flags);
++    if (dim==-1)
++    {
++        MatFree(nsp);
++        return NULL;
++    }
+     if (flags)
+     {
+ 	SysFree(nsp->PivotTable);
+diff --git a/src/matorder.c b/src/matorder.c
+index 16aec74..24b31a3 100644
+--- a/src/matorder.c
++++ b/src/matorder.c
+@@ -32,7 +32,7 @@ MTX_DEFINE_FILE_INFO
+  ** the order is greater than 1000000, or if the order on any cyclic
+  ** subspace is greater than 1000.
+  ** @param mat Pointer to the matrix.
+- ** @return The order of @em mat, or 1 on error.
++ ** @return The order of @em mat, or -1 on error.
+  **/
+ 
+ int MatOrder(const Matrix_t *mat)
+@@ -59,15 +59,29 @@ int MatOrder(const Matrix_t *mat)
+     FfSetNoc(mat->Noc);
+     nor = mat->Nor;
+     m1 = FfAlloc(nor);
++    if (!m1) return -1;
+     memcpy(m1,mat->Data,FfCurrentRowSize * nor);
+     bend = basis = FfAlloc(nor+1);
++    if (!bend)
++    {
++        SysFree(m1);
++        return -1;
++    }
+ 
+     piv = NALLOC(int,nor+1);
+     done = NALLOC(char,nor);
++    if (!piv || !done)
++    { SysFree(m1);
++      return -1;
++    }
+     memset(done,0,(size_t)nor);
+     v1 = FfAlloc(1);
+     v2 = FfAlloc(1);
+     v3 = FfAlloc(1);
++    if (!v1 || !v2 || !v3)
++    { SysFree(m1);
++      return -1;
++    }
+     tord = ord = 1;
+     dim = 0;
+     j1 = 1;
+diff --git a/src/matpivot.c b/src/matpivot.c
+index abe342a..c843282 100644
+--- a/src/matpivot.c
++++ b/src/matpivot.c
+@@ -71,7 +71,6 @@ static int zmkpivot(PTR matrix, int nor, int noc, int *piv, int *ispiv)
+ 
+ int MatPivotize(Matrix_t *mat)
+ {
+-    int rc;
+     int *newtab;
+     static int *is_pivot = NULL;
+     static int maxnoc = -1;
+@@ -106,9 +105,7 @@ int MatPivotize(Matrix_t *mat)
+        --------------------- */
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+-    rc = zmkpivot(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+-
+-    return rc;
++    return zmkpivot(mat->Data,mat->Nor,mat->Noc,mat->PivotTable,is_pivot);
+ }
+ 
+ /**
+diff --git a/src/matpwr.c b/src/matpwr.c
+index cd66b5e..b06e5b2 100644
+--- a/src/matpwr.c
++++ b/src/matpwr.c
+@@ -119,8 +119,14 @@ Matrix_t *MatPower(const Matrix_t *mat, long n)
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+     tmp = FfAlloc(FfNoc);
++    if (!tmp) return NULL;
+     memcpy(tmp,mat->Data,FfCurrentRowSize * FfNoc);
+     tmp2 = FfAlloc(FfNoc);
++    if (!tmp2)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+     result = MatAlloc(mat->Field,mat->Nor,mat->Noc);
+     if (result != NULL)
+ 	matpwr_(n,tmp,result->Data,tmp2);
+diff --git a/src/matread.c b/src/matread.c
+index 031d100..06e6e6b 100644
+--- a/src/matread.c
++++ b/src/matread.c
+@@ -46,8 +46,9 @@ Matrix_t *MatRead(FILE *f)
+ 	return NULL;
+     if (FfReadRows(f,m->Data,m->Nor) != m->Nor)
+     {
+-	MatFree(m);
+-	return NULL;
++        MTX_ERROR("Number of given rows does not coincide with given row number");
++        MatFree(m);
++        return NULL;
+     }
+     return m;
+ }
+diff --git a/src/mattrace.c b/src/mattrace.c
+index f500248..772a6e4 100644
+--- a/src/mattrace.c
++++ b/src/mattrace.c
+@@ -21,7 +21,7 @@
+  ** This function calculates the sum of all diagonal elements of a matrix.
+  ** Note that the matrix need not be square.
+  ** @param mat Pointer to the matrix.
+- ** @return Trace of @a mat, @c FF_ZERO on error.
++ ** @return Trace of @a mat, @c 255 on error.
+  **/
+ 
+ FEL MatTrace(const Matrix_t *mat)
+@@ -35,7 +35,7 @@ FEL MatTrace(const Matrix_t *mat)
+        ------------------ */
+ #ifdef DEBUG
+     if (!MatIsValid(mat))
+-	return FF_ZERO;
++	return (FEL)255;
+ #endif
+ 
+     maxi = mat->Nor > mat->Noc ? mat->Noc : mat->Nor;
+diff --git a/src/matwrite.c b/src/matwrite.c
+index 1fb6af3..b364e80 100644
+--- a/src/matwrite.c
++++ b/src/matwrite.c
+@@ -44,7 +44,10 @@ int MatWrite(const Matrix_t *mat, FILE *f)
+     FfSetField(mat->Field);
+     FfSetNoc(mat->Noc);
+     if (FfWriteRows(f,mat->Data,mat->Nor) != mat->Nor)
+-	return -1;
++	{
++        MTX_ERROR("Cannot write rows");
++        return -1;
++    }
+     return 0;
+ }
+ 
+@@ -75,7 +78,10 @@ int MatSave(const Matrix_t *mat, const char *fn)
+     i = MatWrite(mat,f);
+     fclose(f);
+     if (i != 0)
+-	MTX_ERROR1("Cannot write matrix to %s",fn);
++	{
++        MTX_ERROR1("Cannot write matrix to %s",fn);
++        return -1;
++    }
+     return i;
+ }
+ 
+diff --git a/src/meataxe.h b/src/meataxe.h
+index 368b37b..0efa7dd 100644
+--- a/src/meataxe.h
++++ b/src/meataxe.h
+@@ -135,8 +135,8 @@ PTR FfSubRowPartialReverse(PTR dest, PTR src, int first, int len);
+ PTR FfAlloc(int nor);
+ int FfCmpRows(PTR p1, PTR p2);
+ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv);
+-void FfCleanRow2(PTR row, PTR matrix, int nor, const int *piv, PTR row2);
+-void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, 
++int FfCleanRow2(PTR row, PTR matrix, int nor, const int *piv, PTR row2);
++int FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, 
+     PTR row2, PTR mat2);
+ void FfCopyRow(PTR dest, PTR src);
+ FEL FfEmbed(FEL a, int subfield);
+diff --git a/src/mmulscal.c b/src/mmulscal.c
+index 281be16..9bff3ff 100644
+--- a/src/mmulscal.c
++++ b/src/mmulscal.c
+@@ -21,7 +21,7 @@
+  ** Multiply a Matrix by a Constant.
+  ** @param dest Pointer to the matrix.
+  ** @param coeff Value to multiply with.
+- ** @return The function returns @a dest.
++ ** @return The function returns @a dest, or NULL on error in debug mode only.
+  **/
+ 
+ Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff)
+diff --git a/src/mtensor.c b/src/mtensor.c
+index b2b9ac7..2fb90ca 100644
+--- a/src/mtensor.c
++++ b/src/mtensor.c
+@@ -85,6 +85,11 @@ Matrix_t *MatTensor(const Matrix_t *m1, const Matrix_t *m2)
+ 	   ---------------------------------------- */
+ 	x1 = m1->Data;
+ 	x3 = MatGetPtr(temat,i2);
++    if (!x3)
++    {
++        MatFree(temat);
++        return NULL;
++    }
+ 	FfSetNoc(temat->Noc);
+ 
+ 	/* Loop through all rows of <m1>
+diff --git a/src/quotient.c b/src/quotient.c
+index f44b556..eea0754 100644
+--- a/src/quotient.c
++++ b/src/quotient.c
+@@ -82,16 +82,23 @@ Matrix_t *QProjection(const Matrix_t *subspace, const Matrix_t *vectors)
+     sdim = subspace->Nor;
+     qdim = subspace->Noc - sdim;
+     result = MatAlloc(subspace->Field,vectors->Nor,qdim);
++    if (!result) return NULL;
+ 
+     /* Calculate the projection
+        ------------------------ */
+     FfSetNoc(subspace->Noc);
+     tmp = FfAlloc(1);
++    if (!tmp) return NULL;
+     non_piv = subspace->PivotTable + subspace->Nor;
+     for (i = 0; i < vectors->Nor; ++i)
+     {
+ 	int k;
+ 	PTR q = MatGetPtr(result,i);
++    if (!q)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FfCopyRow(tmp,MatGetPtr(vectors,i));
+ 	FfCleanRow(tmp,subspace->Data,sdim,subspace->PivotTable);
+ 	for (k = 0; k < qdim; ++k)
+@@ -158,14 +165,20 @@ Matrix_t *QAction(const Matrix_t *subspace, const Matrix_t *gen)
+ 
+     /* Calculate the action on the quotient
+        ------------------------------------ */
+-    FfSetNoc(dim);
++    FfSetNoc(dim);  /* No error checking, since dim is the ->Noc of an existing matrix */
+     tmp = FfAlloc(1);
++    if (!tmp) return NULL;
+     piv = subspace->PivotTable;
+     non_piv = piv + subspace->Nor;
+     for (k = 0; k < qdim; ++k)
+     {
+ 	int l;
+ 	PTR qx = MatGetPtr(action,k);
++    if (!qx)
++    {
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FfCopyRow(tmp,MatGetPtr(gen,non_piv[k]));
+ 	FfCleanRow(tmp,subspace->Data,sdim,piv);
+ 	for (l = 0; l < qdim; ++l)
+diff --git a/src/saction.c b/src/saction.c
+index adae3cf..0aba44d 100644
+--- a/src/saction.c
++++ b/src/saction.c
+@@ -68,8 +68,14 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+     sdim = subspace->Nor;
+     FfSetField(subspace->Field);
+     action = MatAlloc(FfOrder,sdim,sdim);
+-    FfSetNoc(dim);
++    if (!action) return NULL;
++    FfSetNoc(dim);  /* No error checking, since dim is the ->Noc of an existing matrix */
+     tmp = FfAlloc(1);
++    if (!tmp)
++    {
++        MatFree(action);
++        return NULL;
++    }
+ 
+     /* Calaculate the action.
+        ---------------------- */
+@@ -77,6 +83,12 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+     {
+ 	PTR xi = MatGetPtr(subspace,i);
+ 	PTR yi = MatGetPtr(action,i);
++    if (!xi || !yi)
++    {
++        MatFree(action);
++        SysFree(tmp);
++        return NULL;
++    }
+ 	FEL f;
+ 
+ 	/* Calculate the image of the <i>-th row of <subspace>.
+@@ -85,10 +97,20 @@ Matrix_t *SAction(const Matrix_t *subspace, const Matrix_t *gen)
+ 
+ 	/* Clean the image with the subspace and store coefficients.
+ 	   --------------------------------------------------------- */
+-	FfCleanRow2(tmp,subspace->Data,sdim,subspace->PivotTable,yi);
++	if (FfCleanRow2(tmp,subspace->Data,sdim,subspace->PivotTable,yi))
++    {
++        MatFree(action);
++        SysFree(tmp);
++        return NULL;
++    }
+ 	if (FfFindPivot(tmp,&f) >= 0)
+-	    MTX_ERROR("Split(): Subspace not invariant");
++	{
++        MatFree(action);
++        SysFree(tmp);
++        MTX_ERROR("Split(): Subspace not invariant");
++        return NULL;
+     }
++	}
+ 
+     /* Clean up and return the result.
+        ------------------------------- */
+diff --git a/src/stabpwr.c b/src/stabpwr.c
+index ff33bc6..01282ab 100644
+--- a/src/stabpwr.c
++++ b/src/stabpwr.c
+@@ -68,15 +68,18 @@ int StablePower_(Matrix_t *mat, int *pwr, Matrix_t **ker)
+        --------------------------- */
+     p = 1;
+     k1 = MatNullSpace(mat);
+-    MatMul(mat,mat);
++    if (!k1) return -1;
++    if (!MatMul(mat,mat)) return -1;
+     k2 = MatNullSpace(mat);
++    if (!k2) return -1;
+     while (k2->Nor > k1->Nor)
+     {
+ 	p *= 2;
+ 	MatFree(k1);
+ 	k1 = k2;
+-	MatMul(mat,mat);
++	if (!MatMul(mat,mat)) return -1;
+ 	k2 = MatNullSpace(mat);
++    if (!k2) return -1;
+     }
+     MatFree(k2);
+ 
+diff --git a/src/sumint.c b/src/sumint.c
+index 278acd8..905fa79 100644
+--- a/src/sumint.c
++++ b/src/sumint.c
+@@ -77,7 +77,7 @@ int FfSumAndIntersection(PTR wrk1, int *nor1, int *nor2, PTR wrk2, int *piv)
+     {
+ 	FEL f;
+ 	int p;
+-	FfCleanRowAndRepeat(x1,wrk1,k,piv,x2,wrk2);
++	if (FfCleanRowAndRepeat(x1,wrk1,k,piv,x2,wrk2)) return -1;
+ 	if ((p = FfFindPivot(x1,&f)) < 0)
+ 	    continue;	/* Null row - ignore */
+ 	if (k < i)
+diff --git a/src/temap.c b/src/temap.c
+index 7ba445a..4c2d493 100644
+--- a/src/temap.c
++++ b/src/temap.c
+@@ -74,17 +74,21 @@ Matrix_t *TensorMap(Matrix_t *vec, const Matrix_t *a, const Matrix_t *b)
+     for (i = 0; i < vec->Nor; ++i)
+     {
+ 	Matrix_t *tmp = MatTransposed(a);
++    if (!tmp) return NULL;
+ 	Matrix_t *v = VectorToMatrix(vec,i,b->Nor);
+ 	if (v == NULL)
+ 	{
+ 	    MTX_ERROR("Conversion failed");
+-	    break;
++	    return NULL;
+ 	}
+-	MatMul(tmp,v);
++	if (!MatMul(tmp,v)) return NULL;
+ 	MatFree(v);
+-	MatMul(tmp,b);
++	if (!MatMul(tmp,b)) return NULL;
+ 	if (MatrixToVector(tmp,result,i))
+-	    MTX_ERROR("Conversion failed");
++    {
++        MTX_ERROR("Conversion failed");
++        return NULL;
++    }
+ 	MatFree(tmp);
+     }
+     return result;
+diff --git a/src/vec2mat.c b/src/vec2mat.c
+index 1047805..e76ad88 100644
+--- a/src/vec2mat.c
++++ b/src/vec2mat.c
+@@ -63,8 +63,11 @@ Matrix_t *VectorToMatrix(Matrix_t *vecs, int n, int noc)
+ 	return NULL;
+     for (i = 0; i < result->Nor; ++i)
+     {
+-	if (MatCopyRegion(result,i,0, vecs,n,i*noc,1,noc) != 0)
+-	    MTX_ERROR("Copy failed");
++        if (MatCopyRegion(result,i,0, vecs,n,i*noc,1,noc) != 0)
++        {
++            MTX_ERROR("Copy failed");
++            return NULL;
++        }
+     }
+     return result;
+ }
+diff --git a/src/window.c b/src/window.c
+index 9c87694..fbeb943 100644
+--- a/src/window.c
++++ b/src/window.c
+@@ -69,7 +69,11 @@ MatrixWindow_t *WindowAlloc(int fl, int nor, size_t rowsize)
+         MTX_ERROR1("%E",MTX_ERR_NOMEM);
+         return NULL;
+     }
+-    FfSetField(fl);
++    if (FfSetField(fl))
++    {
++        free(out);
++        return NULL;
++    }
+     out->Matrix = MatAlloc(fl, nor, rowsize*sizeof(long)*MPB);
+     if (out->Matrix == NULL)
+     {
+@@ -266,7 +270,8 @@ __asm__("    popl %ebx\n"
+ 
+ /** dest := left+right
+    left and right must be distinct, but one of them may coincide with dest -- under the assumption
+-   that, in that case, the ambient matrices coincide as well. **/
++   that, in that case, the ambient matrices coincide as well.
++   Return dest, or NULL on error (the only possible error is a failed compatibility check). **/
+ MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+   PTR x, result, tmp;
+@@ -335,6 +340,7 @@ MatrixWindow_t *WindowSum(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWind
+ /** dest := left-right
+    left and right must be distinct, but one of them may coincide with dest -- under the assumption
+    that, in that case, the ambient matrices coincide as well.
++   Return dest, or NULL on error (the only possible error is a failed compatibility check).
+ **/
+ MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+@@ -407,7 +413,7 @@ MatrixWindow_t *WindowDif(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWind
+    can write the result into it. Moreover, the chunk of memory pointed at by dest MUST be disjoint
+    from the chunks for left and right!
+ 
+-   Dimensions are not tested!
++   Dimensions are not tested; dest is always returned (there is no error value).
+ **/
+ MatrixWindow_t *WindowAddMul(MatrixWindow_t *dest, MatrixWindow_t *left, MatrixWindow_t *right)
+ {
+@@ -617,7 +623,7 @@ int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t
+   S2->RowSize = A_sub_rowsize;
+   S2->Matrix = X->Matrix;
+   S2->ULCorner = X->ULCorner;
+-  WindowDif(S2, A00, A10);
++  WindowDif(S2, A00, A10); /* No error checking, as we know that the windows are compatible */
+   /*
+   printf("1.  S2 = A00-A10 in X\n");
+   WindowShow(X);
+@@ -653,7 +659,7 @@ int StrassenStep(MatrixWindow_t *dest_win, MatrixWindow_t *A_win, MatrixWindow_t
+   S0->RowSize = A_sub_rowsize;
+   S0->Matrix = X->Matrix;
+   S0->ULCorner = X->ULCorner;
+-  WindowSum(S0, A10, A11);
++  WindowSum(S0, A10, A11); /* no error checking here and below, as we know the dimensions of the windows */
+   /*
+   printf("4.  S0 = A10+A11 in X\n");
+   WindowShow(X);
+diff --git a/src/zcleanrow.c b/src/zcleanrow.c
+index b4dcb30..d36a165 100644
+--- a/src/zcleanrow.c
++++ b/src/zcleanrow.c
+@@ -63,10 +63,10 @@ void FfCleanRow(PTR row, PTR matrix, int nor, const int *piv)
+  ** @param nor Number of rows.
+  ** @param piv Pivot table for @em matrix.
+  ** @param row2 Pointer to row where the operations are recorded.
+- ** @return Always 0.
++ ** @return 0, or 1 on error.
+  **/
+ 
+-void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
++int FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+ {
+     int i;
+     PTR x;
+@@ -74,7 +74,7 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+     if (row2 == NULL || piv == NULL)
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return;
++	return 1;
+     }
+     for (i = 0, x = mat; i < nor; ++i, FfStepPtr(&x))
+     {
+@@ -86,6 +86,7 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+ 	    FfInsert(row2,i,f);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
+@@ -100,10 +101,10 @@ void FfCleanRow2(PTR row, PTR mat, int nor, const int *piv, PTR row2)
+  ** @param piv Pivot table for @em mat.
+  ** @param row2 Pointer to the second row to be cleaned.
+  ** @param mat2 Matrix to the second matrix.
+- ** @return Always 0.
++ ** @return 0, or 1 on error.
+  **/
+ 
+-void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PTR mat2)
++int FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PTR mat2)
+ {
+     int i;
+     PTR x, x2;
+@@ -112,7 +113,7 @@ void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PT
+     if (row2 == NULL || piv == NULL || row2 == NULL || mat2 == NULL)
+     {
+ 	MTX_ERROR1("%E",MTX_ERR_BADARG);
+-	return;
++	return 1;
+     }
+ #endif
+     for (i = 0, x = mat, x2 = mat2; i < nor; ++i, FfStepPtr(&x), FfStepPtr(&x2))
+@@ -125,6 +126,7 @@ void FfCleanRowAndRepeat(PTR row, PTR mat, int nor, const int *piv, PTR row2, PT
+ 	    FfAddMulRow(row2,x2,f);
+ 	}
+     }
++    return 0;
+ }
+ 
+ 
diff --git a/src/sage/libs/meataxe.pxd b/src/sage/libs/meataxe.pxd
index fc76bfc781e..68878f3fa19 100644
--- a/src/sage/libs/meataxe.pxd
+++ b/src/sage/libs/meataxe.pxd
@@ -56,8 +56,8 @@ cdef extern from "meataxe.h":
     # FEL FfMul(FEL a, FEL b)
     # FEL FfDiv(FEL a, FEL b)
     # FEL FfInv(FEL a)
-    # FEL FfEmbed(FEL a, int subfield)
-    # FEL FfRestrict(FEL a, int subfield)
+    # FEL FfEmbed(FEL a, int subfield) except 255
+    # FEL FfRestrict(FEL a, int subfield) except 255
     FEL FfFromInt(int l)
     int FfToInt(FEL f)
 
@@ -97,8 +97,8 @@ cdef extern from "meataxe.h":
     ## Basic memory operations
     Matrix_t *MatAlloc(int field, int nor, int noc) except NULL
     int MatFree(Matrix_t *mat)
-    PTR MatGetPtr(Matrix_t *mat, int row)
-    int MatCompare(Matrix_t *a, Matrix_t *b) except? -1
+    PTR MatGetPtr(Matrix_t *mat, int row) except NULL
+    int MatCompare(Matrix_t *a, Matrix_t *b) except -2
     int MatCopyRegion(Matrix_t *dest, int destrow, int destcol, Matrix_t *src, int row1, int col1, int nrows, int ncols) except -1
     Matrix_t *MatCut(Matrix_t *src, int row1, int col1, int nrows, int ncols) except NULL
     Matrix_t *MatCutRows(Matrix_t *src, int row1, int nrows) except NULL
@@ -115,11 +115,15 @@ cdef extern from "meataxe.h":
     Matrix_t *MatMul(Matrix_t *dest, Matrix_t *src) except NULL
     Matrix_t *MatMulScalar(Matrix_t *dest, FEL coeff) except NULL
     Matrix_t *MatPower(Matrix_t *mat, long n) except NULL
-    FEL MatTrace(Matrix_t *mat)
+    int StablePower(Matrix_t *mat, int *pwr, Matrix_t **ker) except -1
+    FEL MatTrace(Matrix_t *mat) except 255
     Matrix_t *MatMulStrassen(Matrix_t *dest, Matrix_t *A, Matrix_t *B) except NULL
     void StrassenSetCutoff(size_t size)
 
     ## "Higher" Arithmetic
+    Matrix_t *MatTensor(Matrix_t *m1, Matrix_t *m2) except NULL
+    Matrix_t *TensorMap(Matrix_t *vec, Matrix_t *a, Matrix_t *b) except NULL
+    
     int MatClean(Matrix_t *mat, Matrix_t *sub) except -1
     int MatEchelonize(Matrix_t *mat) except -1
     int MatOrder(Matrix_t *mat) except? -1

From 6649c821107b0c9c023083bf3075857e409d0f8f Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Mon, 5 Oct 2015 15:49:57 +0200
Subject: [PATCH 21/23] Remove overcautious commands in spkg-install; rely on
 default error return values in matrix_gfpn_dense

---
 build/pkgs/meataxe/spkg-install       | 24 -------------
 src/sage/matrix/matrix_gfpn_dense.pyx | 52 ++++++++++-----------------
 2 files changed, 18 insertions(+), 58 deletions(-)

diff --git a/build/pkgs/meataxe/spkg-install b/build/pkgs/meataxe/spkg-install
index 163f180e50f..7733e9e44cb 100755
--- a/build/pkgs/meataxe/spkg-install
+++ b/build/pkgs/meataxe/spkg-install
@@ -45,30 +45,6 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-# Just to be sure, we also create other folders, although
-# they are standard SageMath folders
-
-mkdir -p $MTXBIN
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating directory for meataxe binaries."
-    exit 1
-fi
-
-mkdir -p "$SAGE_LOCAL/include"
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating SageMath's include directory."
-    exit 1
-fi
-
-mkdir -p "$SAGE_LOCAL/lib"
-
-if [ $? -ne 0 ]; then
-    echo >&2 "Error creating SageMath's lib folder."
-    exit 1
-fi
-
 ## Install! Aparently MeatAxe would rebuild everything when
 ## testing, and "make check" also installs. So, if a test
 ## is requested then we do it in one go.
diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index b7ec01a0b14..1d4fd5e6516 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -459,8 +459,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             except (OSError,IOError):
                 return
             self.Data = MatLoad(FILE)
-            if FfSetField(self.Data.Field):
-                raise ValueError("Invalid data in file {}".format(FILE))
+            FfSetField(self.Data.Field)
             B = GF(self.Data.Field, 'z')
             parent = MatrixSpace(B, self.Data.Nor, self.Data.Noc)
             self._is_immutable = False
@@ -574,8 +573,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         retval._cache = dict(self._cache.iteritems()) if self._cache is not None else {}
         if self.Data:
             retval.Data = MatDup(self.Data)
-            if not retval.Data:
-                raise MemoryError, "Error copying a %s instance"%repr(type(self))
         else:
             retval.Data = NULL
         return retval
@@ -1046,10 +1043,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise NotImplementedError, "The matrices must not be empty"
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._cache = {}
-        if MatAdd(Left.Data, Right.Data) != NULL:
-            return Left
-        else:
-            raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatAdd(Left.Data, Right.Data)
+        return Left
 
     cpdef ModuleElement _sub_(self, ModuleElement right):
         """
@@ -1073,10 +1068,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense Left = Self.__copy__()
         Left._is_immutable = False
         Left._cache = {}
-        if MatAddMul(Left.Data, Right.Data, mtx_taddinv[1]) != NULL:
-            return Left
-        else:
-            raise ArithmeticError, "Matrix sizes or fields not compatible"
+        MatAddMul(Left.Data, Right.Data, mtx_taddinv[1])
+        return Left
 
     def __neg__(self):
         """
@@ -1120,9 +1113,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
         OUT._cache = {}
-        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left))) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(left)))
+        return OUT
 
     cpdef ModuleElement _lmul_(self, RingElement right):
         """
@@ -1144,9 +1136,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         FfSetField(self.Data.Field)
         cdef Matrix_gfpn_dense OUT = self.__copy__()
         OUT._cache = {}
-        if MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right))) != NULL:
-            return OUT
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(OUT.Data, FfFromInt(self._converter.field_to_int(right)))
+        return OUT
 
     cdef int _strassen_default_cutoff(self, sage.matrix.matrix0.Matrix right) except -2:
         # Surprisingly, Winograd-Strassen can compete with school book
@@ -1178,12 +1169,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cdef Matrix_gfpn_dense OUT = self._new(self._nrows, right._ncols)
         sig_on()
         OUT.Data = MatDup(self.Data)
-        if OUT.Data == NULL:
-            sig_off()
-            raise MemoryError
-        if not MatMul(OUT.Data,right.Data):
-            sig_off()
-            raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMul(OUT.Data,right.Data)
         sig_off()
         OUT._is_immutable = False
         OUT._cache = {}
@@ -1222,8 +1208,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         cutoff = cutoff//sizeof(long)
         StrassenSetCutoff(cutoff)
         sig_on()
-        if MatMulStrassen(OUT.Data, self.Data, right.Data) == NULL:
-            raise ArithmeticError("Error multiplying matrices by Strassen-Winograd algorithm")
+        MatMulStrassen(OUT.Data, self.Data, right.Data)
         sig_off()
         return OUT
 
@@ -1239,9 +1224,8 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             r = FfFromInt(n)
         left = self.__copy__()
         left._cache = {}
-        if MatMulScalar(left.Data, r) != NULL:
-            return left
-        raise ArithmeticError("Matrix sizes or fields not compatible")
+        MatMulScalar(left.Data, r)
+        return left
 
     def __div__(Matrix_gfpn_dense self, p):
         """
@@ -1318,7 +1302,10 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         sig_on()
         try:
             OUT.Data = MatInverse(self.Data)
-        except:
+        except ZeroDivisionError:
+            # Attempting to invert singular matrices happens
+            # in the tests, and we make the special case here
+            # so that the sig_on/off count is fine.
             sig_off()
             raise
         sig_off()
@@ -1424,8 +1411,6 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             raise ValueError("The matrix must not be empty")
         OUT = type(self).__new__(type(self))
         OUT.Data = MatNullSpace(self.Data)
-        if OUT.Data == NULL:
-            raise ArithmeticError("Error computing left kernel matrix")
         OUT._nrows = OUT.Data.Nor
         OUT._ncols = OUT.Data.Noc
         OUT._is_immutable = False
@@ -1541,8 +1526,7 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             self.cache('rank', 0)
             self.cache('pivots', ())
             return self
-        if MatEchelonize(self.Data) == -1:
-            raise ArithmeticError("Error echelonizing this matrix")
+        MatEchelonize(self.Data)
         self._cache = {}
         # Now, self.Data is in semi-echelon form.
         r = self.Data.Nor

From d24260c5dee3faadd274545abdef757e2077d7ca Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sun, 11 Oct 2015 16:48:40 +0200
Subject: [PATCH 22/23] Fix pickling of meataxe matrices

---
 src/sage/matrix/matrix_gfpn_dense.pyx | 51 ++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index 1d4fd5e6516..d2a1d6e8347 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -577,7 +577,36 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
             retval.Data = NULL
         return retval
 
-    ## Pickling and string representation is taken care of by implementing get_unsafe
+    def __reduce__(self):
+        """
+        Return the data needed to pickle this matrix.
+
+        OUTPUT:
+
+        A pair consisting of :func:`mtx_unpickle` and the tuple of
+        arguments it is to be called with.
+
+        TESTS::
+
+            sage: M = MatrixSpace(GF(9,'x'),10,10).random_element()
+            sage: M == loads(dumps(M))   # indirect doctest
+            True
+            sage: M is loads(dumps(M))
+            False
+        """
+        if self.Data:
+            FfSetField(self.Data.Field)
+            FfSetNoc(self.Data.Noc)
+            # The row data is copied into a string of RowSize*Nor bytes.
+            return mtx_unpickle, (self._parent, self.Data.Nor, self.Data.Noc,
+                        PyString_FromStringAndSize(<char*>self.Data.Data,self.Data.RowSize * self.Data.Nor),
+                        not self._is_immutable) # for backward compatibility with the group cohomology package
+        else:
+            # NOTE(review): with f=0, mtx_unpickle takes its integer branch
+            # and builds GF(0, 'z') -- confirm that such a pickle can
+            # actually be loaded.
+            return mtx_unpickle, (0, 0, 0, '', not self._is_immutable)
+
     cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j):
         """
         Get an element without checking.
@@ -1579,3 +1600,55 @@ cdef class Matrix_gfpn_dense(Matrix_dense):
         self.cache('pivots', tuple(self.Data.PivotTable[i] for i in range(r)))
         self.cache('in_echelon_form',True)
 
+def mtx_unpickle(f, int nr, int nc, str Data, bint m):
+    """
+    Helper function for unpickling.
+
+    INPUT:
+
+    - ``f`` -- either the parent of the matrix, or an integer that is
+      used as the order of the base field (old pickles created with
+      the group cohomology spkg)
+    - ``nr``, ``nc`` -- number of rows and columns that are allocated
+    - ``Data`` -- string with the raw row data; if it is empty, nothing
+      is copied into the newly allocated matrix
+    - ``m`` -- whether the resulting matrix is mutable
+
+    OUTPUT:
+
+    A :class:`Matrix_gfpn_dense`.
+
+    A ``ValueError`` is raised if ``Data`` is non-empty but holds fewer
+    bytes than the allocated rows occupy.
+
+    TESTS::
+
+        sage: M = MatrixSpace(GF(9,'x'),10,10).random_element()
+        sage: M == loads(dumps(M))   # indirect doctest
+        True
+        sage: M is loads(dumps(M))
+        False
+    """
+    cdef Matrix_gfpn_dense OUT
+    cdef char *x
+    cdef Py_ssize_t nbytes
+    OUT = Matrix_gfpn_dense.__new__(Matrix_gfpn_dense)
+    if isinstance(f, (int, long)):
+        # This is for old pickles created with the group cohomology spkg
+        Matrix_dense.__init__(OUT, MatrixSpace(GF(f, 'z'), nr, nc))
+    else:
+        Matrix_dense.__init__(OUT, f)
+        f = OUT._base_ring.order()
+    OUT.Data = MatAlloc(f, nr, nc)
+    OUT._is_immutable = not m
+    OUT._converter = FieldConverter(OUT._base_ring)
+    if Data:
+        nbytes = OUT.Data.RowSize*OUT.Data.Nor
+        # memcpy reads exactly nbytes from the string's buffer,
+        # so a pickle that is too short must be rejected first.
+        if len(Data) < nbytes:
+            raise ValueError("Expected a pickle with at least %d bytes, got %d instead"
+                             % (nbytes, len(Data)))
+        x = PyString_AsString(Data)
+        memcpy(OUT.Data.Data, x, nbytes)
+    return OUT

From 74edf19ac9217428c482cef93e77226cca84aab3 Mon Sep 17 00:00:00 2001
From: Simon King <simon.king@uni-jena.de>
Date: Sat, 16 Jan 2016 14:04:34 +0100
Subject: [PATCH 23/23] Make two doctests optional

---
 src/sage/matrix/matrix_gfpn_dense.pyx | 2 +-
 src/sage/matrix/matrix_space.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sage/matrix/matrix_gfpn_dense.pyx b/src/sage/matrix/matrix_gfpn_dense.pyx
index d2a1d6e8347..bbf35cca230 100644
--- a/src/sage/matrix/matrix_gfpn_dense.pyx
+++ b/src/sage/matrix/matrix_gfpn_dense.pyx
@@ -210,7 +210,7 @@ cdef class PrimeFieldConverter_class(FieldConverter_class):
             sage: from sage.matrix.matrix_gfpn_dense import PrimeFieldConverter_class  # optional: meataxe
             sage: F = GF(5)
             sage: C = PrimeFieldConverter_class(F)  # optional: meataxe
-            sage: C.int_to_field(int(2))
+            sage: C.int_to_field(int(2))            # optional: meataxe
             2
             sage: F(2)
             2
diff --git a/src/sage/matrix/matrix_space.py b/src/sage/matrix/matrix_space.py
index 87f0ef311b9..e2585bbcd86 100644
--- a/src/sage/matrix/matrix_space.py
+++ b/src/sage/matrix/matrix_space.py
@@ -989,7 +989,7 @@ def _get_matrix_class(self):
             <type 'sage.matrix.matrix_mod2_dense.Matrix_mod2_dense'>
             sage: type(matrix(GF(64,'z'), 2, range(4)))
             <type 'sage.matrix.matrix_gf2e_dense.Matrix_gf2e_dense'>
-            sage: type(matrix(GF(125,'z'), 2, range(4)))
+            sage: type(matrix(GF(125,'z'), 2, range(4)))     # optional: meataxe
             <type 'sage.matrix.matrix_gfpn_dense.Matrix_gfpn_dense'>
         """
         R = self.base_ring()