diff --git a/src/DistributedAssemblers.jl b/src/DistributedAssemblers.jl index 6024659d..3fd9b0e5 100644 --- a/src/DistributedAssemblers.jl +++ b/src/DistributedAssemblers.jl @@ -1,21 +1,46 @@ -struct DistributedAssemblyStrategy - strategies::DistributedData{<:AssemblyStrategy} +struct DistributedAssemblyStrategy{T<:AssemblyStrategy} + strategies::DistributedData{T} end function get_distributed_data(dstrategy::DistributedAssemblyStrategy) dstrategy.strategies end -struct DistributedAssembler{GM,GV,LM,LV} <: Assembler - global_matrix_type::Type{GM} - global_vector_type::Type{GV} - local_matrix_type ::Type{LM} - local_vector_type ::Type{LV} +struct DistributedAssembler{GM,GV,LM,LV,AS} <: Assembler + global_matrix_type :: Type{GM} + global_vector_type :: Type{GV} + local_matrix_type :: Type{LM} + local_vector_type :: Type{LV} + assembly_strategy_type :: Type{AS} trial::DistributedFESpace test::DistributedFESpace assems::DistributedData{<:Assembler} - strategy::DistributedAssemblyStrategy + strategy::DistributedAssemblyStrategy{AS} +end + +""" + allocate_local_vector(::Type{GV},indices) where GV + +Allocate the local vector which holds the local contributions to +the global vector of type{GV}. The global vector is indexable using +indices. +""" +function allocate_local_vector(::Type{GV}, indices::DistributedIndexSet) where GV + @abstractmethod +end + +function assemble_global_matrix(::Type{AS}, ::Type{GM}, + ::DistributedData, + ::DistributedIndexSet, + ::DistributedIndexSet) where {AS,GM} + @abstractmethod +end + +function assemble_global_vector(::Type{AS}, ::Type{GM}, + ::DistributedData, + ::DistributedIndexSet) where {AS,GM} + @abstractmethod end function get_distributed_data(dassem::DistributedAssembler) @@ -141,21 +166,31 @@ function Gridap.FESpaces.assemble_matrix_and_vector(dassem::DistributedAssembler end gids = dassem.test.gids - b = allocate_vector(dassem.global_vector_type,gids) + db = allocate_local_vector(dassem.global_vector_type,gids) dIJV = allocate_coo_vectors(dassem.global_matrix_type,dn) - do_on_parts(dassem,dIJV,ddata,b) do part, assem, IJV, data, b + do_on_parts(dassem,dIJV,ddata,db) do part, assem, IJV, data, b I,J,V = IJV fill_matrix_and_vector_coo_numeric!(I,J,V,b,assem,data) end finalize_coo!(dassem.global_matrix_type,dIJV,dassem.test.gids,dassem.trial.gids) - A = sparse_from_coo(dassem.global_matrix_type,dIJV,dassem.test.gids,dassem.trial.gids) + + A = assemble_global_matrix(dassem.assembly_strategy_type, + dassem.global_matrix_type, + dIJV, + dassem.test.gids, + dassem.trial.gids) + + b = assemble_global_vector(dassem.assembly_strategy_type, + dassem.global_vector_type, + db, + dassem.test.gids) # TO-THINK: Mandatory steps required for PETSc vectors. # Should we define our own interface? E.g., finalize_vector!(b)? 
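# Illustration only (not part of this patch): a minimal sketch of how a
# hypothetical plain-Julia backend could implement the `allocate_local_vector`
# hook declared above. It mirrors the PETSc specialization added further down:
# one dense local vector per part, sized by the number of local (owned + ghost)
# dofs. Treating `Vector{Float64}` as a global vector type is an assumption made
# purely for illustration.
function allocate_local_vector(::Type{Vector{Float64}}, indices::DistributedIndexSet)
  DistributedData(indices) do part, index
    zeros(Float64, length(index.lid_to_gid))
  end
end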
# Note: PETSc.jl provides a fall-back for AbstractArray - PETSc.AssemblyBegin(b) - PETSc.AssemblyEnd(b) + #PETSc.AssemblyBegin(b) + #PETSc.AssemblyEnd(b) A,b end @@ -169,19 +204,27 @@ end # Each proc owns a set of matrix / vector rows (and all cols in these rows) # Each proc computes locally all values in the owned rows # This typically requires to loop also over ghost cells -struct RowsComputedLocally <: AssemblyStrategy +struct RowsComputedLocally{GlobalDoFs} <: AssemblyStrategy part::Int gids::IndexSet end -function Gridap.FESpaces.row_map(a::RowsComputedLocally,row) +function Gridap.FESpaces.row_map(a::RowsComputedLocally{true},row) a.gids.lid_to_gid[row] end -function Gridap.FESpaces.col_map(a::RowsComputedLocally,col) +function Gridap.FESpaces.col_map(a::RowsComputedLocally{true},col) a.gids.lid_to_gid[col] end +function Gridap.FESpaces.row_map(a::RowsComputedLocally{false},row) + row +end + +function Gridap.FESpaces.col_map(a::RowsComputedLocally{false},col) + col +end + function Gridap.FESpaces.row_mask(a::RowsComputedLocally,row) a.part == a.gids.lid_to_owner[row] end @@ -190,29 +233,36 @@ function Gridap.FESpaces.col_mask(a::RowsComputedLocally,col) true end -function RowsComputedLocally(V::DistributedFESpace) - dgids = V.gids - strategies = DistributedData(dgids) do part, gids - RowsComputedLocally(part,gids) - end - DistributedAssemblyStrategy(strategies) +function RowsComputedLocally(V::DistributedFESpace; global_dofs=true) + dgids = V.gids + strategies = DistributedData(dgids) do part, gids + RowsComputedLocally{global_dofs}(part,gids) + end + DistributedAssemblyStrategy(strategies) end - -struct OwnedCellsStrategy <: AssemblyStrategy +struct OwnedCellsStrategy{GlobalDoFs} <: AssemblyStrategy part::Int dof_gids::IndexSet cell_gids::IndexSet end -function Gridap.FESpaces.row_map(a::OwnedCellsStrategy,row) +function Gridap.FESpaces.row_map(a::OwnedCellsStrategy{true},row) a.dof_gids.lid_to_gid[row] end -function Gridap.FESpaces.col_map(a::OwnedCellsStrategy,col) +function Gridap.FESpaces.col_map(a::OwnedCellsStrategy{true},col) a.dof_gids.lid_to_gid[col] end +function Gridap.FESpaces.row_map(a::OwnedCellsStrategy{false},row) + row +end + +function Gridap.FESpaces.col_map(a::OwnedCellsStrategy{false},col) + col +end + function Gridap.FESpaces.row_mask(a::OwnedCellsStrategy,row) true end @@ -221,11 +271,11 @@ function Gridap.FESpaces.col_mask(a::OwnedCellsStrategy,col) true end -function OwnedCellsStrategy(M::DistributedDiscreteModel, V::DistributedFESpace) +function OwnedCellsStrategy(M::DistributedDiscreteModel, V::DistributedFESpace; global_dofs=true) dcell_gids = M.gids ddof_gids = V.gids strategies = DistributedData(ddof_gids,dcell_gids) do part, dof_gids, cell_gids - OwnedCellsStrategy(part,dof_gids,cell_gids) + OwnedCellsStrategy{global_dofs}(part,dof_gids,cell_gids) end DistributedAssemblyStrategy(strategies) end @@ -240,14 +290,22 @@ function Gridap.FESpaces.SparseMatrixAssembler( local_vector_type ::Type, dtrial::DistributedFESpace, dtest::DistributedFESpace, - dstrategy::DistributedAssemblyStrategy) + dstrategy::DistributedAssemblyStrategy{T}) where T assems = DistributedData( dtrial.spaces,dtest.spaces,dstrategy) do part, U, V, strategy SparseMatrixAssembler(local_matrix_type,local_vector_type,U,V,strategy) end - DistributedAssembler(global_matrix_type,global_vector_type,local_matrix_type,local_vector_type,dtrial,dtest,assems,dstrategy) + DistributedAssembler(global_matrix_type, + global_vector_type, + local_matrix_type, + local_vector_type, + T, + dtrial, + dtest, + 
assems, + dstrategy) end function Gridap.FESpaces.SparseMatrixAssembler( @@ -255,6 +313,13 @@ function Gridap.FESpaces.SparseMatrixAssembler( vector_type::Type, dtrial::DistributedFESpace, dtest::DistributedFESpace, - dstrategy::DistributedAssemblyStrategy) - Gridap.FESpaces.SparseMatrixAssembler(matrix_type, vector_type, matrix_type, vector_type, dtrial, dtest, dstrategy) + dstrategy::DistributedAssemblyStrategy{AS}) where AS + Gridap.FESpaces.SparseMatrixAssembler(matrix_type, + vector_type, + matrix_type, + vector_type, + AS, + dtrial, + dtest, + dstrategy) end diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index ec296bc4..5ce97156 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -1,3 +1,5 @@ +__precompile__(false) + module GridapDistributed using Gridap diff --git a/src/MPIPETScAlgebraInterfaces.jl b/src/MPIPETScAlgebraInterfaces.jl index 723b8fd2..47075db9 100644 --- a/src/MPIPETScAlgebraInterfaces.jl +++ b/src/MPIPETScAlgebraInterfaces.jl @@ -21,6 +21,7 @@ function Gridap.Algebra.allocate_vector( vec end + """ fill_entries!(a,v) @@ -32,108 +33,3 @@ function Gridap.Algebra.fill_entries!(a::MPIPETScDistributedVector{Float64},v) end a end - -# """ -# allocate_in_range(::Type{V},matrix) where V -# -# Allocate a vector of type `V` in the range of matrix `matrix`. -# """ -# function allocate_in_range(::Type{V},matrix) where V -# n = size(matrix,1) -# allocate_vector(V,n) -# end -# -# """ -# allocate_in_domain(::Type{V},matrix) where V -# -# Allocate a vector of type `V` in the domain of matrix `matrix`. -# """ -# function allocate_in_domain(::Type{V},matrix) where V -# n = size(matrix,2) -# allocate_vector(V,n) -# end -# """ -# copy_entries!(a,b) -# -# Copy the entries of array `b` into array `a`. Returns `a`. -# """ -# function copy_entries!(a,b) -# if a !== b -# copyto!(a,b) -# end -# a -# end -# -# """ -# add_entries!(a,b,combine=+) -# -# Perform the operation `combine` element-wise in the entries of arrays `a` and `b` -# and store the result in array `a`. Returns `a`. -# """ -# function add_entries!(a,b,combine=+) -# @assert length(a) == length(b) -# @inbounds for i in eachindex(a) -# a[i] = combine(a[i],b[i]) -# end -# a -# end -# -# """ -# add_entry!(A,v,i,j,combine=+) -# -# Add an entry given its position and the operation to perform. -# """ -# function add_entry!(A,v,i::Integer,j::Integer,combine=+) -# aij = A[i,j] -# A[i,j] = combine(aij,v) -# end -# -# """ -# add_entry!(A,v,i,combine=+) -# -# Add an entry given its position and the operation to perform. -# """ -# function add_entry!(A,v,i::Integer,combine=+) -# ai = A[i] -# A[i] = combine(ai,v) -# end -# -# """ -# scale_entries!(a,v) -# -# Scale the entries of array `a` with the value `v`. Returns `a`. -# """ -# function scale_entries!(a,b) -# @inbounds for i in eachindex(a) -# a[i] = b*a[i] -# end -# a -# end -# -# # Base.mul! -# -# """ -# muladd!(c,a,b) -# -# Matrix multiply a*b and add to result to c. Returns c. 
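# Illustration only (not part of this patch): the effect of the new `GlobalDoFs`
# flag on the assembly strategies defined above. With `{true}` the strategy maps
# local dof ids to global ids while the COO arrays are filled; with `{false}` it
# returns local ids unchanged and the local-to-global translation is deferred to
# the backend-specific assemble_global_matrix/assemble_global_vector methods.
# Here `part`, `gids` and `lid` are assumed to be a part id, its IndexSet and a
# local dof id, respectively.
s_glob = RowsComputedLocally{true}(part, gids)
s_loc  = RowsComputedLocally{false}(part, gids)
Gridap.FESpaces.row_map(s_glob, lid)  # returns gids.lid_to_gid[lid] (global id)
Gridap.FESpaces.row_map(s_loc, lid)   # returns lid (local id, translated later)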
-# """ -# function muladd!(c,a,b) -# _muladd!(c,a,b) -# c -# end -# -# @static if VERSION >= v"1.3" -# function _muladd!(c,a,b) -# mul!(c,a,b,1,1) -# end -# else -# function _muladd!(c,a,b) -# @assert length(c) == size(a,1) -# @assert length(b) == size(a,2) -# @inbounds for j in 1:size(a,2) -# for i in 1:size(a,1) -# c[i] += a[i,j]*b[j] -# end -# end -# end -# end diff --git a/src/MPIPETScDistributedAssemblersInterfaces.jl b/src/MPIPETScDistributedAssemblersInterfaces.jl index 57d9cefa..12201a89 100644 --- a/src/MPIPETScDistributedAssemblersInterfaces.jl +++ b/src/MPIPETScDistributedAssemblersInterfaces.jl @@ -10,12 +10,76 @@ function Gridap.Algebra.allocate_coo_vectors( end end +function allocate_local_vector( + ::Type{PETSc.Vec{Float64}}, + indices::MPIPETScDistributedIndexSet, +) + DistributedData(indices) do part,index + fill(0.0,length(index.lid_to_gid)) + end +end + function Gridap.Algebra.finalize_coo!( ::Type{PETSc.Mat{Float64}},IJV::MPIPETScDistributedData,m::MPIPETScDistributedIndexSet,n::MPIPETScDistributedIndexSet) end -function Gridap.Algebra.sparse_from_coo( - ::Type{PETSc.Mat{Float64}},IJV::MPIPETScDistributedData,m::MPIPETScDistributedIndexSet,n::MPIPETScDistributedIndexSet) +function assemble_global_matrix( + ::Type{RowsComputedLocally{false}}, + ::Type{PETSc.Mat{Float64}}, + IJV::MPIPETScDistributedData, + m::MPIPETScDistributedIndexSet, + n::MPIPETScDistributedIndexSet, +) + ngrows = num_gids(m) + ngcols = num_gids(n) + nlrows = num_owned_entries(m) + nlcols = nlrows + + I, J, V = IJV.part + ncols_Alocal = maximum(J) + for i = 1:length(I) + I[i] = m.app_to_petsc_locidx[I[i]] + end + Alocal = sparse_from_coo( + Gridap.Algebra.SparseMatrixCSR{0,Float64,Int64}, + I, + J, + V, + nlrows, + ncols_Alocal, + ) + for i = 1:length(Alocal.colval) + Alocal.colval[i] = m.lid_to_gid_petsc[Alocal.colval[i]+1] - 1 + end + + p = Ref{PETSc.C.Mat{Float64}}() + f(buf)= if isempty(buf) + Ptr{PETSc.C.PetscInt}(0) + else + isa(buf,Vector{PETSc.C.PetscInt}) ? 
buf : PETSc.C.PetscInt[ i for i in buf ] + end + + PETSc.C.chk(PETSc.C.MatCreateMPIAIJWithArrays( + get_comm(m).comm, + nlrows, + nlcols, + ngrows, + ngcols, + f(Alocal.rowptr), + f(Alocal.colval), + Alocal.nzval, + p, + )) + A=PETSc.Mat(p[]) +end + +function assemble_global_matrix( + ::Type{OwnedCellsStrategy{false}}, + ::Type{PETSc.Mat{Float64}}, + IJV::MPIPETScDistributedData, + m::MPIPETScDistributedIndexSet, + n::MPIPETScDistributedIndexSet, +) ngrows = num_gids(m) ngcols = num_gids(n) nlrows = num_owned_entries(m) @@ -29,54 +93,42 @@ function Gridap.Algebra.sparse_from_coo( A.insertmode = PETSc.C.ADD_VALUES # TO-DO: Not efficient!, one call to PETSc function per each injection - do_on_parts(get_comm(m),IJV) do part, IJV - I,J,V = IJV - for (i,j,v) in zip(I,J,V) - A[i,j]=v - end + I,J,V = IJV.part + for (i,j,v) in zip(I,J,V) + A[m.lid_to_gid_petsc[i],n.lid_to_gid_petsc[j]]=v end PETSc.AssemblyBegin(A, PETSc.C.MAT_FINAL_ASSEMBLY) PETSc.AssemblyEnd(A, PETSc.C.MAT_FINAL_ASSEMBLY) - A +end +function assemble_global_vector( + ::Type{OwnedCellsStrategy{false}}, + ::Type{PETSc.Vec{Float64}}, + db::MPIPETScDistributedData, + indices::MPIPETScDistributedIndexSet) + vec = allocate_vector(PETSc.Vec{Float64},indices) + PETSc.setindex0!(vec, db.part, indices.lid_to_gid_petsc .- 1) + PETSc.AssemblyBegin(vec) + PETSc.AssemblyEnd(vec) + vec end -@noinline function Gridap.FESpaces._assemble_matrix_and_vector_fill!( - ::Type{M},nini,I,J,V,b,vals_cache,rows_cache,cols_cache,cell_vals,cell_rows,cell_cols,strategy) where M <: SparseMatrixCSR - n = nini - for cell in 1:length(cell_cols) - rows = getindex!(rows_cache,cell_rows,cell) - cols = getindex!(cols_cache,cell_cols,cell) - vals = getindex!(vals_cache,cell_vals,cell) - matvals, vecvals = vals - for (j,gidcol) in enumerate(cols) - if gidcol > 0 && col_mask(strategy,gidcol) - _gidcol = col_map(strategy,gidcol) - for (i,gidrow) in enumerate(rows) - if gidrow > 0 && row_mask(strategy,gidrow) - _gidrow = row_map(strategy,gidrow) - if is_entry_stored(M,gidrow,gidcol) - n += 1 - @inbounds v = matvals[i,j] - @inbounds I[n] = _gidrow - @inbounds J[n] = _gidcol - @inbounds V[n] = v - end - end - end - end - end - for (i,gidrow) in enumerate(rows) - if gidrow > 0 && row_mask(strategy,gidrow) - _gidrow = row_map(strategy,gidrow) - # TO-DO!!! 
- #bi = vecvals[i] - #b[_gidrow] += bi - b[_gidrow] = vecvals[i] - end - end - end - n +function assemble_global_vector( + ::Type{RowsComputedLocally{false}}, + ::Type{PETSc.Vec{Float64}}, + db::MPIPETScDistributedData, + indices::MPIPETScDistributedIndexSet) + vec = allocate_vector(PETSc.Vec{Float64},indices) + + part = MPI.Comm_rank(get_comm(indices).comm)+1 + owned_pos = (indices.parts.part.lid_to_owner .== part) + bowned = db.part[owned_pos] + l2g_petsc = indices.lid_to_gid_petsc[owned_pos] .- 1 + + PETSc.setindex0!(vec, bowned, l2g_petsc) + PETSc.AssemblyBegin(vec) + PETSc.AssemblyEnd(vec) + vec end diff --git a/test/MPIPETScDistributedPoissonTests.jl b/test/MPIPETScDistributedPoissonTests.jl index 9d992885..400e07c8 100644 --- a/test/MPIPETScDistributedPoissonTests.jl +++ b/test/MPIPETScDistributedPoissonTests.jl @@ -48,9 +48,9 @@ function run(assembly_strategy::AbstractString) if (assembly_strategy == "RowsComputedLocally") - strategy = RowsComputedLocally(V) + strategy = RowsComputedLocally(V; global_dofs=false) elseif (assembly_strategy == "OwnedCellsStrategy") - strategy = OwnedCellsStrategy(model, V) + strategy = OwnedCellsStrategy(model, V; global_dofs=false) else @assert false "Unknown AssemblyStrategy: $(assembly_strategy)" end
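# Illustration only (not part of this patch): how the pieces fit together in a
# driver like the one above. `U` stands for the trial DistributedFESpace, and
# the local matrix/vector types below are an assumed combination rather than one
# prescribed by this patch. Note that only the `global_dofs=false` variants of
# the two strategies get PETSc specializations of assemble_global_matrix /
# assemble_global_vector in this patch, which matches the `global_dofs=false`
# choice made in the test driver.
strategy = OwnedCellsStrategy(model, V; global_dofs=false)
assem = SparseMatrixAssembler(PETSc.Mat{Float64},
                              PETSc.Vec{Float64},
                              SparseMatrixCSR{0,Float64,Int64},
                              Vector{Float64},
                              U,
                              V,
                              strategy)
# The global matrix and vector are then obtained through
# Gridap.FESpaces.assemble_matrix_and_vector, which now selects the
# backend-specific global assembly by dispatching on the strategy type carried
# by the assembler.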