Skip to content

Commit

Permalink
Xlsx Reader Merge Range For Entire Column(s) or Row(s)
Browse files Browse the repository at this point in the history
Fix PHPOffice#2501. Merge range can be supplied as entire rows or columns, e.g. `1:1` or `A:C`. PhpSpreadsheet is expecting a row and a column to be specified for both parts of the range, and fails when the unexpected format shows up.

The code to clear cells within the merge range is very inefficient in terms of both memory and time, especially when the range is large (e.g. for an entire row or column). More efficient code is substituted. It is possible that we can get even more efficient by deleting the cleared cells rather than setting them to null. However, that needs more research, and there is no reason to delay this fix while I am researching.

When Xlsx Writer encounters a null cell, it writes it to the output file. For cell merges (especially involving whole rows or columns), this results in a lot of useless output. It is changed to skip the output of null cells when (a) the cell style matches its row's style, or (b) the row style is not specified and the cell style matches its column's style.
  • Loading branch information
oleibman committed Jan 17, 2022
1 parent 778e24c commit 3ffd5eb
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 33 deletions.
56 changes: 45 additions & 11 deletions src/PhpSpreadsheet/Worksheet/Worksheet.php
Original file line number Diff line number Diff line change
Expand Up @@ -1662,26 +1662,60 @@ public function mergeCells($range)
{
// Uppercase coordinate
$range = strtoupper($range);
// Convert 'A:C' to 'A1:C1048576'
$range = self::pregReplace('/^([A-Z]+):([A-Z]+)$/', '${1}1:${2}1048576', $range);
// Convert '1:3' to 'A1:XFD3'
$range = self::pregReplace('/^(\\d+):(\\d+)$/', 'A${1}:XFD${2}', $range);

if (strpos($range, ':') !== false) {
if (preg_match('/^([A-Z]+)(\\d+):([A-Z]+)(\\d+)$/', $range, $matches) === 1) {
$this->mergeCells[$range] = $range;

// make sure cells are created

// get the cells in the range
$aReferences = Coordinate::extractAllCellReferencesInRange($range);
$firstRow = (int) $matches[2];
$lastRow = (int) $matches[4];
$firstColumn = $matches[1];
$lastColumn = $matches[3];
$firstColumnIndex = Coordinate::columnIndexFromString($firstColumn);
$lastColumnIndex = Coordinate::columnIndexFromString($lastColumn);
$numberRows = $lastRow - $firstRow;
$numberColumns = $lastColumnIndex - $firstColumnIndex;

// create upper left cell if it does not already exist
$upperLeft = $aReferences[0];
$upperLeft = "$firstColumn$firstRow";
if (!$this->cellExists($upperLeft)) {
$this->getCell($upperLeft)->setValueExplicit(null, DataType::TYPE_NULL);
}

// Blank out the rest of the cells in the range (if they exist)
$count = count($aReferences);
for ($i = 1; $i < $count; ++$i) {
if ($this->cellExists($aReferences[$i])) {
$this->getCell($aReferences[$i])->setValueExplicit(null, DataType::TYPE_NULL);
if ($numberRows > $numberColumns) {
foreach ($this->getColumnIterator($firstColumn, $lastColumn) as $column) {
foreach ($column->getCellIterator($firstRow) as $cell) {
if ($cell !== null) {
$row = $cell->getRow();
if ($row > $lastRow) {
break;
}
$thisColumn = $cell->getColumn();
$thisColumnIndex = Coordinate::columnIndexFromString($thisColumn);
if ($upperLeft !== "$thisColumn$row") {
$cell->setValueExplicit(null, DataType::TYPE_NULL);
}
}
}
}
} else {
foreach ($this->getRowIterator($firstRow, $lastRow) as $row) {
foreach ($row->getCellIterator($firstColumn) as $cell) {
if ($cell !== null) {
$column = $cell->getColumn();
$columnIndex = Coordinate::columnIndexFromString($column);
if ($columnIndex > $lastColumnIndex) {
break;
}
$thisRow = $cell->getRow();
if ($upperLeft !== "$column$thisRow") {
$cell->setValueExplicit(null, DataType::TYPE_NULL);
}
}
}
}
}
} else {
Expand Down
20 changes: 16 additions & 4 deletions src/PhpSpreadsheet/Writer/Xlsx/Worksheet.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
Expand Down Expand Up @@ -1118,7 +1119,7 @@ private function writeSheetData(XMLWriter $objWriter, PhpspreadsheetWorksheet $w
if (isset($cellsByRow[$currentRow])) {
foreach ($cellsByRow[$currentRow] as $cellAddress) {
// Write cell
$this->writeCell($objWriter, $worksheet, $cellAddress, $aFlippedStringTable);
$this->writeCell($objWriter, $worksheet, $cellAddress, $aFlippedStringTable, $rowDimension->getXfIndex());
}
}

Expand Down Expand Up @@ -1235,15 +1236,26 @@ private function writeCellFormula(XMLWriter $objWriter, string $cellValue, Cell
* @param string $cellAddress Cell Address
* @param string[] $flippedStringTable String table (flipped), for faster index searching
*/
private function writeCell(XMLWriter $objWriter, PhpspreadsheetWorksheet $worksheet, string $cellAddress, array $flippedStringTable): void
private function writeCell(XMLWriter $objWriter, PhpspreadsheetWorksheet $worksheet, string $cellAddress, array $flippedStringTable, ?int $rowXfIndex): void
{
// Cell
$pCell = $worksheet->getCell($cellAddress);
$objWriter->startElement('c');
$objWriter->writeAttribute('r', $cellAddress);

// Sheet styles
$xfi = $pCell->getXfIndex();
if ($pCell->getDataType() === Datatype::TYPE_NULL) {
if ($rowXfIndex === null) {
if (preg_match('/^[A-Z]+/', $cellAddress, $matches) === 1) {
$col = $matches[0];
$rowXfIndex = $worksheet->getColumnDimension($col)->getXfIndex();
}
}
if ($xfi === $rowXfIndex) {
return;
}
}
$objWriter->startElement('c');
$objWriter->writeAttribute('r', $cellAddress);
self::writeAttributeIf($objWriter, $xfi, 's', $xfi);

// If cell value is supplied, write cell value
Expand Down
104 changes: 86 additions & 18 deletions tests/PhpSpreadsheetTests/Calculation/MergedCellTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,119 @@
namespace PhpOffice\PhpSpreadsheetTests\Calculation;

use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Exception as SpreadException;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
use PHPUnit\Framework\TestCase;

class MergedCellTest extends TestCase
{
/**
* @var Spreadsheet
* @param mixed $expectedResult
*
* @dataProvider providerWorksheetFormulaeColumns
*/
protected $spreadSheet;

protected function setUp(): void
public function testMergedCellColumns(string $formula, $expectedResult): void
{
$this->spreadSheet = new Spreadsheet();
$spreadSheet = new Spreadsheet();

$dataSheet = $this->spreadSheet->getActiveSheet();
$dataSheet->setCellValue('A1', 1.1);
$dataSheet = $spreadSheet->getActiveSheet();
$dataSheet->setCellValue('A5', 3.3);
$dataSheet->setCellValue('A3', 3.3);
$dataSheet->setCellValue('A2', 2.2);
$dataSheet->setCellValue('A1', 1.1);
$dataSheet->setCellValue('B2', 2.2);
$dataSheet->setCellValue('B1', 1.1);
$dataSheet->setCellValue('C2', 4.4);
$dataSheet->setCellValue('C1', 3.3);
$dataSheet->mergeCells('A2:A4');
$dataSheet->setCellValue('A5', 3.3);
$dataSheet->mergeCells('B:B');
$worksheet = $spreadSheet->getActiveSheet();

$worksheet->setCellValue('A7', $formula);

$result = $worksheet->getCell('A7')->getCalculatedValue();
self::assertSame($expectedResult, $result);
$spreadSheet->disconnectWorksheets();
}

public function providerWorksheetFormulaeColumns(): array
{
return [
['=SUM(A1:A5)', 6.6],
['=COUNT(A1:A5)', 3],
['=COUNTA(A1:A5)', 3],
['=SUM(A3:A4)', 0],
['=A2+A3+A4', 2.2],
['=A2/A3', Functions::DIV0()],
['=SUM(B1:C2)', 8.8],
];
}

/**
* @param mixed $expectedResult
*
* @dataProvider providerWorksheetFormulae
* @dataProvider providerWorksheetFormulaeRows
*/
public function testMergedCellBehaviour(string $formula, $expectedResult): void
public function testMergedCellRows(string $formula, $expectedResult): void
{
$worksheet = $this->spreadSheet->getActiveSheet();
$spreadSheet = new Spreadsheet();

$dataSheet = $spreadSheet->getActiveSheet();
$dataSheet->setCellValue('A1', 1.1);
$dataSheet->setCellValue('B1', 2.2);
$dataSheet->setCellValue('C1', 3.3);
$dataSheet->setCellValue('E1', 3.3);
$dataSheet->setCellValue('A2', 1.1);
$dataSheet->setCellValue('B2', 2.2);
$dataSheet->setCellValue('A3', 3.3);
$dataSheet->setCellValue('B3', 4.4);
$dataSheet->mergeCells('B1:D1');
$dataSheet->mergeCells('A2:B2');
$worksheet = $spreadSheet->getActiveSheet();

$worksheet->setCellValue('A7', $formula);

$result = $worksheet->getCell('A7')->getCalculatedValue();
self::assertSame($expectedResult, $result);
$spreadSheet->disconnectWorksheets();
}

public function providerWorksheetFormulae(): array
public function providerWorksheetFormulaeRows(): array
{
return [
['=SUM(A1:A5)', 6.6],
['=COUNT(A1:A5)', 3],
['=COUNTA(A1:A5)', 3],
['=SUM(A3:A4)', 0],
['=A2+A3+A4', 2.2],
['=A2/A3', Functions::DIV0()],
['=SUM(A1:E1)', 6.6],
['=COUNT(A1:E1)', 3],
['=COUNTA(A1:E1)', 3],
['=SUM(C1:D1)', 0],
['=B1+C1+D1', 2.2],
['=B1/C1', Functions::DIV0()],
['=SUM(A2:B3)', 8.8],
];
}

private function setBadRange(Worksheet $sheet, string $range): void
{
try {
$sheet->mergeCells($range);
self::fail("Expected invalid merge range $range");
} catch (SpreadException $e) {
self::assertSame('Merge must be set on a range of cells.', $e->getMessage());
}
}

public function testMergedBadRange(): void
{
$spreadSheet = new Spreadsheet();

$dataSheet = $spreadSheet->getActiveSheet();
$this->setBadRange($dataSheet, 'B1');
$this->setBadRange($dataSheet, 'Invalid');
$this->setBadRange($dataSheet, '1');
$this->setBadRange($dataSheet, 'C');
$this->setBadRange($dataSheet, 'B1:C');
$this->setBadRange($dataSheet, 'B:C2');

$spreadSheet->disconnectWorksheets();
}
}
70 changes: 70 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Xlsx/Issue2501Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Reader\Xlsx;

use PhpOffice\PhpSpreadsheet\IOFactory;
use PHPUnit\Framework\TestCase;

class Issue2501Test extends TestCase
{
/**
* @var string
*/
private static $testbook = 'tests/data/Reader/XLSX/issue.2501.b.xlsx';

public function testPreliminaries(): void
{
$file = 'zip://';
$file .= self::$testbook;
$file .= '#xl/worksheets/sheet1.xml';
$data = file_get_contents($file);
// confirm that file contains expected merge ranges
if ($data === false) {
self::fail('Unable to read file');
} else {
self::assertStringContainsString('<mergeCells count="3"><mergeCell ref="A:A"/><mergeCell ref="B:D"/><mergeCell ref="E2:E4"/></mergeCells>', $data);
}
$file = 'zip://';
$file .= self::$testbook;
$file .= '#xl/worksheets/sheet2.xml';
$data = file_get_contents($file);
// confirm that file contains expected merged ranges
if ($data === false) {
self::fail('Unable to read file');
} else {
self::assertStringContainsString('<mergeCells count="3"><mergeCell ref="1:1"/><mergeCell ref="2:4"/><mergeCell ref="B5:D5"/></mergeCells>', $data);
}
}

public function testIssue2501(): void
{
// Merged cell range specified as 1:1"
$filename = self::$testbook;
$reader = IOFactory::createReader('Xlsx');
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getSheetByName('Columns');
$expected = [
'A1:A1048576',
'B1:D1048576',
'E2:E4',
];
if ($sheet === null) {
self::fail('Unable to find sheet Columns');
} else {
self::assertSame($expected, array_values($sheet->getMergeCells()));
}
$sheet = $spreadsheet->getSheetByName('Rows');
$expected = [
'A1:XFD1',
'A2:XFD4',
'B5:D5',
];
if ($sheet === null) {
self::fail('Unable to find sheet Rows');
} else {
self::assertSame($expected, array_values($sheet->getMergeCells()));
}

$spreadsheet->disconnectWorksheets();
}
}
Binary file added tests/data/Reader/XLSX/issue.2501.b.xlsx
Binary file not shown.

0 comments on commit 3ffd5eb

Please sign in to comment.