diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Dbf.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Dbf.cs index 8d1f50d..721e511 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Dbf.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Dbf.cs @@ -1,18 +1,25 @@ -namespace NetTopologySuite.IO.Esri.Dbf +using System.Text; + +namespace NetTopologySuite.IO.Esri.Dbf { + /// + /// Manages configurations and constants specific to the structure and operation of DBF files in the dBASE III format. + /// internal static class Dbf { - public readonly static int TableDescriptorSize = 32; // Number of bytes in the table header + internal readonly static int TableDescriptorSize = 32; // Number of bytes in the table header internal readonly static int FieldDescriptorSize = 32; // Number of bytes in the field descriptor internal readonly static int MaxFieldCount = 255; - public readonly static byte Dbase3Version = 0x03; // dBASE III - public readonly static byte HeaderTerminatorMark = 0x0D; + internal readonly static byte Dbase3Version = 0x03; // dBASE III + internal readonly static byte HeaderTerminatorMark = 0x0D; + + internal readonly static byte DeletedRecordMark = 0x2A; // '*' + internal readonly static byte ValidRecordMark = 0x20; // ' ' + internal readonly static byte EndOfFileMark = 0x1A; - public readonly static byte DeletedRecordMark = 0x2A; // '*' - public readonly static byte ValidRecordMark = 0x20; // ' ' - public readonly static byte EndOfFileMark = 0x1A; + internal static readonly int MaxFieldNameLength = 10; - public static readonly int MaxFieldNameLength = 10; + internal readonly static Encoding DefaultEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); } } diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs index 829f835..a302378 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs +++ 
b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs @@ -29,7 +29,7 @@ static DbfEncoding() // https://support.esri.com/en/technical-article/000013192 - AddLanguageDriverId(0, Encoding.UTF8); // For unknown LDID + AddLanguageDriverId(0, Dbf.DefaultEncoding); // For unknown LDID AddLanguageDriverId(0x03, Encoding.Default); // OS Default AddLanguageDriverId(0x57, Encoding.Default); // OS Default diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs index 6aec92d..880ee44 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs @@ -106,7 +106,7 @@ private void Initialize(Stream stream, Encoding encoding = null) RecordSize = Buffer.ReadDbfRecordSize(); Buffer.Advance(17); - Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Encoding.UTF8; // null => Try to read encoding from DBF's reserved bytes + Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Dbf.DefaultEncoding; // null => Try to read encoding from DBF's reserved bytes Buffer.Advance(2); // --- File header is done, read field descriptor header now --- diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfStreamExtensions.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfStreamExtensions.cs index bdb6ebd..dae8b73 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfStreamExtensions.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfStreamExtensions.cs @@ -83,7 +83,7 @@ public static Encoding ReadDbfEncoding(this Stream stream) public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field, Encoding encoding) { - encoding = encoding ?? Encoding.UTF8; + encoding = encoding ?? 
Dbf.DefaultEncoding; var name = field.Name.PadRight(Dbf.MaxFieldNameLength, char.MinValue); // Field name must have empty space zero-filled @@ -97,7 +97,7 @@ public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field, } public static DbfField ReadDbaseFieldDescriptor(this Stream stream, Encoding encoding) { - encoding = encoding ?? Encoding.UTF8; + encoding = encoding ?? Dbf.DefaultEncoding; var name = stream.ReadString(Dbf.MaxFieldNameLength, encoding)?.Trim(); stream.Advance(1); // Reserved (field name terminator) diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs index dacbf1e..469e4f6 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs @@ -51,7 +51,7 @@ public class DbfWriter : ManagedDisposable /// DBF file encoding. Defaults to UTF8. public DbfWriter(Stream stream, IReadOnlyList fields, Encoding encoding = null) { - Encoding = encoding ?? Encoding.UTF8; + Encoding = encoding ?? Dbf.DefaultEncoding; IntializeFields(fields); DbfStream = stream ?? throw new ArgumentNullException("Uninitialized dBASE stream.", nameof(stream)); WriteHeader(); @@ -66,7 +66,7 @@ public DbfWriter(Stream stream, IReadOnlyList fields, Encoding encodin /// DBF file encoding. Defaults to UTF8. public DbfWriter(string dbfPath, IReadOnlyList fields, Encoding encoding = null) { - Encoding = encoding ?? Encoding.UTF8; + Encoding = encoding ?? 
Dbf.DefaultEncoding; IntializeFields(fields); WriteCpgEncoding(dbfPath, encoding); try diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Fields/DbfCharacterField.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Fields/DbfCharacterField.cs index aff4170..f19e3ab 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Fields/DbfCharacterField.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Fields/DbfCharacterField.cs @@ -71,7 +71,7 @@ internal override void WriteValue(Stream stream) private Encoding _encoding = null; internal Encoding Encoding { - get { return _encoding ?? Encoding.UTF8; } + get { return _encoding ?? Dbf.DefaultEncoding; } set { if (value == null) diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriter.T.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriter.T.cs index 41fe241..caffaf1 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriter.T.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriter.T.cs @@ -88,7 +88,10 @@ internal ShapefileWriter(string shpPath, ShapefileWriterOptions options) ShpWriter = CreateShpWriter(shpStream, shxStream); // It calls this.ShapeType if (!string.IsNullOrWhiteSpace(options.Projection)) - File.WriteAllText(Path.ChangeExtension(shpPath, ".prj"), options.Projection); + { + var prjPath = Path.ChangeExtension(shpPath, ".prj"); + File.WriteAllText(prjPath, options.Projection, options.Encoding); + } } catch { diff --git a/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriterOptions.cs b/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriterOptions.cs index bd139ce..d03f813 100644 --- a/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriterOptions.cs +++ b/src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriterOptions.cs @@ -21,14 +21,14 @@ public class ShapefileWriterOptions /// public List Fields { get; } = new List(); - 
private Encoding _encoding = Encoding.UTF8; + private Encoding _encoding = Dbf.Dbf.DefaultEncoding; /// /// DBF file encoding. /// public Encoding Encoding { get => _encoding; - set => _encoding = value ?? Encoding.UTF8; + set => _encoding = value ?? Dbf.Dbf.DefaultEncoding; } /// diff --git a/test/NetTopologySuite.IO.Esri.Test/Issues/Issue053.cs b/test/NetTopologySuite.IO.Esri.Test/Issues/Issue053.cs new file mode 100644 index 0000000..a348344 --- /dev/null +++ b/test/NetTopologySuite.IO.Esri.Test/Issues/Issue053.cs @@ -0,0 +1,102 @@ +using NetTopologySuite.IO.Esri.Dbf.Fields; +using NetTopologySuite.IO.Esri.Shapefiles.Writers; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace NetTopologySuite.IO.Esri.Test.Issues; + +/// +/// https://github.com/NetTopologySuite/NetTopologySuite.IO.Esri/issues/53 +/// +internal class Issue053 +{ + [Test] + public void Projection_Utf8_BOM() + { + var fields = new List(); + var fidField = fields.AddNumericInt32Field("fid"); + var options = new ShapefileWriterOptions(ShapeType.Polygon, fields.ToArray()) + { + Projection = "GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",6378137.0,298.257223563]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]]" + }; + + var shpPath = TestShapefiles.GetTempShpPath(); + using (var shpWriter = Shapefile.OpenWrite(shpPath, options)) + { + shpWriter.Geometry = SampleGeometry.SampleMultiPolygon; + fidField.NumericValue = 1; + shpWriter.Write(); + } + + var expectedProjectionString = options.Projection; + var expectedProjectionBytes = options.Encoding.GetBytes(options.Projection); + + var prjPath = Path.ChangeExtension(shpPath, ".prj"); + var storedProjectionString = File.ReadAllText(prjPath); + var storedProjectionBytes = File.ReadAllBytes(prjPath); + + TestShapefiles.DeleteShp(shpPath); + + Assert.AreEqual(expectedProjectionString, storedProjectionString); + Assert.AreEqual(expectedProjectionBytes, 
storedProjectionBytes); + } + + [Test] + public static void Utf8_BOM_Default() + { + var encoding = Encoding.UTF8; + var filePath = Path.GetTempFileName(); + var expectedString = "abc"; + var expectedBytes = encoding.GetBytes(expectedString); + WriteFile(filePath, expectedString, encoding); + + var storedString = File.ReadAllText(filePath, encoding); + var storedBytes = File.ReadAllBytes(filePath); + + Assert.AreEqual(expectedString, storedString); // C# is clever enough to ignore BOM when reading + Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored by default + } + + [Test] + public static void Utf8_BOM_Included() + { + var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true); + var filePath = Path.GetTempFileName(); + var expectedString = "abc"; + var expectedBytes = encoding.GetBytes(expectedString); + WriteFile(filePath, expectedString, encoding); + + var storedString = File.ReadAllText(filePath, encoding); + var storedBytes = File.ReadAllBytes(filePath); + + Assert.AreEqual(expectedString, storedString); // C# is clever enough to ignore BOM when reading + Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored explicitly + } + + [Test] + public static void Utf8_BOM_Excluded() + { + var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + var filePath = Path.GetTempFileName(); + var expectedString = "abc"; + var expectedBytes = encoding.GetBytes(expectedString); + WriteFile(filePath, expectedString, encoding); + + var storedString = File.ReadAllText(filePath, encoding); + var storedBytes = File.ReadAllBytes(filePath); + + Assert.AreEqual(expectedString, storedString); + Assert.AreEqual(expectedBytes, storedBytes); + } + + private static void WriteFile(string filePath, string content, Encoding encoding) + { + using (StreamWriter writer = new StreamWriter(filePath, false, encoding)) + { + writer.Write(content); + } + } +}