diff --git a/Sky.Data.Csv/CsvReader.cs b/Sky.Data.Csv/CsvReader.cs index a67c749..71c5f5b 100644 --- a/Sky.Data.Csv/CsvReader.cs +++ b/Sky.Data.Csv/CsvReader.cs @@ -24,9 +24,9 @@ public class CsvReader : IEnumerable, IDisposable private readonly CsvReaderSettings mCsvSettings; private readonly String mFilePath; - private readonly IDataResolver dataResolver; - private readonly Dictionary> cachedRows = new Dictionary>(); - private Boolean fileHeaderAlreadySkipped = false; + private readonly IDataResolver mDataResolver; + private readonly Dictionary> mCachedRows = new Dictionary>(1024); + private Boolean mFileHeaderAlreadySkipped = false; private void ThrowException(String rowText, Int32 rowIndex, Int32 chPos) { @@ -96,9 +96,10 @@ private List ParseOneRow(String oneRowText) } else if (nextChar == '\"') mCsvTextBuilder.Append(oneRowText[charPos = charPos + 1]); - //Code should not hit this point, it indicates an error else if (!this.mCsvSettings.IgnoreErrors) ThrowException(oneRowText, this.LineIndex, charPos); + else + mCsvTextBuilder.Append(theChar); } } #endregion @@ -131,7 +132,7 @@ protected static void CheckFilePath(String filePath) } protected CsvReader(Stream stream, CsvReaderSettings settings, IDataResolver dataResolver) { - this.dataResolver = dataResolver; + this.mDataResolver = dataResolver; this.mCsvSettings = settings = settings ?? new CsvReaderSettings(); this.mCsvSettings.UseCache = settings.UseCache || settings.SkipDuplicates; EnsureParameters(stream, settings, dataResolver); @@ -167,9 +168,11 @@ public List ReadRow() var commentHint = this.mCsvSettings.CommentHint; while (true) { - if (!this.EnsureBuffer()) return null; + if (this.mBufferPosition >= this.mBufferCharCount) + if (!this.EnsureBuffer()) return null; #region Read one real CSV record line + var quoted = false; mCsvTextBuilder.Length = 0; while (this.mBufferPosition < this.mBufferCharCount) { @@ -186,7 +189,26 @@ public List ReadRow() //for macintosh csv format, it uses \r as line break break; } - else mCsvTextBuilder.Append(firstChar); + //else mCsvTextBuilder.Append(firstChar); + else if (!quoted && firstChar == '\n') break; + else + { + mCsvTextBuilder.Append(firstChar); + if (firstChar == '\"') + { + if (!quoted) quoted = true; + else + { + if (this.mBufferPosition >= this.mBufferCharCount) + if (!this.EnsureBuffer()) break; + + if (this.mBuffer[this.mBufferPosition] == '\"') + mCsvTextBuilder.Append(this.mBuffer[this.mBufferPosition++]); + else + quoted = false; + } + } + } //if there is no line break, we should read to the end of file. if (this.mBufferPosition >= this.mBufferCharCount) @@ -205,22 +227,22 @@ public List ReadRow() continue; ++this.RowIndex; //header is counted for row numbers - if (!fileHeaderAlreadySkipped && this.mCsvSettings.HasHeader) + if (!mFileHeaderAlreadySkipped && this.mCsvSettings.HasHeader) { - fileHeaderAlreadySkipped = true; + mFileHeaderAlreadySkipped = true; continue; } //if a row is in cache, it's already read, process skip duplicates - if (this.mCsvSettings.SkipDuplicates && cachedRows.ContainsKey(oneRowText)) + if (this.mCsvSettings.SkipDuplicates && mCachedRows.ContainsKey(oneRowText)) continue; //if use cache and the row is already read, use the existing value - if (this.mCsvSettings.UseCache && cachedRows.ContainsKey(oneRowText)) - return cachedRows[oneRowText]; + if (this.mCsvSettings.UseCache && mCachedRows.ContainsKey(oneRowText)) + return mCachedRows[oneRowText]; var temporaryData = this.ParseOneRow(oneRowText); //if use cache and the row is not read, add it to cache - if (this.mCsvSettings.UseCache) cachedRows[oneRowText] = temporaryData; + if (this.mCsvSettings.UseCache) mCachedRows[oneRowText] = temporaryData; #endregion return temporaryData; @@ -306,7 +328,7 @@ public IEnumerator GetEnumerator() { for (var row = this.ReadRow(); row != null; row = this.ReadRow()) { - yield return this.dataResolver.Deserialize(row); + yield return this.mDataResolver.Deserialize(row); }; } #endregion diff --git a/Sky.Data.Csv/CsvResolver.cs b/Sky.Data.Csv/CsvResolver.cs index 86475ed..426b5b7 100644 --- a/Sky.Data.Csv/CsvResolver.cs +++ b/Sky.Data.Csv/CsvResolver.cs @@ -28,6 +28,12 @@ public interface IDataResolver /// /// The list of String values. /// The deserialized object. + TData Deserialize(IEnumerable data); + /// + /// Deserialize the specified list of String values to an object. + /// + /// The list of String values. + /// The deserialized object. TData Deserialize(List data); } @@ -38,14 +44,19 @@ public interface IDataResolver /// The generic type of which objects will be serialized and deserialized. public abstract class AbstractDataResolver : IDataResolver { - public TData Deserialize(params String[] data) + public abstract TData Deserialize(List data); + + public virtual TData Deserialize(IEnumerable data) { - return Deserialize(new List(data)); + return Deserialize((List)data ?? new List(data)); } - public abstract TData Deserialize(List data); - public abstract List Serialize(TData data); + + public virtual TData Deserialize(params String[] data) + { + return Deserialize(new List(data)); + } } /// diff --git a/Sky.Data.Csv/CsvWriter.cs b/Sky.Data.Csv/CsvWriter.cs index 0134177..06ed389 100644 --- a/Sky.Data.Csv/CsvWriter.cs +++ b/Sky.Data.Csv/CsvWriter.cs @@ -15,9 +15,9 @@ public class CsvWriter : IDisposable { private readonly StreamWriter mWriter; private readonly CsvWriterSettings mCsvSettings; - private readonly Char[] needQuoteChars; + private readonly Char[] mNeedQuoteChars; - private readonly IDataResolver dataResolver; + private readonly IDataResolver mDataResolver; private static void EnsureParameters(Stream stream, CsvWriterSettings settings, IDataResolver dataResolver) { @@ -51,11 +51,11 @@ protected static void CheckFilePath(String filePath, CsvWriterSettings settings) } protected CsvWriter(Stream stream, CsvWriterSettings settings, IDataResolver dataResolver) { - this.dataResolver = dataResolver; + this.mDataResolver = dataResolver; this.mCsvSettings = settings = settings ?? new CsvWriterSettings(); EnsureParameters(stream, settings, dataResolver); settings.BufferSize = Math.Min(4096 * 1024, Math.Max(settings.BufferSize, 4096)); - needQuoteChars = new Char[] { '\n', '\"', settings.Seperator }; + mNeedQuoteChars = new Char[] { '\n', '\"', settings.Seperator }; this.mWriter = new StreamWriter(stream, settings.Encoding, settings.BufferSize); if (stream is FileStream) { @@ -146,7 +146,7 @@ public CsvWriter WriteRows(IEnumerable data) /// The current CsvWriter instance. public CsvWriter WriteRow(T data) { - return this.WriteRow(this.dataResolver.Serialize(data)); + return this.WriteRow(this.mDataResolver.Serialize(data)); } /// /// Write a list of String values as a CSV record to the current CSV file. @@ -162,7 +162,7 @@ public CsvWriter WriteRow(IEnumerable data) { var valueString = originalCellValueString ?? String.Empty; - if (Array.Exists(needQuoteChars, c => valueString.IndexOf(c) >= 0)) + if (Array.Exists(mNeedQuoteChars, c => valueString.IndexOf(c) >= 0)) { valueString = String.Format("\"{0}\"", valueString.Replace("\"", "\"\"").Replace("\r\n", "\r")); diff --git a/TestData.Csv/csv-lumentest.rar b/TestData.Csv/csv-lumentest.rar new file mode 100644 index 0000000..5a549cb Binary files /dev/null and b/TestData.Csv/csv-lumentest.rar differ diff --git a/TestData.Csv/csv-ms-dos-complex.csv b/TestData.Csv/csv-ms-dos-complex.csv new file mode 100644 index 0000000..ee19c6d --- /dev/null +++ b/TestData.Csv/csv-ms-dos-complex.csv @@ -0,0 +1,10 @@ +·,~,`,!,&,@,#,$,%,^,*,(,), + +_,+,-,=,[,],{,},;,:,a'b,,,","";",abc +REM this is a comment line +#4er,>,.,?,/,\,|, ,s,a,d,f,g +"""""",',;;,#$%^,(*&^,}{{IU,""":""P +OY$E",i,"./, ';l; k; ajldfp","k hk h +k hkj h +kh s",,.' ;'adl 'f l'a;l 'la'l' al'df,s +one more line \ No newline at end of file