Skip to content

Commit

Permalink
[tool] 크레마 바이너리의 문자열 해시코드가 달라지는 현상 개선
Browse files Browse the repository at this point in the history
OS 에 따라, .NET Core, .NET Framework, Mono 에 따라 크레마 바이너리의 문자열 해시코드 값이 달라진다.
이로 인하여 바이너리가 달라지는 문제가 발생한다.
이를 개선하기 위해 문자열의 해시코드 대신 고유한 순차적인 번호로 해시코드를 계산한다.
  • Loading branch information
powerumc committed Aug 23, 2019
1 parent 402a969 commit e6250bf
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,12 @@
//OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using Ntreev.Crema.Data;
using Ntreev.Crema.Data.Xml;
using Ntreev.Library.IO;
using Ntreev.Library;
using Ntreev.Crema.Data.Xml.Schema;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Runtime.Serialization;
using System.Text;
using System.Xml;
using System.Data;
using System.ComponentModel;
using System.ComponentModel.Composition;
using Ntreev.Crema.Runtime.Serialization;
using System.Threading.Tasks;

namespace Ntreev.Crema.Runtime.Serialization.Binary
{
Expand All @@ -44,22 +33,31 @@ class BinaryDataSerializer : IDataSerializer

private BinaryTableHeader tableHeader = new BinaryTableHeader();
private BinaryTableInfo tableInfo = new BinaryTableInfo();
private HashSet<string> strings = new HashSet<string>();
private IDictionary<int, string> strings = new Dictionary<int, string>();
private static IDictionary<int, string> globalStrings = new Dictionary<int, string>();
private List<BinaryColumnInfo> columns;
private int stringsIndex = 0;

/// <summary>
/// Parameterless constructor, marked with <c>[ImportingConstructor]</c>.
/// Performs no work of its own; <c>stringsIndex</c> keeps its field-initializer value of 0.
/// </summary>
[ImportingConstructor]
public BinaryDataSerializer()
{

}

/// <summary>
/// Creates a serializer whose string-ID counter starts at the supplied value.
/// </summary>
/// <param name="stringsIndex">Initial value stored in the <c>stringsIndex</c> field.</param>
private BinaryDataSerializer(int stringsIndex)
{
this.stringsIndex = stringsIndex;
}

/// <summary>
/// Gets the name of this serializer, which is always the literal <c>"bin"</c>.
/// </summary>
public string Name => "bin";

public void Serialize(Stream stream, SerializationSet dataSet)
public int Serialize(Stream stream, SerializationSet dataSet, int stringsIndex = 0)
{
this.stringsIndex = stringsIndex;

var fileHeader = new BinaryFileHeader();
var tables = dataSet.Tables;
var tableIndexes = new List<BinaryTableIndex>(tables.Length);
Expand All @@ -81,16 +79,13 @@ public void Serialize(Stream stream, SerializationSet dataSet)

var t = new Dictionary<string, Stream>();

Parallel.ForEach(tables, item =>
tables.ToList().ForEach(item =>
{
var memory = new MemoryStream();
var formatter = new BinaryDataSerializer();
formatter.SerializeTable(memory, item, dataSet.Types);
var formatter = new BinaryDataSerializer(this.stringsIndex);
this.stringsIndex = formatter.SerializeTable(memory, item, dataSet.Types);
memory.Position = 0;
lock (t)
{
t.Add(item.Name, memory);
}
t.Add(item.Name, memory);
});

foreach (var item in tables)
Expand All @@ -112,9 +107,11 @@ public void Serialize(Stream stream, SerializationSet dataSet)

writer.Seek((int)fileHeader.IndexOffset, SeekOrigin.Begin);
writer.WriteArray(tableIndexes.ToArray());

return this.stringsIndex;
}

private void SerializeTable(Stream stream, SerializationTable dataTable, SerializationType[] types)
private int SerializeTable(Stream stream, SerializationTable dataTable, SerializationType[] types)
{
var columns = dataTable.Columns;
var rows = dataTable.Rows;
Expand Down Expand Up @@ -153,6 +150,8 @@ private void SerializeTable(Stream stream, SerializationTable dataTable, Seriali
writer.Seek((int)this.tableHeader.TableInfoOffset, SeekOrigin.Begin);
writer.WriteValue(this.tableInfo);
writer.SetPosition(lastPosition);

return this.stringsIndex;
}

private void CollectColumns(SerializationColumn[] columns)
Expand Down Expand Up @@ -313,9 +312,25 @@ private int GetStringID(string text)
lock (this.strings)
{
text = text.Replace(Environment.NewLine, "\n");
if (this.strings.Contains(text) == false)
this.strings.Add(text);
return text.GetHashCode();
if (this.strings.Values.Contains(text) == false)
{
this.stringsIndex++;
if (globalStrings.Values.Contains(text))
{
var id = globalStrings.First(o => o.Value == text).Key;
this.strings.Add(id, text);
return id;
}
else
{
globalStrings.Add(this.stringsIndex, text);
this.strings.Add(this.stringsIndex, text);
return this.stringsIndex;
}
}

var item = this.strings.First(o => o.Value == text);
return item.Key;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//Released under the MIT License.
//
//Copyright (c) 2018 Ntreev Soft co., Ltd.
//
//Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
//documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
//rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
//persons to whom the Software is furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
//Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
//WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
//COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
//OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace Ntreev.Crema.Runtime.Serialization.Binary
{
/// <summary>
/// Extension helpers for <see cref="BinaryWriter"/>: stream positioning, writing
/// ID-tagged UTF-8 strings, and writing structs as their marshaled byte image.
/// </summary>
public static class BinaryWriterExtension
{
    /// <summary>
    /// Returns the writer's current position by seeking zero bytes from the current location.
    /// </summary>
    /// <param name="writer">The writer to query.</param>
    /// <returns>The current position as reported by <see cref="BinaryWriter.Seek"/>.</returns>
    public static long GetPosition(this BinaryWriter writer)
    {
        return writer.Seek(0, SeekOrigin.Current);
    }

    /// <summary>
    /// Moves the writer to an absolute position measured from the beginning of the stream.
    /// </summary>
    /// <param name="writer">The writer to reposition.</param>
    /// <param name="pos">Absolute offset from the start of the stream.</param>
    public static void SetPosition(this BinaryWriter writer, long pos)
    {
        // BinaryWriter.Seek only accepts an int offset; casting a long would silently wrap
        // for offsets above int.MaxValue. Seek on the underlying stream so the full 64-bit
        // offset is honoured.
        writer.BaseStream.Seek(pos, SeekOrigin.Begin);
    }

    /// <summary>
    /// Writes one string entry: the integer key, the UTF-8 byte count, then the UTF-8 bytes.
    /// </summary>
    /// <param name="writer">The destination writer.</param>
    /// <param name="value">Pair whose key is the string ID and whose value is the text.</param>
    public static void WriteResourceString(this BinaryWriter writer, KeyValuePair<int, string> value)
    {
        var bytes = Encoding.UTF8.GetBytes(value.Value);
        writer.WriteValue(value.Key);
        writer.WriteValue(bytes.Length);
        writer.Write(bytes, 0, bytes.Length);
    }

    /// <summary>
    /// Writes the number of entries followed by each entry via
    /// <see cref="WriteResourceString"/>.
    /// </summary>
    /// <param name="writer">The destination writer.</param>
    /// <param name="strings">The entries to write, in array order.</param>
    public static void WriteResourceStrings(this BinaryWriter writer, KeyValuePair<int, string>[] strings)
    {
        writer.WriteValue(strings.Length);
        foreach (var item in strings)
        {
            writer.WriteResourceString(item);
        }
    }

    /// <summary>
    /// Writes every element of <paramref name="values"/> in order using
    /// <see cref="WriteValue{T}"/>. No length prefix is written.
    /// </summary>
    /// <typeparam name="T">A value type.</typeparam>
    /// <param name="writer">The destination writer.</param>
    /// <param name="values">The elements to write.</param>
    public static void WriteArray<T>(this BinaryWriter writer, T[] values)
        where T : struct
    {
        foreach (T value in values)
        {
            writer.WriteValue(value);
        }
    }

    /// <summary>
    /// Writes a single struct as the byte image produced by <see cref="GetBytes{TStruct}"/>.
    /// </summary>
    /// <typeparam name="T">A value type.</typeparam>
    /// <param name="writer">The destination writer.</param>
    /// <param name="value">The value to write.</param>
    public static void WriteValue<T>(this BinaryWriter writer, T value)
        where T : struct
    {
        var bytes = BinaryWriterExtension.GetBytes<T>(value);
        writer.Write(bytes, 0, bytes.Length);
    }

    /// <summary>
    /// Copies a struct into a new byte array using <see cref="Marshal.StructureToPtr"/>.
    /// The array length is <c>Marshal.SizeOf(typeof(TStruct))</c>.
    /// </summary>
    /// <typeparam name="TStruct">A value type.</typeparam>
    /// <param name="data">The value to convert.</param>
    /// <returns>A newly allocated buffer holding the marshaled bytes.</returns>
    public static byte[] GetBytes<TStruct>(TStruct data)
        where TStruct : struct
    {
        var structSize = Marshal.SizeOf(typeof(TStruct));
        var buffer = new byte[structSize];
        var handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
        try
        {
            Marshal.StructureToPtr(data, handle.AddrOfPinnedObject(), false);
        }
        finally
        {
            // Always release the pinned handle, even if marshaling throws; otherwise the
            // buffer stays pinned and the handle leaks.
            handle.Free();
        }

        return buffer;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ public interface IDataSerializer
{
string Name { get; }

void Serialize(Stream stream, SerializationSet dataSet);
int Serialize(Stream stream, SerializationSet dataSet, int stringsIndex = 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,21 @@ namespace Ntreev.Crema.Runtime.Serialization
{
public static class IDataSerializerExtensions
{
public static void Serialize(this IDataSerializer serializer, string filename, SerializationSet dataSet)
public static int Serialize(this IDataSerializer serializer, string filename, SerializationSet dataSet, int stringsIndex = 0)
{
FileUtility.Backup(filename);
try
{
FileUtility.Prepare(filename);
using (var stream = File.OpenWrite(filename))
{
serializer.Serialize(stream, dataSet);
return serializer.Serialize(stream, dataSet, stringsIndex);
}
}
catch
{
FileUtility.Restore(filename);
return 0;
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
<Compile Include="Binary\BinaryTableHeader.cs" />
<Compile Include="Binary\BinaryTableIndex.cs" />
<Compile Include="Binary\BinaryTableInfo.cs" />
<Compile Include="Binary\BinaryWriterExtension.cs" />
<Compile Include="IDataSerializerExtensions.cs" />
<Compile Include="IDataSerializer.cs" />
<Compile Include="Json\CremaJsonFormatter.cs" />
Expand Down
3 changes: 2 additions & 1 deletion tools/Ntreev.Crema.Commands/GetDataCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,12 @@ private void SerializePerTable(SerializationSet metaData)
}
}

var stringsIndex = 0;
foreach (var dataSet in filteredMetaDataList)
{
var filepath = Path.Combine(this.Filename, $"{dataSet.Tables[0].Name}.{DataSplitSetting.Ext}");
var serializer = this.GetDataSerializer(this.OutputType);
serializer.Serialize(filepath, dataSet);
stringsIndex = serializer.Serialize(filepath, dataSet, stringsIndex);
}
}

Expand Down

0 comments on commit e6250bf

Please sign in to comment.