Skip to content

Commit

Permalink
Fixreadwritefile, especially for nsapps.ini read and write back (#78)
Browse files Browse the repository at this point in the history
* added entry point for loggers
* most testing for readwritefile.py ok, including read and write of nsapps.ini...
* correction of pyproject.toml

---------

Co-authored-by: Doug Ransom <[email protected]>
  • Loading branch information
quintijn and dougransom authored Jun 17, 2024
1 parent 0db85ea commit 983cf48
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dynamic = ["version", "description"]
requires-python = ">=3.9"
readme = "readme.md"
dependencies= [
## "natlink>=5.3.4",
"natlink>=5.3.4",
"FreeSimpleGUI>=5.1.0",
"pydebugstring >= 1.0.0.1",
"dtactions>=1.6.1",
Expand Down
58 changes: 38 additions & 20 deletions src/natlinkcore/readwritefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
import os
import sys

# replacement strings
WINDOWS_LINE_ENDING = '\r\n'
UNIX_LINE_ENDING = '\n'

class ReadWriteFile:
"""instance to read any text file and/or and write text into same or new file
Expand Down Expand Up @@ -78,7 +82,7 @@ def readAnything(self, input_path, encoding=None):

with open(self.input_path, mode='rb') as file: # b is important -> binary
self.rawText = file.read()
tRaw = fixCrLf(self.rawText)
tRaw = self.rawText
#
for codingscheme in self.encodings:
result = DecodeEncode(tRaw, codingscheme)
Expand All @@ -87,9 +91,15 @@ def readAnything(self, input_path, encoding=None):
pass
if result and ord(result[0]) == 65279: # BOM, remove
result = result[1:]
self.bom = tRaw[0:3]
if codingscheme.replace('-','').lower() == 'utf8':
self.bom = [239, 187, 191]
elif codingscheme.replace('-', '').lower() == 'utf16le':
self.bom = [255, 254]
else:
raise OSError('file "{input_path}", BOM (byte order mark) found at start of file, but not "utf8" or "utf16le": "{codingscheme}"')
self.text = result
self.encoding = codingscheme
result = result.replace(WINDOWS_LINE_ENDING, UNIX_LINE_ENDING)
return result
print(f'readAnything: no valid encoding found for file: {input_path}')
self.text = ''
Expand Down Expand Up @@ -126,6 +136,13 @@ def writeAnything(self, filepath, content, encoding=None, errors=None):
if not isinstance(content, str):
raise TypeError("writeAnything, content should be str, not %s (%s)"% (type(content), filepath))

if sys.platform == 'win32':
# convert \n into \r\n:
content = content.replace(UNIX_LINE_ENDING, WINDOWS_LINE_ENDING)
# content = content.replace(b'\r\r\n', b'\r\n') # just to be sure



if self.encoding != 'ascii':
i = self.encodings.index(self.encoding)
# take 'ascii' and next encoding (will be 'utf-8')
Expand All @@ -148,28 +165,29 @@ def writeAnything(self, filepath, content, encoding=None, errors=None):
else:
tRaw = content.encode(encoding=firstEncoding, errors=errors)

if sys.platform == 'win32':
tRaw = tRaw.replace(b'\n', b'\r\n')
tRaw = tRaw.replace(b'\r\r\n', b'\r\n')

if self.bom:
# print('add bom for tRaw')
tRaw = self.bom + tRaw
outfile = open(filepath, 'wb')
# what difference does a bytearray make? (QH)
outfile.write(bytearray(tRaw))
outfile.close()
bombytes = bytearray(self.bom)
tRaw = bombytes + tRaw # now a bytesarray
with open(filepath, 'wb') as f:
# what difference does a bytearray make? (QH)
f.write(tRaw)

def fixCrLf(tRaw):
    """Strip carriage returns from raw bytes, so CR LF line endings become LF.

    A doubled "CR CR LF" sequence is first collapsed to "CR LF" (this is
    reported on stdout); after that every remaining CR byte is removed.
    Input without any CR byte is returned unchanged.
    """
    cr = b'\r'
    if cr not in tRaw:
        # nothing to fix
        return tRaw

    crcrlf = b'\r\r\n'
    if crcrlf in tRaw:
        print('readAnything, fixCrLf: fix crcrlf')
        tRaw = tRaw.replace(crcrlf, b'\r\n')

    # at least one CR is still present here; drop them all
    return tRaw.replace(cr, b'')
# def fixCrLf(tRaw):
# """replace crlf into lf
# """
# if b'\r\n' in tRaw:
# print('readAnything, fixCrLf: fix crlf')
# tRaw = tRaw.replace(b'\r\n', b'\n')
#
# if b'\r\r\n' in tRaw:
# print('readAnything, fixCrLf: fix crcrlf')
# tRaw = tRaw.replace(b'\r\r\n', b'\r\n')
# if b'\r' in tRaw:
# # print 'readAnything, self.fixCrLf, remove cr'
# tRaw = tRaw.replace(b'\r', b'')
# return tRaw

def DecodeEncode(tRaw, filetype):
"""return the decoded string or False
Expand Down
Binary file added tests/mock_readwritefile/nsapps_aaron.ini
Binary file not shown.
Binary file added tests/mock_readwritefile/nsapps_short.ini
Binary file not shown.
42 changes: 41 additions & 1 deletion tests/test_readwritefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import configparser
import pytest
import filecmp
from natlinkcore.readwritefile import ReadWriteFile
from pathlib import Path

Expand Down Expand Up @@ -95,8 +96,41 @@ def test_other_encodings_write_file(tmp_path):
assert text == 'latin1 café'


def test_nsapps_utf16(tmp_path):
    """Round-trip the nsapps ini file (version of Aaron) through ReadWriteFile.

    The mock file is read, the detected BOM and encoding are checked, the text
    is written back into a temporary folder, and the written copy is required
    to be byte-identical to the input and to read back with the same BOM and
    encoding.
    """
    testDir = tmp_path / testFolderName
    testDir.mkdir()
    file_in = 'nsapps_aaron.ini'
    oldFile = mock_readwritefiledir/file_in
    rwfile = ReadWriteFile(encodings=['utf-16le', 'utf-16be', 'utf-8'])  # optional encoding
    text = rwfile.readAnything(oldFile)
    assert text[0] == ';'

    # [255, 254] is 0xFF 0xFE, the byte order mark of utf-16le
    assert rwfile.bom == [255, 254]
    assert rwfile.encoding == 'utf-16le'

    newPath1 = testDir / ('output1' + file_in)
    rwfile.writeAnything(newPath1, text)

    # shallow=False: always compare the file contents, never only the
    # os.stat() signatures, because this is a byte-exact round-trip check
    assert filecmp.cmp(oldFile, newPath1, shallow=False)

    rwfile2 = ReadWriteFile(encodings=['utf-16le'])  # optional encoding
    text2 = rwfile2.readAnything(newPath1)

    # the raw bytes read from the copy must equal those read from the input
    assert rwfile2.rawText == rwfile.rawText

    assert text2[0] == ';'
    assert rwfile2.bom == [255, 254]
    assert rwfile2.encoding == 'utf-16le'

def test_latin1_cp1252_write_file(tmp_path):
testDir = tmp_path / testFolderName
Expand Down Expand Up @@ -129,7 +163,13 @@ def test_read_write_file(tmp_path):
#write to our temp folder
rwfile.writeAnything(Fout_path, text)
#make sure they are the same
assert open(F_path, 'rb').read() == open(Fout_path, 'rb').read()
org = open(F_path, 'rb').read()
new = open(Fout_path, 'rb').read()
for i, (o,n) in enumerate(zip(org, new)):
if o != n:
parto = org[i:i+2]
partn = new[i:i+2]
raise ValueError(f'old: "{F_path}", new: "{Fout_path}", differ at pos {i}: Old: "{o}", new: "{n}", partold (i:i+2): "{parto}", partnew: "{partn}"')

def test_acoustics_ini(tmp_path):
F='acoustic.ini'
Expand Down

0 comments on commit 983cf48

Please sign in to comment.