forked from jonabbey/eudora2unix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEudoraTOC.py
executable file
·341 lines (295 loc) · 9.8 KB
/
EudoraTOC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#!/usr/bin/env python
"""
For interpreting Eudora mailbox '.toc' files.
Structure elements are read as characters because integers are all
big-endian (like the Mac), and I want this to run on little-endian
machines (like the IBM PC).
This code could be much improved by making use of Python's built-
in endian struct declarations
Note that some Mac versions keep the toc info in the resource forks
of the mailbox files. This info has to be put in a toc file before
this script can work. A utility for doing this is available at
<ftp://ftp.eudora.com/eudora/eudoralight/mac/extras/utils/TOCConvert.sea.hqx>
For Windows Eudora TOC file format, see
http://wso.williams.edu/~eudora/kens/toc.html
Note the Mac and Windows formats are quite different
See RFC 2076 for a discussion of Status header
"""
__author__ = "Stevan White <[email protected]>"
__date__ = "2003-03-06"
__version__ = "1.3"
import sys
import re
import string
from struct import *
# Refuse to run on interpreters older than 2.2.1 final
# (sys.hexversion 0x020201F0 == 33686000).
if sys.hexversion < 0x020201F0:
    # A literal "\n" is written instead of os.linesep: this module never
    # imports os, so the old expression raised NameError on the very path
    # that was supposed to print the message.
    sys.stderr.write( "Aborted: Python version must be at least 2.2.1\n" )
    sys.exit( 1 )
# Eudora Status field (Mac 3.1 version): 1 = '*', 2 = ' ', 4 = 'D', 8 = 'F'
# IN case of 8, interpretation is 'F' when incoming, 'S' when outgoing
# displayed in first column of the mailbox window
# '*' means unread or unsent, is marked with big dot.
# ' ' means read or sent, is otherwise blank
# 'D' means redirected
# 'F' means forwarded
# others, I don't know how they're encoded
# 'Q' means queued
# 'S' means sent
# '-' means never sent
# There are regions of the TOC structure that seem to change every time
# the file is written. Some of these are in areas that are in the space
# for a null-terminated string. This suggests that the structure is read
# piecewise into uninitialized memory. An obstacle for reverse-engineering.
# struct format of the folder header of a Mac TOC file.  'x' items are pad
# bytes that unpack() skips, so only version, name length and name come back.
# Built with str.join rather than the deprecated string.join function.
mac_folder = ''.join( (
    '2s',    # version, always 0x0001
    '42x',   #
    '14x',   #
    'B',     # name len
    '45s',   # name ('mailbox names must be 27 characters or less')
    '174x',  # (to make total 278)
) )
# struct format of the folder header of a Windows TOC file.  'x' items are
# pad bytes that unpack() skips, so only version and name come back.
# Built with str.join rather than the deprecated string.join function.
win_folder = ''.join( (
    '2s',    # version, 0x3000 for Pro 5, 0x2a00 for Lite 1.x
    '6x',    #
    '32s',   # name
    '2x',    # type, int 0 - In, 1 - Out, 2 - Trash, 3 - User
    '2x',    #
    '2x',    # int class 0 - User, 1 - System
    '8x',    # window size
    '2x',    # col_S_width
    '2x',    # col_P_width
    '2x',    # col_A_width
    '2x',    # col_Label_width (Pro only)
    '2x',    # col_Who_width
    '2x',    # col_Date_width
    '2x',    # col_K_width
    '2x',    # col_V_width
    '2x',    # (all 0)
    '30x',   # (all 0)
    '2x',    # n_mess
) )
# Other info that must be stored here:
# Signature on/off, Word Wrap on/off, Keep Copy on/off,
# Text Attachment in Body on/off
# Quoted Printable on/off
#
# Mime/Binhex, Attachment present
# struct format of one message entry in a Mac TOC file.  'x' items are pad
# bytes that unpack() skips; nine values come back (offset, length, status,
# date, priority, To length, To, Subject length, Subject).
# Built with str.join rather than the deprecated string.join function.
mac_entry = ''.join( (
    '4s',    # offset to message in corresponding mailbox
    '4s',    # length of message in corresponding mailbox
    '4x',    # offset to body of message
    'B',     # status ... & 1 = U, & 2 = R, & 4 = & 8 = S
    'x',     # Date length
    '32s',   # Date (0-terminated?)
    '6x',    #
    'x',     # misc: "bla" or "full headers" if value is 8
    'x',     #
    '8x',    # window size
    'B',     # priority unset = 0; Hi MedHi Norm MedLo Lo = 40 80 120 160 200
    'x',     #
    '14x',   # (shorts?)
    'B',     # To length
    '46s',   # To - truncated to 46 bytes
    '16x',   #
    'x',     #
    'B',     # Subject length
    '58s',   # Subject - truncated to 58 bytes
    '18x',   #
    'x',     #
) )
# struct format of one message entry in a Windows TOC file.  'x' items are
# pad bytes that unpack() skips; seven values come back (offset, length,
# status, priority, date, To, Subject).
# Built with str.join rather than the deprecated string.join function.
win_entry = ''.join( (
    '4s',    # offset to message in corresponding mailbox
    '4s',    # length of message in corresponding mailbox
    'xxxx',  # GMT
    'B',     # status (only the first bytes seem to be status)
    'x',     # (disagree with interp on above web page)
    '2x',    # switches
    'B',     # priority
    'x',     #
    '32s',   # Date (0-terminated?)
    '64s',   # To
    '64s',   # Subject
    '8x',    # window size
    '2x',    #
    '4x',    #
    '26x',   # (all 0)
) )
# Big- and little-endian integer conversions
def toIntBig( c ):
    """Return the unsigned integer stored big-endian in the 4 bytes of c.

    Raises struct.error if c is not exactly 4 bytes long.
    """
    # '>' selects big-endian byte order independent of the host machine,
    # which replaces the former per-byte shifting.
    return unpack( '>I', c )[0]
def toShortBig( c ):
    """Return the unsigned integer stored big-endian in the 2 bytes of c.

    Raises struct.error if c is not exactly 2 bytes long.
    """
    # '>' selects big-endian byte order independent of the host machine.
    return unpack( '>H', c )[0]
def toIntLittle( c ):
    """Return the unsigned integer stored little-endian in the 4 bytes of c.

    Raises struct.error if c is not exactly 4 bytes long.
    """
    # '<' selects little-endian byte order independent of the host machine.
    return unpack( '<I', c )[0]
def toShortLittle( c ):
    """Return the unsigned integer stored little-endian in the 2 bytes of c.

    Raises struct.error if c is not exactly 2 bytes long.
    """
    # '<' selects little-endian byte order independent of the host machine.
    return unpack( '<H', c )[0]
def unpackstr( str, i = 0 ):
    """Return str cut off at its first NUL character.

    Adapted from a comp.lang.python post by Michael P. Reilly, 1999/05/14.

    An empty (or otherwise false) str yields None.  If str holds no NUL
    the cut point is its full length.  i is added to the cut point, so
    with the default of 0 the result is exactly the text before the NUL.
    """
    if not str:
        return None
    nul_at = str.find( '\000' )
    if nul_at < 0:
        # no terminator present: count every character
        nul_at = len( str )
    return str[:i + nul_at]
def printMacFolder( out, folder ):
    """Write the version and mailbox name of a Mac TOC header to out.

    folder is the raw header; it is decoded with the mac_folder struct
    format, so unpack() raises struct.error if its size does not match.
    """
    fields = unpack( mac_folder, folder )
    raw_version, name_len, raw_name = fields
    version_line = "Eudora Mac TOC version 0x%x" % ( toShortBig( raw_version ), )
    # the stored name is cut to the length recorded in the header
    folder_line = "Folder: %.*s" % ( name_len, raw_name, )
    print >> out, version_line
    print >> out, folder_line
    print >> out, ""
def printWinFolder( out, folder ):
    """Write the version and mailbox name of a Windows TOC header to out.

    folder is the raw header; it is decoded with the win_folder struct
    format, so unpack() raises struct.error if its size does not match.
    """
    fields = unpack( win_folder, folder )
    raw_version, raw_name = fields
    version_line = "Eudora Windows TOC version 0x%x" % ( toShortLittle( raw_version ), )
    # the name field is NUL-padded; keep only the part before the first NUL
    folder_line = "Folder: " + unpackstr( raw_name )
    print >> out, version_line
    print >> out, folder_line
    print >> out, ""
def printMacEntry( out, entry ):
    """Write a text dump of one Mac TOC message entry to out.

    entry is the raw record; it is decoded with the mac_entry struct
    format, so unpack() raises struct.error if its size does not match.
    Printed in order: offset, length, status flags, raw status value,
    Date, To, Subject and priority, followed by an empty line.
    """
    # Status byte -> flags printed after "status: ".  A status of 0xa
    # (unsent) and any value not listed here print no flags at all.
    status_flags = {
        0x9: "OR",  # sent
        0x1: "O",   # popped, unread
        0x2: "OR",  # popped, read
        0x3: "OR",  # popped, replied
        0x4: "OR",  # popped, redirected
        0x8: "OR",  # popped, forwarded
    }

    fields = unpack( mac_entry, entry )
    ( raw_offset, raw_length, status, date, priority,
      to_len, to, subject_len, subject ) = fields

    print >> out, "offset: %d" % ( toIntBig( raw_offset ), )
    print >> out, "length: %d" % ( toIntBig( raw_length ), )

    # the trailing commas keep the flags on the same line as the label
    print >> out, "status: ",
    if status in status_flags:
        print >> out, status_flags[status],
    print >> out
    print >> out, "valueofstatus: 0x%x" % ( status, )

    # Undecided: in some mailboxes the entry's date length seems to hold a
    # necessary truncation of a junk date string, in others it is 0.  So
    # the date is cut at its first NUL instead of at that length.
    print >> out, "Date: %s" % ( unpackstr( date ), )
    print >> out, "To: %.*s" % ( to_len, to, )
    print >> out, "Subject: %.*s" % ( subject_len, subject, )
    print >> out, "priority: %d" % ( priority / 40, )
    print >> out
def printWinEntry( out, entry ):
    """Write a text dump of one Windows TOC message entry to out.

    entry is the raw record; it is decoded with the win_entry struct
    format, so unpack() raises struct.error if its size does not match.
    Printed in order: offset, length, status flags, raw status value,
    Date, To, Subject and priority, followed by an empty line.
    """
    # Status byte -> flags printed after "status: ".  Values mapped to ""
    # still go through print (as the original chain did); 0xa (time
    # queued) and any value not listed here print nothing.
    status_flags = {
        0x1: "O",   # popped, unread
        0x2: "OR",  # popped, replied
        0x3: "OR",  # popped, forwarded
        0x4: "OR",  # popped, redirected
        0x5: "",    # toc rebuilt
        0x6: "",    # saved
        0x7: "",    # queued
        0x8: "",    # sent
        0x9: "",    # unsent
    }

    fields = unpack( win_entry, entry )
    ( raw_offset, raw_length, status, priority, date, to, subject ) = fields

    print >> out, "offset: %d" % ( toIntLittle( raw_offset ), )
    print >> out, "length: %d" % ( toIntLittle( raw_length ), )

    # the trailing commas keep the flags on the same line as the label
    print >> out, "status: ",
    if status in status_flags:
        print >> out, status_flags[status],
    print >> out
    print >> out, "valueofstatus: 0x%x" %( status, )

    # Undecided: in some mailboxes the entry's date length seems to hold a
    # necessary truncation of a junk date string, in others it is 0.  So
    # every text field is cut at its first NUL instead.
    print >> out, "Date: %s" % ( unpackstr( date ), )
    print >> out, "To: %s" % ( unpackstr( to ), )
    print >> out, "Subject: %s" % ( unpackstr( subject ), )
    print >> out, "priority: %d" % ( priority, )
    print >> out
class TOCError( Exception ):
    """Error raised for problems with a Eudora TOC file.

    The value given to the constructor is stored unchanged in the
    instance attribute `args`; str() of the exception is the repr() of
    that value.
    """

    def args( self ):
        # Instances never reach this method through attribute access,
        # because __init__ stores an instance attribute of the same name.
        # NOTE(review): on interpreters where Exception defines `args` as
        # a property, this class-level name is what keeps the assignment
        # in __init__ from being routed through that property -- confirm
        # before removing it.
        return self.args

    def __init__( self, value ):
        self.args = value

    def __str__( self ):
        return repr( self.args )
def parse( infile, isMac, outfile ):
    """
    Dump the message entries of a Eudora '.toc' file as text.

    infile  -- path of the '.toc' file to read
    isMac   -- true to decode entries with the Mac layout (mac_entry),
               false to decode them with the Windows layout (win_entry)
    outfile -- path of the text file to write; when false (None, ''),
               everything is written to standard output instead

    Returns 0.  Raises TOCError if infile or outfile cannot be opened.
    An entry that fails to decode is reported on stderr and skipped; it
    does not change the return value.

    Historical notes on telling Mac from Windows files:

    Earlier versions guessed the platform from the version found in the
    first two bytes of the file.  That number seems to be made to look
    good in hex, but doesn't really mean much as an integer.  For example,
    Mac Eudora Lite 3.x has version 0001, Windows Eudora Pro 5.o has 0030.
    The guess was: if the upper byte is nonzero, it's Windows, otherwise
    it's Mac.

    Eudora toc file versions seen:
        MAC_EUDORA_LITE_3   = 0x0001
        MAC_EUDORA_LITE_131 = 0x0000    # (a poor choice!)
        WIN_EUDORA_LITE_1   = 0x2a00
        WIN_EUDORA_5        = 0x0300

    Windows 6.1.0.6 used 0x0000 also, which invalidates the theory (it's
    possible that no file format identifier shows Mac vs Windows), so the
    passed-in isMac argument is relied on instead.
    """
    out = sys.stdout
    print >> out, ('Parsing %s ...' % infile)

    try:
        toc = open( infile, "rb" )
    except IOError:
        raise TOCError( "EudoraTOC: couldn't open file " + infile )

    # Nested try/finally (rather than `with`) keeps this valid on the old
    # interpreters the module still accepts, while guaranteeing that both
    # files are closed on every exit path.
    try:
        if outfile:
            # `out` is still sys.stdout at this point
            print >> out, 'Writing %s' % outfile
            try:
                out = open( outfile, "w" )
            except IOError:
                raise TOCError( "EudoraTOC: couldn't open file "
                    + outfile )
        try:
            if isMac:
                foldersize = calcsize( mac_folder )
                entrysize = calcsize( mac_entry )
            else:
                foldersize = calcsize( win_folder )
                entrysize = calcsize( win_entry )

            print >> out, "Expect sizes: folder %d %x, entry %d %x" % ( foldersize, foldersize, entrysize, entrysize )

            # Decoding of the folder header (printMacFolder /
            # printWinFolder on the first `foldersize` bytes) is disabled;
            # a fixed 18 bytes are skipped before the first entry instead.
            # NOTE(review): why 18 rather than `foldersize` is not evident
            # from this file -- confirm against real TOC files.
            toc.read( 18 )

            while True:
                entry = toc.read( entrysize )
                if not entry:
                    break
                try:
                    if isMac:
                        printMacEntry( out, entry )
                    else:
                        printWinEntry( out, entry )
                except (KeyboardInterrupt, SystemExit):
                    # never mistake a user interrupt or exit for bad data
                    raise
                except Exception:
                    # best effort: report the record and keep going
                    sys.stderr.write('Error parsing %s - corrupt?\n' % infile)
        finally:
            # only close what was opened here, never the real stdout
            if out is not sys.stdout:
                out.close()
    finally:
        toc.close()

    return 0
if __name__ == '__main__': # i.e. if script called directly
    # Command line: <tocfile> <isMac> [<outfile>]
    #   isMac   -- integer, nonzero for a Mac TOC file, 0 for Windows
    #   outfile -- optional; without it the dump goes to standard output
    usage = "Usage: %s tocfile isMac(0|1) [outfile]\n" % ( sys.argv[0], )

    # Missing or malformed arguments used to end in a bare traceback.
    if len( sys.argv ) < 3:
        sys.stderr.write( usage )
        sys.exit( 2 )
    try:
        is_mac = int( sys.argv[2] )
    except ValueError:
        sys.stderr.write( usage )
        sys.exit( 2 )

    out_path = None
    if len( sys.argv ) > 3:
        out_path = sys.argv[3]

    try:
        sys.exit( parse( sys.argv[1], is_mac, out_path ) )
    except TOCError:
        # sys.exc_info() fetches the exception without version-specific
        # `except ... , name` / `except ... as name` syntax
        errstr = sys.exc_info()[1]
        # errors go to stderr, like the other diagnostics in this module
        sys.stderr.write( "%s\n" % ( errstr, ) )
        sys.exit( 1 )