-
Notifications
You must be signed in to change notification settings - Fork 166
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make mso detection work similar to what file/file does
https://github.com/file/file/blob/7c62d696b06e53fc5be015c41a57513278ac6c54/magic/Magdir/msooxml The algorithm is not 100% reliable. For example, a zero-compression zip containing a docx will still sometimes be detected as docx instead of zip (it depends on how many files there are and on the order of the files in the zip). The second thing in this PR is removing some test data fixtures. From now on, I'll try as much as possible to write regular unit tests without relying on test file fixtures. #575 (comment) related #550 #575 closes #400
- Loading branch information
1 parent
c78cb11
commit 89677d6
Showing
10 changed files
with
217 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
package magic | ||
|
||
import ( | ||
"archive/zip" | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"testing" | ||
) | ||
|
||
// createZip builds an in-memory zip archive holding one entry per name in
// files, written in slice order. Nothing is written to the entries, so each
// one is empty; only the entry names matter to the callers.
//
// It returns nil and the error if an entry cannot be created. Otherwise it
// returns the buffer together with whatever error closing the archive yields.
func createZip(files []string) (*bytes.Buffer, error) {
	var archive bytes.Buffer
	zw := zip.NewWriter(&archive)

	for i := range files {
		if _, err := zw.Create(files[i]); err != nil {
			return nil, err
		}
	}

	// Close writes the central directory that makes the archive complete.
	err := zw.Close()
	return &archive, err
}
|
||
// createZipUncompressed builds an in-memory zip archive with five entries
// named file0 through file4. Each entry is written with the zip.Store method
// (no compression) and holds a full copy of the bytes in content, so those
// bytes appear verbatim inside the resulting archive.
//
// content is only read through Bytes and is left unmodified, so the caller
// may keep using it afterwards.
//
// It returns nil and the error if an entry cannot be created or written.
// Otherwise it returns the buffer together with whatever error closing the
// archive yields.
func createZipUncompressed(content *bytes.Buffer) (*bytes.Buffer, error) {
	buf := bytes.NewBuffer(nil)
	w := zip.NewWriter(buf)

	// Take the payload once. Copying straight from the *bytes.Buffer would
	// drain it on the first entry and leave the remaining entries empty.
	payload := content.Bytes()

	for i := 0; i < 5; i++ {
		file, err := w.CreateHeader(&zip.FileHeader{
			Name:   fmt.Sprintf("file%d", i),
			Method: zip.Store,
		})
		if err != nil {
			return nil, err
		}
		// A fresh reader per entry, so every entry starts from byte zero.
		if _, err := io.Copy(file, bytes.NewReader(payload)); err != nil {
			return nil, err
		}
	}

	return buf, w.Close()
}
|
||
func TestZeroZip(t *testing.T) { | ||
tcases := []struct { | ||
name string | ||
files []string | ||
xlsx bool | ||
docx bool | ||
pptx bool | ||
jar bool | ||
}{{ | ||
name: "empty zip", | ||
files: nil, | ||
}, { | ||
name: "no [Content_Types].xml", | ||
files: []string{"foo", "word/"}, | ||
}, { | ||
name: "no _rels/", | ||
files: []string{"foo", "word/"}, | ||
}, { | ||
name: "no docProps", | ||
files: []string{"foo", "word/"}, | ||
}, { | ||
name: "no customXml", | ||
files: []string{"foo", "word/"}, | ||
}, { | ||
name: "customXml, but no word/", | ||
files: []string{"customXml"}, | ||
}, { | ||
name: "customXml, and other files, but no word/", | ||
files: []string{"customXml", "1", "2", "3"}, | ||
}, { | ||
name: "customXml, and other files, but word/ is the 7th file", // we only check until 6th file | ||
files: []string{"customXml", "1", "2", "3", "4", "5", "word/"}, | ||
}, { | ||
name: "customXml, word/ xl/ pptx/ after 5 files", | ||
files: []string{"1", "2", "3", "4", "5", "customXml", "word/", "xl/", "ppt/"}, | ||
}, { | ||
name: "customXml, word/", | ||
files: []string{"customXml", "word/"}, | ||
docx: true, | ||
}, { | ||
name: "customXml, word/with_suffix", | ||
files: []string{"customXml", "word/with_suffix"}, | ||
docx: true, | ||
}, { | ||
name: "customXml, word/", | ||
files: []string{"customXml", "word/media"}, | ||
docx: true, | ||
}, { | ||
name: "customXml, xl/", | ||
files: []string{"customXml", "xl/media"}, | ||
xlsx: true, | ||
}, { | ||
name: "customXml, ppt/", | ||
files: []string{"customXml", "ppt/media"}, | ||
pptx: true, | ||
}, { | ||
name: "META-INF", | ||
files: []string{"META-INF/MANIFEST.MF"}, | ||
jar: true, | ||
}, { | ||
name: "1 2 3 4 5 6 META-INF", // we only check first 6 files | ||
files: []string{"1", "2", "3", "4", "5", "6", "META-INF/MANIFEST.MF"}, | ||
jar: false, | ||
}} | ||
|
||
for _, tc := range tcases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
buf, err := createZip(tc.files) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
docx := Docx(buf.Bytes(), 0) | ||
xlsx := Xlsx(buf.Bytes(), 0) | ||
pptx := Pptx(buf.Bytes(), 0) | ||
jar := Jar(buf.Bytes(), 0) | ||
|
||
if tc.docx != docx || tc.xlsx != xlsx || tc.pptx != pptx || tc.jar != jar { | ||
t.Errorf(`expected %t %t %t %t; | ||
got %t %t %t %t`, tc.docx, tc.xlsx, tc.pptx, tc.jar, docx, xlsx, pptx, jar) | ||
} | ||
|
||
// #400 - xlsx, docx, pptx put as is (compression lvl 0) inside a zip | ||
// It should continue to get detected as regular zip, not xlsx or docx or pptx. | ||
uncompressedZip, err := createZipUncompressed(buf) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
docx = Docx(uncompressedZip.Bytes(), 0) | ||
xlsx = Xlsx(uncompressedZip.Bytes(), 0) | ||
pptx = Pptx(uncompressedZip.Bytes(), 0) | ||
jar = Jar(uncompressedZip.Bytes(), 0) | ||
|
||
if docx || xlsx || pptx || jar { | ||
t.Errorf(`uncompressedZip: expected false, false, false; | ||
got %t %t %t %t`, docx, xlsx, pptx, jar) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.