From c4abedce45588ba0e51641616eabf81321b03795 Mon Sep 17 00:00:00 2001 From: Keith Kelly Date: Thu, 26 Sep 2024 11:02:20 -0400 Subject: [PATCH] feat: Add parquet file detection (#578) Adds parquet file detection. See [docs](https://parquet.apache.org/docs/file-format/) for specification Co-authored-by: Keith Kelly --- internal/magic/binary.go | 2 ++ supported_mimes.md | 3 ++- testdata/parquet.parquet | Bin 0 -> 1681 bytes tree.go | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 testdata/parquet.parquet diff --git a/internal/magic/binary.go b/internal/magic/binary.go index f1e94498..96040e6c 100644 --- a/internal/magic/binary.go +++ b/internal/magic/binary.go @@ -21,6 +21,8 @@ var ( SWF = prefix([]byte("CWS"), []byte("FWS"), []byte("ZWS")) // Torrent has bencoded text in the beginning. Torrent = prefix([]byte("d8:announce")) + // Par1 matches a Parquet file, which begins with the ASCII magic bytes "PAR1". + Par1 = prefix([]byte{0x50, 0x41, 0x52, 0x31}) ) // Java bytecode and Mach-O binaries share the same magic number. diff --git a/supported_mimes.md b/supported_mimes.md index a0799605..e6a65f6c 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 175 Supported MIME types +## 176 Supported MIME types This file is automatically generated when running tests. Do not edit manually. 
Extension | MIME type | Aliases @@ -143,6 +143,7 @@ Extension | MIME type | Aliases **.glb** | model/gltf-binary | - **.cab** | application/x-installshield | - **.jxr** | image/jxr | image/vnd.ms-photo +**.parquet** | application/vnd.apache.parquet | application/x-parquet **.txt** | text/plain | - **.html** | text/html | - **.svg** | image/svg+xml | - diff --git a/testdata/parquet.parquet b/testdata/parquet.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7681f59006298bc2da52b22058ee2ef6f28768ed GIT binary patch literal 1681 zcmcIl-EQJW6dnk*(p4{_YK@G<1+unLF9-z#*=5ze80Z3Kfoz;z+qkMC{{;-T@%kr* zM0uk=K>I3HFZ&=pGbSjka?^{BWaj+&JKs5T#(cz5gDtRiwl-!N7JZKpSID7nb3f-% zwvKi?i`bVeSB0Pxc$hMG{OIA1bGw)#`tcbnIA6RXa7-?L^a`E+lPj}@QX4TRC9V+L zQOxFxO!2Q=@$cNJbb9<>kV{qIj@UecpA*;+0_18NaZF?`Y)>bi!Vmc@4JtCFa;g0P z<|lP9r^issON2^QjtO+%(j)HoqkG=*EqilquHD4*BmQmW*L!{iHjwyG_vbcWt?<6? z*CU;}hR8 z*H*?L@q+E%u{M^r853rn9qX1J>kxj=+dhOl{+zt~`BVSw4gc_j%pnV&oH0Q-;IX%U z#l-RB^A_#I0gt`;2@TSuENf`%@j9f;T*nCYa7%~vj9a)n$9;_FxHqS^Zuh{S z665>5|KbwdM2AIDs9(f5#o&OQfb`xF9|^=CQrNn$MbAyNf!tDs>8&x8W5b^~7f#o( zx1&&3+ADFno{RoyVR)7+I_X^W+6zPZOkxsC*+k!ly(64EA2`oxq)LNwdGFTR=X5e^g532$xEw8{jsJ>3&5z)u7K|_ax|sA zGz2LyJd@(dvNRZfnmadNaW}UR-;bexkGvrFK)&1Idb%P{V=Qt1A4P&tBs8^&{)2*iT>qb)~=L6g}!3@sM z^jEYNzyx`8Yi()mf~Jn{bfuHfGhV)Lj+Z!@^hTXSj+$%w)4&i~6l3DFw&Uf*xydGV zKt{>j)dM0OEf`uV!47jb!`&25hzW-CalX5FK`eAAvzMrv%`fd^rqlU_i!t1V5^FaD pdG`)+l|T=_BzAme2EiG`UrnU{{noM!w3KX literal 0 HcmV?d00001 diff --git a/tree.go b/tree.go index 6bf13619..d5a86670 100644 --- a/tree.go +++ b/tree.go @@ -23,7 +23,7 @@ var root = newMIME("application/octet-stream", "", avi, flv, mkv, asf, aac, voc, m3u, rmvb, gzip, class, swf, crx, ttf, woff, woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, lnk, macho, qcp, icns, hdr, mrc, mdb, accdb, zstd, cab, - rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr, + rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr, parquet, 
// Keep text last because it is the slowest check. text, ) @@ -258,4 +258,6 @@ var ( xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf) glb = newMIME("model/gltf-binary", ".glb", magic.Glb) jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo") + parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1). + alias("application/x-parquet") )