diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 73b81f0..1b0f9ad 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ jobs: os: [macOS, Windows, Ubuntu] steps: - uses: actions/checkout@v2 - - uses: royratcliffe/swi-prolog-pack-cover@failed-in-file + - uses: royratcliffe/swi-prolog-pack-cover@main env: GHAPI_PAT: ${{ secrets.GHAPI_PAT }} COVFAIL_GISTID: ${{ secrets.COVFAIL_GISTID }} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c1a7d1f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# Change Log + +Uses [Semantic Versioning](https://semver.org/). Always [keep a change +log](https://keepachangelog.com/en/1.0.0/). + +## [0.1.1] - 2022-03-13 +### Added +- More testing +- MIT license +### Fixed +- Floating-point from bytes + +## [0.1.0] - 2022-03-06 +### Added +- `msgpackc` module +- `memfilesio` module diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..440cb31 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,22 @@ +# MIT License + +Copyright (c) 2022, Roy Ratcliffe, Northumberland, United Kingdom + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + + > The above copyright notice and this permission notice shall be + > included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 1629005..e78e259 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,62 @@ ![cov](https://shields.io/endpoint?url=https://gist.githubusercontent.com/royratcliffe/ccccef2ac1329551794f2a466ee61014/raw/cov.json) ![fail](https://shields.io/endpoint?url=https://gist.githubusercontent.com/royratcliffe/ccccef2ac1329551794f2a466ee61014/raw/fail.json) -Primarily implemented in Prolog but with core highly-optimised C support functions for handling endian transformations via machine-code byte swapping, re-interpreting between ordered bytes (octets) and IEEE-754 floating-point numbers and integers of different bit-widths. +## Usage + +Install the Prolog pack in SWI-Prolog using: + +```prolog +pack_install(msgpackc). +``` + +Pack messages via Definite-Clause Grammar `msgpack//1` using compound terms. +Prolog grammars operate by "unifying" terms with codes, in this case only byte +codes rather than Unicodes. Unification works in both directions and even with +partial knowns. The grammar back-tracks through all possible solutions +non-deterministically until it finds one, else fails. + +The implementation supports all the MessagePack formats including timestamps and +any other extensions. The multi-file predicate hook `msgpack:type_ext_hook/3` +unifies arbitrary types and bytes with their terms. + +## Brief examples + +All the following succeed. + +```prolog +?- [library(msgpackc)]. +true. + +?- phrase(msgpack(float(1e9)), Bytes). +Bytes = [202, 78, 110, 107, 40]. + +?- phrase(msgpack(float(1e18)), Bytes). +Bytes = [203, 67, 171, 193, 109, 103, 78, 200, 0]. + +?- phrase(msgpack(float(Float)), [203, 67, 171, 193, 109, 103, 78, 200, 0]). +Float = 1.0e+18. 
+ +?- phrase(msgpack(array([str("hello"), str("world")])), Bytes), phrase(msgpack(Term), Bytes). +Bytes = [146, 165, 104, 101, 108, 108, 111, 165, 119|...], +Term = array([str("hello"), str("world")]). +``` + +## Project goals + +Primarily implemented in Prolog but with core highly-optimised C support +functions for handling endian transformations via machine-code byte swapping, +re-interpreting between ordered bytes (octets) and IEEE-754 floating-point +numbers and integers of different bit-widths. The goal of this delicate balance between Prolog and C, between definite-clause grammar and low-level bit manipulation, aims to retain the flexibility and elegance of forward and backward unification between Message Pack and byte streams while gleaning the performance benefits of -a C-based foreign support library. Much of the pure C Message Pack -implementation concerns storage and memory management. To a large -extent, any Prolog implementation can ignore memory. Prolog was not -designed for deeply-embedded hardware targets with extreme memory +a C-based foreign support library. + +Much of the pure C Message Pack implementation concerns storage and memory +management. To a large extent, any Prolog implementation can ignore memory. +Prolog was not designed for deeply-embedded hardware targets with extreme memory limitations. ## Functors, fundamentals and primitives @@ -36,6 +82,7 @@ msgpack(nil) --> msgpack_nil, !. msgpack(bool(false)) --> msgpack_false, !. msgpack(bool(true)) --> msgpack_true, !. msgpack(int(Int)) --> msgpack_int(Int), !. +msgpack(float(Float)) --> msgpack_float(Float), !. msgpack(str(Str)) --> msgpack_str(Str), !. msgpack(bin(Bin)) --> msgpack_bin(Bin), !. msgpack(array(Array)) --> msgpack_array(msgpack, Array), !. @@ -51,6 +98,28 @@ terms. The fundamental layer via `msgpack_object//1` attempts to match messages to fundamental types. 
+## Integer space + +The `msgpack//1` implementation does the correct thing when attempting to render +integers at integer boundaries; it correctly fails. + +```prolog +A is 1 << 64, phrase(sequence(msgpack, [int(A)]), B) +``` + +Prolog utilises the GNU Multiple Precision Arithmetic library when values fall +outside the bit-width limits of the host machine. Term `A` exceeds 64 bits in +the example above; Prolog happily computes the correct value within integer +space but it requires 65 bits at least in order to store the value in an +ordinary flat machine word. Hence fails the phrase when attempting to find a +solution to `int(A)` since no available representation of a Message Pack integer +accommodates a 65-bit value. + +The same phrase for `float(A)` _will_ succeed however by rendering a Message +Pack 32-bit float. A float term accepts integers. They convert to equivalent +floating-point values; in that case matching IEEE-754 big-endian sequence `[95, +128, 0, 0]` as a Prolog byte-code list. + ## Useful links * [MessagePack specification](https://github.com/msgpack/msgpack/blob/master/spec.md) diff --git a/c/msgpackc.c b/c/msgpackc.c index a30c262..17d7381 100644 --- a/c/msgpackc.c +++ b/c/msgpackc.c @@ -50,31 +50,36 @@ a note for the direct dependency. * * Fails if it sees integer byte values outside the acceptable range, * zero through 255 inclusive. Failure always updates the given byte - * buffer with the value of the bytes successfully seen. + * buffer with the value of the bytes successfully seen. Automatically + * fails if negative because `PL_get_uint64()` fails for signed + * integers. 
*/ int get_list_bytes(term_t Bytes0, term_t Bytes, size_t count, uint8_t *bytes) { term_t Tail = PL_copy_term_ref(Bytes0); term_t Byte = PL_new_term_ref(); while (count--) - { int value; + { uint64_t value; if (!PL_get_list(Tail, Byte, Tail) || - !PL_get_integer(Byte, &value) || value < 0 || value > UINT8_MAX) PL_fail; + !PL_get_uint64(Byte, &value) || value > UINT8_MAX) PL_fail; *bytes++ = value; } return PL_unify(Bytes, Tail); } /* - * Relies on the compiler to correctly expand an eight-bit byte to a - * signed integer _without_ performing sign extension. + * Relies on the compiler to correctly expand an eight-bit byte to an + * unsigned integer _without_ performing sign extension. Relies on the C + * compiler to zero-extend `unsigned char` to `unsigned long long` and + * no need to check for failure since all unsigned integers subsume all + * proper integer byte values. */ int unify_list_bytes(term_t Bytes0, term_t Bytes, size_t count, const uint8_t *bytes) { term_t Tail = PL_copy_term_ref(Bytes0); term_t Byte = PL_new_term_ref(); while (count--) - if (!PL_unify_list(Tail, Byte, Tail) || !PL_unify_integer(Byte, *bytes++)) PL_fail; + if (!PL_unify_list(Tail, Byte, Tail) || !PL_unify_uint64(Byte, *bytes++)) PL_fail; return PL_unify(Bytes, Tail); } diff --git a/prolog/msgpackc.pl b/prolog/msgpackc.pl index 32d12ab..286b6c1 100644 --- a/prolog/msgpackc.pl +++ b/prolog/msgpackc.pl @@ -103,11 +103,11 @@ msgpack_map(3, ?, ?, ?), msgpack_dict(3, ?, ?, ?). -:- multifile type_ext_hook/3. +:- multifile msgpack:type_ext_hook/3. -%! msgpack(?Object:compound)// is nondet. +%! msgpack(?Term:compound)// is nondet. % -% Where Object is a compound arity-1 functor, never a list term. The +% Where Term is a compound arity-1 functor, never a list term. The % functor carries the format choice. % % Packing arrays and maps necessarily recurses. Array elements are @@ -153,6 +153,10 @@ % % Prolog has no native type for raw binary objects in the vein of R's % raw vector. 
+% +% Notice that integer comes before float. This is important because +% Prolog integers can render as floats and vice versa provided that +% the integer is signed; it fails if unsigned. msgpack_object(nil) --> msgpack_nil, !. msgpack_object(false) --> msgpack_false, !. @@ -253,11 +257,11 @@ % double representation is redundant because the 32-bit representation % fully meets the resolution requirements of the float value. % -% The arity-1 version of the predicate duplicates the encoding -% assumptions. The structure aims to implement precision width -% selection but _without_ re-rendering. It first unifies a 64-bit -% float with eight bytes. Parsing from bytes to Float will fail if -% the bytes run out at the end of the byte stream. +% The arity-1 (+) mode version of the predicate duplicates the +% encoding assumptions. The structure aims to implement precision +% width selection but _without_ re-rendering. It first unifies a +% 64-bit float with eight bytes. Parsing from bytes to Float will fail +% if the bytes run out at the end of the byte stream. % % Predicates float32//1 and float64//1 unify with integer-valued % floats as well as floating-point values. This provides an @@ -269,7 +273,7 @@ }, !, [0xcb|Bytes]. -msgpack_float(Float) --> [0xca], float32(Float). +msgpack_float(Float) --> msgpack_float(_, Float), !. msgpack_float(32, Float) --> [0xca], float32(Float). msgpack_float(64, Float) --> [0xcb], float64(Float). @@ -410,7 +414,7 @@ { var(Str), ! }, - byte(Format), + uint8(Format), { fixstr_format_length(Format, Length), length(Bytes, Length) }, @@ -425,7 +429,7 @@ length(Bytes, Length), fixstr_format_length(Format, Length) }, - byte(Format), + [Format], sequence(byte, Bytes). fixstr_format_length(Format, Length), var(Format) => @@ -582,7 +586,7 @@ { var(Array), ! 
}, - byte(Format), + uint8(Format), { fixarray_format_length(Format, Length), length(Array, Length) }, @@ -718,13 +722,13 @@ msgpack_ext(Term) --> { ground(Term), !, - type_ext_hook(Type, Ext, Term) + msgpack:type_ext_hook(Type, Ext, Term) }, msgpack_ext(Type, Ext). msgpack_ext(Term) --> msgpack_ext(Type, Ext), !, - { type_ext_hook(Type, Ext, Term) + { msgpack:type_ext_hook(Type, Ext, Term) }. %! msgpack_ext(?Type, ?Ext)// is semidet. @@ -787,7 +791,7 @@ ext_width_format(16, 0xc8). ext_width_format(32, 0xc9). -%! type_ext_hook(Type:integer, Ext:list, Term) is semidet. +%! msgpack:type_ext_hook(Type:integer, Ext:list, Term) is semidet. % % Parses the extension byte block. % @@ -795,61 +799,65 @@ % also called Unix epoch time. Three alternative encodings exist: 4 % bytes, 8 bytes and 12 bytes. -type_ext_hook(-1, Ext, timestamp(Epoch)) :- +msgpack:type_ext_hook(-1, Ext, timestamp(Epoch)) :- once(phrase(timestamp(Epoch), Ext)). timestamp(Epoch) --> { var(Epoch) }, - int32(Epoch). + epoch(Epoch). timestamp(Epoch) --> - { var(Epoch) + { number(Epoch), + Epoch >= 0, + tv(Epoch, Seconds, NanoSeconds) }, + sec_nsec(Seconds, NanoSeconds). + +epoch(Epoch) --> + int32(Epoch). +epoch(Epoch) --> uint64(UInt64), { NanoSeconds is UInt64 >> 34, NanoSeconds < 1 000 000 000, Seconds is UInt64 /\ ((1 << 34) - 1), tv(Epoch, Seconds, NanoSeconds) }. -timestamp(Epoch) --> - { var(Epoch) - }, +epoch(Epoch) --> int32(NanoSeconds), int64(Seconds), { tv(Epoch, Seconds, NanoSeconds) }. -timestamp(Epoch) --> - { number(Epoch), - tv(Epoch, Seconds, 0) + +sec_nsec(Seconds, 0) --> + { Seconds < (1 << 32) }, int32(Seconds). -timestamp(Epoch) --> - { number(Epoch), - Epoch >= 0, - tv(Epoch, Seconds, NanoSeconds), - Seconds < (1 << 34), +sec_nsec(Seconds, NanoSeconds) --> + { Seconds < (1 << 34), UInt64 is (NanoSeconds << 34) \/ Seconds }, uint64(UInt64). 
-timestamp(Epoch) --> - { number(Epoch), - tv(Epoch, Seconds, NanoSeconds) - }, +sec_nsec(Seconds, NanoSeconds) --> int32(NanoSeconds), int64(Seconds). -%! tv(Epoch, Sec, NSec) is det. +%! tv(?Epoch:number, ?Sec:number, ?NSec:number) is det. +% +% Uses floor/1 when computing Sec and round/1 for NSec. Time only +% counts completed seconds and time runs up. Asking for the +% integer part of a float does *not* give an integer. It gives the +% floating-point value that matches the integer. % -% Uses floor/1 when computing NSec. Time only counts completed -% nanoseconds and time runs up. Asking for the integer part of a float -% does *not* give an integer. +% The arguments have number type by design. The predicate supports +% negatives; Epoch of -1.1 for example gives -1 seconds, -100,000,000 +% nanoseconds. tv(Epoch, Sec, NSec), var(Epoch) => abs(NSec) < 1 000 000 000, Epoch is Sec + (NSec / 1e9). tv(Epoch, Sec, NSec), number(Epoch) => Sec is floor(float_integer_part(Epoch)), - NSec is floor(1e9 * float_fractional_part(Epoch)). + NSec is round(1e9 * float_fractional_part(Epoch)). %! fix_format_length(Fix, Format, Length) is semidet. % diff --git a/prolog/msgpackc.plt b/prolog/msgpackc.plt index 7b0ebbf..61ab6c1 100644 --- a/prolog/msgpackc.plt +++ b/prolog/msgpackc.plt @@ -1,5 +1,7 @@ :- begin_tests(msgpackc). :- use_module(msgpackc). +:- use_module(library(plunit)). +:- use_module(library(dcg/high_order)). test(msgpack, true(A == [0b1001 0001, 123])) :- phrase(msgpackc:msgpack(array([int(123)])), A). @@ -8,10 +10,15 @@ test(msgpack, true(A == [0b1001 0001, 0b1001 0001, 123])) :- test(msgpack, true(B == map([int(1)-str("x")]))) :- phrase(msgpack(map([int(1)-str("x")])), A), phrase(msgpack(B), A). +test(msgpack, true(Float == 1.0e+18)) :- + phrase(msgpack(float(Float)), [203, 67, 171, 193, 109, 103, 78, 200, 0]). test(msgpack, true(B == map([str("a")-int(1)]))) :- phrase(msgpack_object(_{a:1}), A), phrase(msgpack(B), A). 
+test(sequence_msgpack, true(A == [192, 192, 192])) :- + phrase(sequence(msgpack, [nil, nil, nil]), A). + test(msgpack_object, true(A == [0x80])) :- phrase(msgpack_object(_{}), A). test(msgpack_object, true(A == B{})) :- @@ -101,4 +108,17 @@ test(msgpack_bin, true(A == [])) :- test(msgpack_bin, true(A == [1, 2, 3])) :- phrase(msgpack_bin(8, A), [0xc4, 3, 1, 2, 3]). +test(timestamp, true(A == [214, 255, 0, 0, 0, 0])) :- + phrase(sequence(msgpack, [timestamp(0)]), A). + +endian(Endian) :- term_hash(aap, Hash), endian(Hash, Endian). + +endian(9270206, little). +endian(16674642, big). + +test(le, [ condition(endian(little)), + true(A == [65, 66, 67, 68, 69, 70, 71, 72]) + ]) :- + phrase(msgpackc:float64(2.39373654120722785592079162598e6), A). + :- end_tests(msgpackc).