Skip to content

Commit

Permalink
Respect character encoding of all strings sent to the server.
Browse files Browse the repository at this point in the history
Previously, all strings sent to the server were sent in their internal
binary representation, without respecting their character encoding.
Now the encoding of each string is compared with the current connection
encoding, and the string is converted if the two differ.

Since coders are independent of any database connection, this
adds a second parameter to PG::Coder#encode that defines
the destination encoding, which previously was always ASCII_8BIT.
In practice, this encoding should be set to the connection encoding.

This also adds a lot of tests for encoding and decoding data.

This implements issue ged#231: https://bitbucket.org/ged/ruby-pg/issues/231
  • Loading branch information
larskanis committed Dec 25, 2015
1 parent 8feae6c commit e5cb1df
Show file tree
Hide file tree
Showing 10 changed files with 382 additions and 133 deletions.
6 changes: 3 additions & 3 deletions ext/pg.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ typedef struct {
} t_pg_result;


typedef int (* t_pg_coder_enc_func)(t_pg_coder *, VALUE, char *, VALUE *);
typedef int (* t_pg_coder_enc_func)(t_pg_coder *, VALUE, char *, VALUE *, int);
typedef VALUE (* t_pg_coder_dec_func)(t_pg_coder *, char *, int, int, int, int);
typedef VALUE (* t_pg_fit_to_result)(VALUE, VALUE);
typedef VALUE (* t_pg_fit_to_query)(VALUE, VALUE);
Expand Down Expand Up @@ -324,8 +324,8 @@ void init_pg_binary_decoder _(( void ));
VALUE lookup_error_class _(( const char * ));
VALUE pg_bin_dec_bytea _(( t_pg_coder*, char *, int, int, int, int ));
VALUE pg_text_dec_string _(( t_pg_coder*, char *, int, int, int, int ));
int pg_coder_enc_to_s _(( t_pg_coder*, VALUE, char *, VALUE *));
int pg_text_enc_identifier _(( t_pg_coder*, VALUE, char *, VALUE *));
int pg_coder_enc_to_s _(( t_pg_coder*, VALUE, char *, VALUE *, int));
int pg_text_enc_identifier _(( t_pg_coder*, VALUE, char *, VALUE *, int));
t_pg_coder_enc_func pg_coder_enc_func _(( t_pg_coder* ));
t_pg_coder_dec_func pg_coder_dec_func _(( t_pg_coder*, int ));
void pg_define_coder _(( const char *, void *, VALUE, VALUE ));
Expand Down
14 changes: 7 additions & 7 deletions ext/pg_binary_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ VALUE rb_mPG_BinaryEncoder;
*
*/
static int
pg_bin_enc_boolean(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_bin_enc_boolean(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
char mybool;
switch(value){
Expand All @@ -44,7 +44,7 @@ pg_bin_enc_boolean(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate
*
*/
static int
pg_bin_enc_int2(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_bin_enc_int2(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
if(out){
write_nbo16(NUM2INT(*intermediate), out);
Expand All @@ -63,7 +63,7 @@ pg_bin_enc_int2(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
*
*/
static int
pg_bin_enc_int4(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_bin_enc_int4(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
if(out){
write_nbo32(NUM2LONG(*intermediate), out);
Expand All @@ -82,7 +82,7 @@ pg_bin_enc_int4(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
*
*/
static int
pg_bin_enc_int8(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_bin_enc_int8(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
if(out){
write_nbo64(NUM2LL(*intermediate), out);
Expand All @@ -100,7 +100,7 @@ pg_bin_enc_int8(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
*
*/
static int
pg_bin_enc_from_base64(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_bin_enc_from_base64(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
int strlen;
VALUE subint;
Expand All @@ -109,13 +109,13 @@ pg_bin_enc_from_base64(t_pg_coder *conv, VALUE value, char *out, VALUE *intermed

if(out){
/* Second encoder pass, if required */
strlen = enc_func(this->elem, value, out, intermediate);
strlen = enc_func(this->elem, value, out, intermediate, enc_idx);
strlen = base64_decode( out, out, strlen );

return strlen;
} else {
/* First encoder pass */
strlen = enc_func(this->elem, value, NULL, &subint);
strlen = enc_func(this->elem, value, NULL, &subint, enc_idx);

if( strlen == -1 ){
/* Encoded string is returned in subint */
Expand Down
39 changes: 30 additions & 9 deletions ext/pg_coder.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,21 +114,32 @@ pg_composite_decoder_allocate( VALUE klass )
*
*/
static VALUE
pg_coder_encode(VALUE self, VALUE value)
pg_coder_encode(int argc, VALUE *argv, VALUE self)
{
VALUE res;
VALUE intermediate;
VALUE value;
int len, len2;
int enc_idx;
t_pg_coder *this = DATA_PTR(self);

if(argc < 1 || argc > 2){
rb_raise(rb_eArgError, "wrong number of arguments (%i for 1..2)", argc);
}else if(argc == 1){
enc_idx = rb_ascii8bit_encindex();
}else{
enc_idx = rb_to_encoding_index(argv[1]);
}
value = argv[0];

if( NIL_P(value) )
return Qnil;

if( !this->enc_func ){
rb_raise(rb_eRuntimeError, "no encoder function defined");
}

len = this->enc_func( this, value, NULL, &intermediate );
len = this->enc_func( this, value, NULL, &intermediate, enc_idx );

if( len == -1 ){
/* The intermediate value is a String that can be used directly. */
Expand All @@ -137,7 +148,8 @@ pg_coder_encode(VALUE self, VALUE value)
}

res = rb_str_new(NULL, len);
len2 = this->enc_func( this, value, RSTRING_PTR(res), &intermediate);
PG_ENCODING_SET_NOCHECK(res, enc_idx);
len2 = this->enc_func( this, value, RSTRING_PTR(res), &intermediate, enc_idx );
if( len < len2 ){
rb_bug("%s: result length of first encoder run (%i) is less than second run (%i)",
rb_obj_classname( self ), len, len2 );
Expand Down Expand Up @@ -165,8 +177,8 @@ static VALUE
pg_coder_decode(int argc, VALUE *argv, VALUE self)
{
char *val;
VALUE tuple = -1;
VALUE field = -1;
int tuple = -1;
int field = -1;
VALUE res;
t_pg_coder *this = DATA_PTR(self);

Expand Down Expand Up @@ -359,10 +371,19 @@ pg_define_coder( const char *name, void *func, VALUE base_klass, VALUE nsp )


static int
pg_text_enc_in_ruby(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
pg_text_enc_in_ruby(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
*intermediate = rb_funcall( conv->coder_obj, s_id_encode, 1, value );
StringValue( *intermediate );
int arity = rb_obj_method_arity(conv->coder_obj, s_id_encode);
if( arity == 1 ){
VALUE out_str = rb_funcall( conv->coder_obj, s_id_encode, 1, value );
StringValue( out_str );
*intermediate = rb_str_export_to_enc(out_str, rb_enc_from_index(enc_idx));
}else{
VALUE enc = rb_enc_from_encoding(rb_enc_from_index(enc_idx));
VALUE out_str = rb_funcall( conv->coder_obj, s_id_encode, 2, value, enc );
StringValue( out_str );
*intermediate = out_str;
}
return -1;
}

Expand Down Expand Up @@ -442,7 +463,7 @@ init_pg_coder()
* This accessor is only used in PG::Coder#inspect .
*/
rb_define_attr( rb_cPG_Coder, "name", 1, 1 );
rb_define_method( rb_cPG_Coder, "encode", pg_coder_encode, 1 );
rb_define_method( rb_cPG_Coder, "encode", pg_coder_encode, -1 );
rb_define_method( rb_cPG_Coder, "decode", pg_coder_decode, -1 );

/* Document-class: PG::SimpleCoder < PG::Coder */
Expand Down
Loading

0 comments on commit e5cb1df

Please sign in to comment.