-
Notifications
You must be signed in to change notification settings - Fork 3.6k
/
Copy pathtype.h
2360 lines (1868 loc) · 83.7 KB
/
type.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <climits>
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <variant>
#include <vector>
#include "arrow/result.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/util/checked_cast.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h" // IWYU pragma: keep
namespace arrow {
namespace detail {
/// \defgroup numeric-datatypes Datatypes for numeric data
/// @{
/// @}
/// \defgroup binary-datatypes Datatypes for binary/string data
/// @{
/// @}
/// \defgroup temporal-datatypes Datatypes for temporal data
/// @{
/// @}
/// \defgroup nested-datatypes Datatypes for nested data
/// @{
/// @}
/// \brief Base class exposing a fingerprint and a metadata fingerprint
///
/// Each fingerprint is held behind an atomic string pointer. The inline
/// accessors return the pointed-to string when the pointer is already set and
/// otherwise defer to an out-of-line slow-path loader.
/// NOTE(review): the slow path presumably calls the Compute*() hooks and
/// publishes the result through the atomic pointer -- confirm in the .cc file.
class ARROW_EXPORT Fingerprintable {
 public:
  virtual ~Fingerprintable();

  /// \brief Return the fingerprint string
  const std::string& fingerprint() const {
    const std::string* cached = fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached : LoadFingerprintSlow();
  }

  /// \brief Return the metadata fingerprint string
  const std::string& metadata_fingerprint() const {
    const std::string* cached = metadata_fingerprint_.load();
    return ARROW_PREDICT_TRUE(cached != NULLPTR) ? *cached
                                                 : LoadMetadataFingerprintSlow();
  }

 protected:
  // Slow paths taken by the accessors above when no pointer is set yet.
  const std::string& LoadFingerprintSlow() const;
  const std::string& LoadMetadataFingerprintSlow() const;

  // Hooks that subclasses must implement to produce the fingerprint strings.
  virtual std::string ComputeFingerprint() const = 0;
  virtual std::string ComputeMetadataFingerprint() const = 0;

  // Both pointers start out null; they are mutable so that const accessors
  // (and their slow paths) can work with them.
  mutable std::atomic<std::string*> fingerprint_{NULLPTR};
  mutable std::atomic<std::string*> metadata_fingerprint_{NULLPTR};
};
} // namespace detail
/// EXPERIMENTAL: Layout specification for a data type
struct ARROW_EXPORT DataTypeLayout {
enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };
/// Layout specification for a single data type buffer
struct BufferSpec {
BufferKind kind;
int64_t byte_width; // For FIXED_WIDTH
bool operator==(const BufferSpec& other) const {
return kind == other.kind &&
(kind != FIXED_WIDTH || byte_width == other.byte_width);
}
bool operator!=(const BufferSpec& other) const { return !(*this == other); }
};
static BufferSpec FixedWidth(int64_t w) { return BufferSpec{FIXED_WIDTH, w}; }
static BufferSpec VariableWidth() { return BufferSpec{VARIABLE_WIDTH, -1}; }
static BufferSpec Bitmap() { return BufferSpec{BITMAP, -1}; }
static BufferSpec AlwaysNull() { return BufferSpec{ALWAYS_NULL, -1}; }
/// A vector of buffer layout specifications, one for each expected buffer
std::vector<BufferSpec> buffers;
/// Whether this type expects an associated dictionary array.
bool has_dictionary = false;
explicit DataTypeLayout(std::vector<BufferSpec> v) : buffers(std::move(v)) {}
};
/// \brief Root class of the data type hierarchy
///
/// Every data type in this library is *logical*. A type may be expressed as
/// a primitive physical type (bytes or bits of some fixed size), as a nested
/// type made of other data types, or in terms of another data type (e.g. a
/// timestamp encoded as an int64).
///
/// Simple datatypes may be entirely described by their Type::type id, but
/// complex datatypes are usually parametric.
class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
                              public detail::Fingerprintable,
                              public util::EqualityComparable<DataType> {
 public:
  explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}
  ~DataType() override;

  /// \brief Return whether the types are equal
  ///
  /// Types that are logically convertible from one to another (e.g. List<UInt8>
  /// and Binary) are NOT equal.
  bool Equals(const DataType& other, bool check_metadata = false) const;

  /// \brief Return whether the types are equal
  bool Equals(const std::shared_ptr<DataType>& other, bool check_metadata = false) const;

  /// \brief Return the child field at index i (the index is not range-checked)
  const std::shared_ptr<Field>& field(int i) const { return children_[i]; }

  /// \brief Return the children fields associated with this type.
  const FieldVector& fields() const { return children_; }

  /// \brief Return the number of children fields associated with this type.
  int num_fields() const {
    const auto count = children_.size();
    return static_cast<int>(count);
  }

  /// \brief Apply the TypeVisitor::Visit() method specialized to the data type
  Status Accept(TypeVisitor* visitor) const;

  /// \brief A string representation of the type, including any children
  virtual std::string ToString() const = 0;

  /// \brief Return hash value (excluding metadata in child fields)
  size_t Hash() const;

  /// \brief A string name of the type, omitting any child fields
  ///
  /// \since 0.7.0
  virtual std::string name() const = 0;

  /// \brief Return the data type layout. Children are not included.
  ///
  /// \note Experimental API
  virtual DataTypeLayout layout() const = 0;

  /// \brief Return the type category
  Type::type id() const { return id_; }

  /// \brief Return the type category of the storage type
  ///
  /// The base implementation returns the same value as id().
  virtual Type::type storage_id() const { return id_; }

  /// \brief Returns the type's fixed byte width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  ///
  /// The base implementation derives the result from bit_width() using
  /// integer division by 8.
  virtual int32_t byte_width() const {
    const int32_t bits = this->bit_width();
    if (bits <= 0) {
      return -1;
    }
    return bits / 8;
  }

  /// \brief Returns the type's fixed bit width, if any. Returns -1
  /// for non-fixed-width types, and should only be used for
  /// subclasses of FixedWidthType
  virtual int bit_width() const { return -1; }

  // \brief EXPERIMENTAL: Enable retrieving shared_ptr<DataType> from a const
  // context.
  std::shared_ptr<DataType> GetSharedPtr() const {
    // shared_from_this() needs a non-const object to hand out
    // shared_ptr<DataType>, hence the const_cast.
    DataType* self = const_cast<DataType*>(this);
    return self->shared_from_this();
  }

 protected:
  // Dummy version that returns a null string (indicating not implemented).
  // Subclasses should override for fast equality checks.
  std::string ComputeFingerprint() const override;

  // Generic version that works for all regular types, nested or not.
  std::string ComputeMetadataFingerprint() const override;

  Type::type id_;
  FieldVector children_;

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
};
/// \brief EXPERIMENTAL: Container for a type pointer which can hold a
/// dynamically created shared_ptr<DataType> if it needs to.
struct ARROW_EXPORT TypeHolder {
const DataType* type = NULLPTR;
std::shared_ptr<DataType> owned_type;
TypeHolder() = default;
TypeHolder(const TypeHolder& other) = default;
TypeHolder& operator=(const TypeHolder& other) = default;
TypeHolder(TypeHolder&& other) = default;
TypeHolder& operator=(TypeHolder&& other) = default;
TypeHolder(std::shared_ptr<DataType> owned_type) // NOLINT implicit construction
: type(owned_type.get()), owned_type(std::move(owned_type)) {}
TypeHolder(const DataType* type) // NOLINT implicit construction
: type(type) {}
Type::type id() const { return this->type->id(); }
std::shared_ptr<DataType> GetSharedPtr() const {
return this->type != NULLPTR ? this->type->GetSharedPtr() : NULLPTR;
}
const DataType& operator*() const { return *this->type; }
operator bool() const { return this->type != NULLPTR; }
bool operator==(const TypeHolder& other) const {
if (type == other.type) return true;
if (type == NULLPTR || other.type == NULLPTR) return false;
return type->Equals(*other.type);
}
bool operator==(decltype(NULLPTR)) const { return this->type == NULLPTR; }
bool operator==(const DataType& other) const {
if (this->type == NULLPTR) return false;
return other.Equals(*this->type);
}
bool operator!=(const DataType& other) const { return !(*this == other); }
bool operator==(const std::shared_ptr<DataType>& other) const {
return *this == *other;
}
bool operator!=(const TypeHolder& other) const { return !(*this == other); }
std::string ToString() const {
return this->type ? this->type->ToString() : "<NULLPTR>";
}
static std::string ToString(const std::vector<TypeHolder>&);
static std::vector<TypeHolder> FromTypes(
const std::vector<std::shared_ptr<DataType>>& types);
};
/// \brief Stream insertion operator for DataType (declaration only)
// NOTE(review): presumably writes the type's ToString() -- confirm in the
// implementation file.
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);
/// \brief Stream insertion operator for TypeHolder (declaration only)
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const TypeHolder& type);
/// \brief Return the compatible physical data type
///
/// Some types may have distinct logical meanings but the exact same physical
/// representation. For example, TimestampType has Int64Type as a physical
/// type (defined as TimestampType::PhysicalType).
///
/// The return value is as follows:
/// - if a `PhysicalType` alias exists in the concrete type class, return
/// an instance of `PhysicalType`.
/// - otherwise, return the input type itself.
///
/// \param[in] type the logical type to map
/// \return the physical type as described above
// NOTE(review): unlike the neighbouring free functions this declaration
// carries no ARROW_EXPORT -- confirm whether that is intentional.
std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
/// \brief Base class for all fixed-width data types
///
/// Declares no members of its own beyond the destructor; construction is
/// inherited from DataType.
class ARROW_EXPORT FixedWidthType : public DataType {
public:
// Inherit the DataType constructors unchanged.
using DataType::DataType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~FixedWidthType() override;
};
/// \brief Base class for all data types representing primitive values
///
/// Declares no members of its own beyond the destructor; construction is
/// inherited from FixedWidthType.
class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
public:
// Inherit the FixedWidthType constructors unchanged.
using FixedWidthType::FixedWidthType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~PrimitiveCType() override;
};
/// \brief Base class for all numeric data types
///
/// Declares no members of its own beyond the destructor; construction is
/// inherited from PrimitiveCType.
class ARROW_EXPORT NumberType : public PrimitiveCType {
public:
// Inherit the PrimitiveCType constructors unchanged.
using PrimitiveCType::PrimitiveCType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~NumberType() override;
};
/// \brief Base class for all integral data types
///
/// Adds the pure virtual is_signed() query on top of NumberType.
class ARROW_EXPORT IntegerType : public NumberType {
public:
// Inherit the NumberType constructors unchanged.
using NumberType::NumberType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~IntegerType() override;
/// \brief Return whether the integer type is signed
///
/// Pure virtual: every concrete integer type must provide the answer.
virtual bool is_signed() const = 0;
};
/// \brief Base class for all floating-point data types
///
/// Adds the Precision enumeration and the pure virtual precision() query on
/// top of NumberType.
class ARROW_EXPORT FloatingPointType : public NumberType {
public:
// Inherit the NumberType constructors unchanged.
using NumberType::NumberType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~FloatingPointType() override;
/// \brief The floating-point precisions a subclass can report
enum Precision { HALF, SINGLE, DOUBLE };
/// \brief Return the precision of the concrete floating-point type
///
/// Pure virtual: every concrete floating-point type must provide it.
virtual Precision precision() const = 0;
};
/// \brief Base class for all parametric data types
///
/// An empty class: it declares no members and is only used as an additional
/// base class by parametric types.
class ParametricType {};
/// \brief Base class deriving from both DataType and ParametricType
///
/// NOTE(review): presumably the common base of the nested data types
/// (see the nested-datatypes documentation group) -- confirm.
class ARROW_EXPORT NestedType : public DataType, public ParametricType {
public:
// Inherit the DataType constructors unchanged.
using DataType::DataType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~NestedType() override;
};
/// \brief The combination of a field name and data type, with optional metadata
///
/// Fields are used to describe the individual constituents of a
/// nested DataType or a Schema.
///
/// A field's metadata is represented by a KeyValueMetadata instance,
/// which holds arbitrary key-value pairs.
class ARROW_EXPORT Field : public detail::Fingerprintable,
public util::EqualityComparable<Field> {
public:
/// \brief Construct a field
///
/// \param[in] name the field name
/// \param[in] type the field data type
/// \param[in] nullable whether the field is nullable (default: true)
/// \param[in] metadata optional key-value metadata (default: none)
Field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR)
: detail::Fingerprintable(),
name_(std::move(name)),
type_(std::move(type)),
nullable_(nullable),
metadata_(std::move(metadata)) {}
~Field() override;
/// \brief Return the field's attached metadata
std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
/// \brief Return whether the field has non-empty metadata
bool HasMetadata() const;
/// \brief Return a copy of this field with the given metadata attached to it
std::shared_ptr<Field> WithMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const;
/// \brief EXPERIMENTAL: Return a copy of this field with the given metadata
/// merged with existing metadata (any colliding keys will be overridden by
/// the passed metadata)
std::shared_ptr<Field> WithMergedMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const;
/// \brief Return a copy of this field without any metadata attached to it
std::shared_ptr<Field> RemoveMetadata() const;
/// \brief Return a copy of this field with the replaced type.
std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;
/// \brief Return a copy of this field with the replaced name.
std::shared_ptr<Field> WithName(const std::string& name) const;
/// \brief Return a copy of this field with the replaced nullability.
std::shared_ptr<Field> WithNullable(bool nullable) const;
/// \brief Options that control the behavior of `MergeWith`.
/// Options are to be added to allow type conversions, including integer
/// widening, promotion from integer to float, or conversion to or from boolean.
///
/// Every option defaults to false except promote_nullability.
struct ARROW_EXPORT MergeOptions : public util::ToStringOstreamable<MergeOptions> {
/// If true, a Field of NullType can be unified with a Field of another type.
/// The unified field will be of the other type and become nullable.
/// Nullability will be promoted to the looser option (nullable if one is not
/// nullable).
bool promote_nullability = true;
/// Allow a decimal to be unified with another decimal of the same
/// width, adjusting scale and precision as appropriate. May fail
/// if the adjustment is not possible.
bool promote_decimal = false;
/// Allow a decimal to be promoted to a float. The float type will
/// not itself be promoted (e.g. Decimal128 + Float32 = Float32).
bool promote_decimal_to_float = false;
/// Allow an integer to be promoted to a decimal.
///
/// May fail if the decimal has insufficient precision to
/// accommodate the integer (see promote_numeric_width).
bool promote_integer_to_decimal = false;
/// Allow an integer of a given bit width to be promoted to a
/// float; the result will be a float of an equal or greater bit
/// width to both of the inputs. Examples:
/// - int8 + float32 = float32
/// - int32 + float32 = float64
/// - int32 + float64 = float64
/// Because an int32 cannot always be represented exactly in the
/// 24 bits of a float32 mantissa.
bool promote_integer_to_float = false;
/// Allow an unsigned integer of a given bit width to be promoted
/// to a signed integer that fits into the signed type:
/// uint + int16 = int16
/// When widening is needed, set promote_numeric_width to true:
/// uint16 + int16 = int32
/// NOTE(review): "uint" in the first example presumably means uint8 --
/// confirm.
bool promote_integer_sign = false;
/// Allow an integer, float, or decimal of a given bit width to be
/// promoted to an equivalent type of a greater bit width.
bool promote_numeric_width = false;
/// Allow strings to be promoted to binary types. Promotion of fixed size
/// binary types to variable sized formats, and binary to large binary,
/// and string to large string.
bool promote_binary = false;
/// Second to millisecond, Time32 to Time64, Time32(SECOND) to Time32(MILLI), etc
bool promote_temporal_unit = false;
/// Allow promotion from a list to a large-list and from a fixed-size list to a
/// variable sized list
bool promote_list = false;
/// Unify dictionary index types and dictionary value types.
bool promote_dictionary = false;
/// Allow merging ordered and non-ordered dictionaries.
/// The result will be ordered if and only if both inputs
/// are ordered.
bool promote_dictionary_ordered = false;
/// Get default options. Only NullType will be merged with other types.
static MergeOptions Defaults() { return MergeOptions(); }
/// Get permissive options. All options are enabled, except
/// promote_dictionary_ordered.
static MergeOptions Permissive();
/// Get a human-readable representation of the options.
std::string ToString() const;
};
/// \brief Merge the current field with a field of the same name.
///
/// The two fields must be compatible, i.e:
/// - have the same name
/// - have the same type, or of compatible types according to `options`.
///
/// The metadata of the current field is preserved; the metadata of the other
/// field is discarded.
///
/// \param[in] other the field to merge with
/// \param[in] options the promotions to allow (default: MergeOptions::Defaults())
/// \return the merged field, or an error Result
Result<std::shared_ptr<Field>> MergeWith(
const Field& other, MergeOptions options = MergeOptions::Defaults()) const;
/// \brief Overload of MergeWith taking the other field as a shared_ptr
Result<std::shared_ptr<Field>> MergeWith(
const std::shared_ptr<Field>& other,
MergeOptions options = MergeOptions::Defaults()) const;
// NOTE(review): undocumented in this header; presumably expands a nested
// field into its flattened child fields -- confirm in the implementation.
FieldVector Flatten() const;
/// \brief Indicate if fields are equal.
///
/// \param[in] other field to check equality with.
/// \param[in] check_metadata controls if it should check for metadata
/// equality.
///
/// \return true if fields are equal, false otherwise.
bool Equals(const Field& other, bool check_metadata = false) const;
/// \brief Overload of Equals taking the other field as a shared_ptr
bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = false) const;
/// \brief Indicate if fields are compatible.
///
/// See the criteria of MergeWith.
///
/// \return true if fields are compatible, false otherwise.
bool IsCompatibleWith(const Field& other) const;
/// \brief Overload of IsCompatibleWith taking the other field as a shared_ptr
bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;
/// \brief Return a string representation of the field
/// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
/// print keys and values in the output
std::string ToString(bool show_metadata = false) const;
/// \brief Return the field name
const std::string& name() const { return name_; }
/// \brief Return the field data type
const std::shared_ptr<DataType>& type() const { return type_; }
/// \brief Return whether the field is nullable
bool nullable() const { return nullable_; }
// NOTE(review): undocumented in this header; presumably returns a new Field
// with the same name, type, nullability and metadata -- confirm.
std::shared_ptr<Field> Copy() const;
private:
// Fingerprint hooks required by detail::Fingerprintable (defined out of line).
std::string ComputeFingerprint() const override;
std::string ComputeMetadataFingerprint() const override;
// Field name
std::string name_;
// The field's data type
std::shared_ptr<DataType> type_;
// Fields can be nullable
bool nullable_;
// The field's metadata, if any
std::shared_ptr<const KeyValueMetadata> metadata_;
ARROW_DISALLOW_COPY_AND_ASSIGN(Field);
};
/// \brief Print a Field to the given output stream (declaration only)
// NOTE(review): the name and signature match the GoogleTest value-printer
// customization point (PrintTo(const T&, std::ostream*)); presumably that is
// the intended use -- confirm.
ARROW_EXPORT void PrintTo(const Field& field, std::ostream* os);
namespace detail {
/// \brief CRTP-style helper implementing the DataType queries of a type whose
/// values are stored as a C type
///
/// \tparam DERIVED the concrete type class; must provide a static type_name()
/// \tparam BASE the base class to derive from, constructed from a Type::type
/// \tparam TYPE_ID the type id passed to BASE and exposed as type_id
/// \tparam C_TYPE the C type whose size determines the reported widths
template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
class ARROW_EXPORT CTypeImpl : public BASE {
 public:
  static constexpr Type::type type_id = TYPE_ID;
  using c_type = C_TYPE;
  using PhysicalType = DERIVED;

  CTypeImpl() : BASE(TYPE_ID) {}

  /// \brief Size of C_TYPE expressed in bits
  int bit_width() const override {
    constexpr auto kNumBits = sizeof(C_TYPE) * CHAR_BIT;
    return static_cast<int>(kNumBits);
  }

  /// \brief A bitmap buffer followed by a fixed-width buffer of sizeof(C_TYPE)
  DataTypeLayout layout() const override {
    const auto bitmap_spec = DataTypeLayout::Bitmap();
    const auto values_spec = DataTypeLayout::FixedWidth(sizeof(C_TYPE));
    return DataTypeLayout({bitmap_spec, values_spec});
  }

  /// \brief The name supplied by DERIVED::type_name()
  std::string name() const override {
    std::string result = DERIVED::type_name();
    return result;
  }

  /// \brief Same string as name()
  std::string ToString() const override { return this->name(); }
};

// Out-of-class definition of the static constexpr data member.
template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
constexpr Type::type CTypeImpl<DERIVED, BASE, TYPE_ID, C_TYPE>::type_id;
/// \brief CTypeImpl specialization helper for integer types
///
/// Implements IntegerType::is_signed() from the signedness of C_TYPE.
///
/// \tparam DERIVED the concrete integer type class
/// \tparam TYPE_ID the type id of the concrete integer type
/// \tparam C_TYPE the C integer type backing the values
template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, IntegerType, TYPE_ID, C_TYPE> {
 public:
  // Kept public, like IntegerType::is_signed(), so the query can also be
  // called directly on a concrete integer type and not only through a
  // reference to the IntegerType base.
  /// \brief Return whether C_TYPE is a signed type
  bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
};
} // namespace detail
/// Concrete type class for always-null data
///
/// Its layout consists of a single ALWAYS_NULL buffer specification.
class ARROW_EXPORT NullType : public DataType {
 public:
  static constexpr Type::type type_id = Type::NA;

  /// \brief Name of this type as a C string ("null")
  static constexpr const char* type_name() { return "null"; }

  NullType() : DataType(Type::NA) {}

  std::string ToString() const override;

  DataTypeLayout layout() const override {
    std::vector<DataTypeLayout::BufferSpec> specs;
    specs.push_back(DataTypeLayout::AlwaysNull());
    return DataTypeLayout(std::move(specs));
  }

  /// \brief Same string as type_name()
  std::string name() const override { return type_name(); }

 protected:
  std::string ComputeFingerprint() const override;
};
/// Concrete type class for boolean data
///
/// Built on CTypeImpl with `bool` as C type, but overrides the width and the
/// layout inherited from it.
class ARROW_EXPORT BooleanType
    : public detail::CTypeImpl<BooleanType, PrimitiveCType, Type::BOOL, bool> {
 public:
  /// \brief Name of this type as a C string ("bool")
  static constexpr const char* type_name() { return "bool"; }

  // Arrow stores a BooleanType value in a single bit, unlike the 8-bit C
  // layout that CTypeImpl would otherwise report.
  int bit_width() const final { return 1; }

  /// \brief Two bitmap buffers
  DataTypeLayout layout() const override {
    const DataTypeLayout::BufferSpec bitmap = DataTypeLayout::Bitmap();
    return DataTypeLayout({bitmap, bitmap});
  }

 protected:
  std::string ComputeFingerprint() const override;
};
/// \addtogroup numeric-datatypes
///
/// @{
/// Concrete type class for unsigned 8-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::UINT8 and uint8_t.
class ARROW_EXPORT UInt8Type
: public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
public:
/// \brief Name of this type as a C string ("uint8")
static constexpr const char* type_name() { return "uint8"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for signed 8-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::INT8 and int8_t.
class ARROW_EXPORT Int8Type
: public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
public:
/// \brief Name of this type as a C string ("int8")
static constexpr const char* type_name() { return "int8"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for unsigned 16-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::UINT16 and uint16_t.
class ARROW_EXPORT UInt16Type
: public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
public:
/// \brief Name of this type as a C string ("uint16")
static constexpr const char* type_name() { return "uint16"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for signed 16-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::INT16 and int16_t.
class ARROW_EXPORT Int16Type
: public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
public:
/// \brief Name of this type as a C string ("int16")
static constexpr const char* type_name() { return "int16"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for unsigned 32-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::UINT32 and uint32_t.
class ARROW_EXPORT UInt32Type
: public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
public:
/// \brief Name of this type as a C string ("uint32")
static constexpr const char* type_name() { return "uint32"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for signed 32-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::INT32 and int32_t.
class ARROW_EXPORT Int32Type
: public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
public:
/// \brief Name of this type as a C string ("int32")
static constexpr const char* type_name() { return "int32"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for unsigned 64-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::UINT64 and uint64_t.
class ARROW_EXPORT UInt64Type
: public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
public:
/// \brief Name of this type as a C string ("uint64")
static constexpr const char* type_name() { return "uint64"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for signed 64-bit integer data
///
/// Instantiates detail::IntegerTypeImpl with Type::INT64 and int64_t.
class ARROW_EXPORT Int64Type
: public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
public:
/// \brief Name of this type as a C string ("int64")
static constexpr const char* type_name() { return "int64"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for 16-bit floating-point data
///
/// Instantiates detail::CTypeImpl with Type::HALF_FLOAT and uint16_t as the
/// C type.
class ARROW_EXPORT HalfFloatType
: public detail::CTypeImpl<HalfFloatType, FloatingPointType, Type::HALF_FLOAT,
uint16_t> {
public:
/// \brief Precision of this type; defined out of line
Precision precision() const override;
/// \brief Name of this type as a C string ("halffloat")
static constexpr const char* type_name() { return "halffloat"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for 32-bit floating-point data (C "float")
///
/// Instantiates detail::CTypeImpl with Type::FLOAT and float as the C type.
class ARROW_EXPORT FloatType
: public detail::CTypeImpl<FloatType, FloatingPointType, Type::FLOAT, float> {
public:
/// \brief Precision of this type; defined out of line
Precision precision() const override;
/// \brief Name of this type as a C string ("float")
static constexpr const char* type_name() { return "float"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// Concrete type class for 64-bit floating-point data (C "double")
///
/// Instantiates detail::CTypeImpl with Type::DOUBLE and double as the C type.
class ARROW_EXPORT DoubleType
: public detail::CTypeImpl<DoubleType, FloatingPointType, Type::DOUBLE, double> {
public:
/// \brief Precision of this type; defined out of line
Precision precision() const override;
/// \brief Name of this type as a C string ("double")
static constexpr const char* type_name() { return "double"; }
protected:
// Fingerprint hook override; defined out of line.
std::string ComputeFingerprint() const override;
};
/// @}
/// \brief Base class for all variable-size binary data types
///
/// Declares no members of its own beyond the destructor; construction is
/// inherited from DataType.
class ARROW_EXPORT BaseBinaryType : public DataType {
public:
// Inherit the DataType constructors unchanged.
using DataType::DataType;
// The destructor is declared here and defined out of line. This is only
// to prevent the class from being defined in each translation unit,
// which avoids one-definition-rule violations.
~BaseBinaryType() override;
};
/// One less than the largest int32_t value (2147483646).
// NOTE(review): presumably the byte limit for binary data addressed with
// 32-bit offsets -- confirm against the users of this constant.
constexpr int64_t kBinaryMemoryLimit =
    static_cast<int64_t>(std::numeric_limits<int32_t>::max()) - 1;
/// \addtogroup binary-datatypes
///
/// @{
/// \brief Concrete type class for variable-size binary data
///
/// Uses int32_t as offset type.
class ARROW_EXPORT BinaryType : public BaseBinaryType {
 public:
  static constexpr Type::type type_id = Type::BINARY;
  static constexpr bool is_utf8 = false;
  using offset_type = int32_t;
  using PhysicalType = BinaryType;

  /// \brief Name of this type as a C string ("binary")
  static constexpr const char* type_name() { return "binary"; }

  BinaryType() : BinaryType(Type::BINARY) {}

  /// \brief A bitmap buffer, a fixed-width buffer of sizeof(offset_type) and
  /// a variable-width buffer, in that order
  DataTypeLayout layout() const override {
    const auto offsets_spec = DataTypeLayout::FixedWidth(sizeof(offset_type));
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), offsets_spec, DataTypeLayout::VariableWidth()});
  }

  std::string ToString() const override;

  /// \brief Same string as type_name()
  std::string name() const override { return type_name(); }

 protected:
  std::string ComputeFingerprint() const override;

  // Allow subclasses like StringType to change the logical type.
  explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
};
/// \brief Concrete type class for large variable-size binary data
///
/// Uses int64_t as offset type.
class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
 public:
  static constexpr Type::type type_id = Type::LARGE_BINARY;
  static constexpr bool is_utf8 = false;
  using offset_type = int64_t;
  using PhysicalType = LargeBinaryType;

  /// \brief Name of this type as a C string ("large_binary")
  static constexpr const char* type_name() { return "large_binary"; }

  LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}

  /// \brief A bitmap buffer, a fixed-width buffer of sizeof(offset_type) and
  /// a variable-width buffer, in that order
  DataTypeLayout layout() const override {
    const auto offsets_spec = DataTypeLayout::FixedWidth(sizeof(offset_type));
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), offsets_spec, DataTypeLayout::VariableWidth()});
  }

  std::string ToString() const override;

  /// \brief Same string as type_name()
  std::string name() const override { return type_name(); }

 protected:
  std::string ComputeFingerprint() const override;

  // Allow subclasses like LargeStringType to change the logical type.
  explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
};
/// \brief Concrete type class for variable-size string data, utf8-encoded
///
/// Derives from BinaryType, which is also its PhysicalType, and constructs
/// it with Type::STRING as the logical type.
class ARROW_EXPORT StringType : public BinaryType {
 public:
  static constexpr Type::type type_id = Type::STRING;
  static constexpr bool is_utf8 = true;
  using PhysicalType = BinaryType;

  /// \brief Name of this type as a C string ("utf8")
  static constexpr const char* type_name() { return "utf8"; }

  StringType() : BinaryType(Type::STRING) {}

  std::string ToString() const override;

  /// \brief Same string as type_name()
  std::string name() const override { return type_name(); }

 protected:
  std::string ComputeFingerprint() const override;
};
/// \brief Concrete type class for large variable-size string data, utf8-encoded
///
/// Derives from LargeBinaryType, which is also its PhysicalType, and
/// constructs it with Type::LARGE_STRING as the logical type.
class ARROW_EXPORT LargeStringType : public LargeBinaryType {
 public:
  static constexpr Type::type type_id = Type::LARGE_STRING;
  static constexpr bool is_utf8 = true;
  using PhysicalType = LargeBinaryType;

  /// \brief Name of this type as a C string ("large_utf8")
  static constexpr const char* type_name() { return "large_utf8"; }

  LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}

  std::string ToString() const override;

  /// \brief Same string as type_name()
  std::string name() const override { return type_name(); }

 protected:
  std::string ComputeFingerprint() const override;
};
/// \brief Concrete type class for fixed-size binary data
///
/// The byte width is a constructor parameter stored in byte_width_.
class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
 public:
  static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
  static constexpr bool is_utf8 = false;

  /// \brief Name of this type as a C string ("fixed_size_binary")
  static constexpr const char* type_name() { return "fixed_size_binary"; }

  /// \brief Construct with the given byte width (not validated here; see Make)
  explicit FixedSizeBinaryType(int32_t byte_width)
      : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}

  /// \brief Construct with the given byte width and a different type id
  explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
      : FixedWidthType(override_type_id), byte_width_(byte_width) {}

  std::string ToString() const override;
  std::string name() const override { return "fixed_size_binary"; }

  /// \brief A bitmap buffer followed by a fixed-width buffer of byte_width()
  DataTypeLayout layout() const override {
    return DataTypeLayout(
        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(byte_width())});
  }

  /// \brief Return the byte width given at construction
  ///
  /// The return type is spelled int32_t, exactly like the virtual function
  /// it overrides in DataType, so the override is also well-formed on
  /// platforms where int32_t is not a typedef of int.
  int32_t byte_width() const override { return byte_width_; }

  int bit_width() const override;

  // Validating constructor
  static Result<std::shared_ptr<DataType>> Make(int32_t byte_width);

 protected:
  std::string ComputeFingerprint() const override;

  int32_t byte_width_;
};
/// @}
/// \addtogroup numeric-datatypes
///
/// @{
/// \brief Base type class for (fixed-size) decimal data
///
/// Stores the precision and scale shared by the concrete decimal types and
/// forwards the byte width and type id to FixedSizeBinaryType.
class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
 public:
  /// \brief Construct from a concrete type id, byte width, precision and scale
  ///
  /// The arguments are stored as given; this constructor does not check them.
  explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
                       int32_t scale)
      : FixedSizeBinaryType(byte_width, type_id),
        precision_{precision},
        scale_{scale} {}

  /// Constructs concrete decimal types
  static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
                                                int32_t scale);

  /// \brief The precision this type was constructed with
  int32_t precision() const { return precision_; }

  /// \brief The scale this type was constructed with
  int32_t scale() const { return scale_; }

  /// \brief Returns the number of bytes needed for precision.
  ///
  /// precision must be >= 1
  static int32_t DecimalSize(int32_t precision);

 protected:
  std::string ComputeFingerprint() const override;

  int32_t precision_;
  int32_t scale_;
};
/// \brief Concrete type class for 128-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer. The precision is the number of significant digits the decimal
/// type can represent; the scale is the number of digits after the decimal
/// point (note that the scale can be negative).
///
/// For example, `Decimal128Type(7, 3)` can exactly represent the numbers
/// 1234.567 and -1234.567 (encoded internally as the 128-bit integers
/// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
///
/// Decimal128Type has a maximum precision of 38 significant digits
/// (also available as Decimal128Type::kMaxPrecision).
/// If higher precision is needed, consider using Decimal256Type.
class ARROW_EXPORT Decimal128Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL128;

  /// Smallest precision supported by this type.
  static constexpr int32_t kMinPrecision = 1;
  /// Largest precision supported by this type.
  static constexpr int32_t kMaxPrecision = 38;
  /// Size of one value, in bytes.
  static constexpr int32_t kByteWidth = 16;

  /// \brief Canonical name of this type ("decimal128")
  static constexpr const char* type_name() { return "decimal128"; }

  /// Decimal128Type constructor that aborts on invalid input.
  explicit Decimal128Type(int32_t precision, int32_t scale);

  /// Decimal128Type constructor that returns an error on invalid input.
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString() const override;

  /// \brief Same text as type_name(), returned as a std::string
  std::string name() const override { return type_name(); }
};
/// \brief Concrete type class for 256-bit decimal data
///
/// Arrow decimals are fixed-point decimal numbers encoded as a scaled
/// integer. The precision is the number of significant digits the decimal
/// type can represent; the scale is the number of digits after the decimal
/// point (note that the scale can be negative).
///
/// Decimal256Type has a maximum precision of 76 significant digits
/// (also available as Decimal256Type::kMaxPrecision).
///
/// For most use cases, the maximum precision offered by Decimal128Type
/// is sufficient, and it will result in a more compact and more efficient
/// encoding.
class ARROW_EXPORT Decimal256Type : public DecimalType {
 public:
  static constexpr Type::type type_id = Type::DECIMAL256;

  /// Smallest precision supported by this type.
  static constexpr int32_t kMinPrecision = 1;
  /// Largest precision supported by this type.
  static constexpr int32_t kMaxPrecision = 76;
  /// Size of one value, in bytes.
  static constexpr int32_t kByteWidth = 32;

  /// \brief Canonical name of this type ("decimal256")
  static constexpr const char* type_name() { return "decimal256"; }

  /// Decimal256Type constructor that aborts on invalid input.
  explicit Decimal256Type(int32_t precision, int32_t scale);

  /// Decimal256Type constructor that returns an error on invalid input.
  static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);

  std::string ToString() const override;

  /// \brief Same text as type_name(), returned as a std::string
  std::string name() const override { return type_name(); }
};
/// @}
/// \addtogroup nested-datatypes
///
/// @{
/// \brief Base class for all variable-size list data types
///
/// The single child field (index 0 of the children) describes the list's values.
class ARROW_EXPORT BaseListType : public NestedType {
 public:
  using NestedType::NestedType;

  // Declared here without an inline body. Its only purpose is to keep this
  // class from being defined in each translation unit, which avoids a
  // one-definition-rule violation.
  ~BaseListType() override;

  /// \brief The child field describing the list's values
  const std::shared_ptr<Field>& value_field() const { return children_[0]; }

  /// \brief The data type of the list's values (the type of value_field())
  const std::shared_ptr<DataType>& value_type() const { return value_field()->type(); }
};
/// \brief Concrete type class for list data
///
/// List data is nested data where each value is a variable number of
/// child items. Lists can be recursively nested, for example
/// list(list(int32)).
class ARROW_EXPORT ListType : public BaseListType {
public:
static constexpr Type::type type_id = Type::LIST;
using offset_type = int32_t;
static constexpr const char* type_name() { return "list"; }