14
14
#include " Message_generated.h"
15
15
#include " Schema_generated.h"
16
16
17
+ #include " serialize.hpp"
17
18
#include " utils.hpp"
18
19
19
20
namespace sparrow_ipc
@@ -28,116 +29,18 @@ namespace sparrow_ipc
28
29
// making its message body zero-length.
29
30
// Serializes `arr` into a freshly created byte vector and returns it.
// The Arrow C structures for `arr` are obtained through
// sparrow::get_arrow_structures; a Schema message is produced first (step I)
// and a RecordBatch message second (step II), both targeting `final_buffer`.
// NOTE(review): this text is a diff rendering. Lines starting with '-' are the
// previous inline implementation, lines starting with '+' are the replacement
// calls into details:: helpers, and bare numbers are diff line numbers.
std::vector<uint8_t > serialize_null_array (sparrow::null_array& arr)
30
31
{
31
- // Use the Arrow C Data Interface to get a generic description of the array.
32
- // For a null_array, the ArrowArray struct will report n_buffers = 0.
33
32
auto [arrow_arr_ptr, arrow_schema_ptr] = sparrow::get_arrow_structures (arr);
34
33
auto & arrow_arr = *arrow_arr_ptr;
35
34
auto & arrow_schema = *arrow_schema_ptr;
36
35
37
36
std::vector<uint8_t > final_buffer;
38
-
39
37
// I - Serialize the Schema message
40
- // This part is almost identical to how a primitive_array's schema is serialized.
41
- {
42
- flatbuffers::FlatBufferBuilder schema_builder;
43
-
44
- flatbuffers::Offset<flatbuffers::String> fb_name_offset = 0 ;
45
- if (arrow_schema.name )
46
- {
47
- fb_name_offset = schema_builder.CreateString (arrow_schema.name );
48
- }
49
-
50
- // For null_array, the format string is "n".
51
- auto [type_enum, type_offset] = utils::get_flatbuffer_type (schema_builder, arrow_schema.format );
52
-
53
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>>
54
- fb_metadata_offset = 0 ;
55
-
56
- if (arr.metadata ())
57
- {
58
- sparrow::key_value_view metadata_view = *(arr.metadata ());
59
- std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>> kv_offsets;
60
- kv_offsets.reserve (metadata_view.size ());
61
- auto mv_it = metadata_view.cbegin ();
62
- for (auto i = 0 ; i < metadata_view.size (); ++i, ++mv_it)
63
- {
64
- auto key_offset = schema_builder.CreateString (std::string ((*mv_it).first ));
65
- auto value_offset = schema_builder.CreateString (std::string ((*mv_it).second ));
66
- kv_offsets.push_back (
67
- org::apache::arrow::flatbuf::CreateKeyValue (schema_builder, key_offset, value_offset));
68
- }
69
- fb_metadata_offset = schema_builder.CreateVector (kv_offsets);
70
- }
71
-
72
- auto fb_field = org::apache::arrow::flatbuf::CreateField (
73
- schema_builder,
74
- fb_name_offset,
75
- (arrow_schema.flags & static_cast <int64_t >(sparrow::ArrowFlag::NULLABLE)) != 0 ,
76
- type_enum,
77
- type_offset,
78
- 0 , // dictionary
79
- 0 , // children
80
- fb_metadata_offset);
81
-
82
- std::vector<flatbuffers::Offset<org::apache::arrow::flatbuf::Field>> fields_vec = {fb_field};
83
- auto fb_fields = schema_builder.CreateVector (fields_vec);
84
-
85
- auto schema_offset = org::apache::arrow::flatbuf::CreateSchema (schema_builder, org::apache::arrow::flatbuf::Endianness::Little, fb_fields);
86
-
87
- auto schema_message_offset = org::apache::arrow::flatbuf::CreateMessage (
88
- schema_builder,
89
- org::apache::arrow::flatbuf::MetadataVersion::V5,
90
- org::apache::arrow::flatbuf::MessageHeader::Schema,
91
- schema_offset.Union (),
92
- 0 // bodyLength
93
- );
94
- schema_builder.Finish (schema_message_offset);
95
-
// (removed code) Layout written below: a uint32_t holding schema_len at the
// start of final_buffer, followed by the schema_len bytes of the flatbuffer.
96
- uint32_t schema_len = schema_builder.GetSize ();
97
- final_buffer.resize (sizeof (uint32_t ) + schema_len);
98
- memcpy (final_buffer.data () + sizeof (uint32_t ), schema_builder.GetBufferPointer (), schema_len);
99
- *(reinterpret_cast <uint32_t *>(final_buffer.data ())) = schema_len;
100
- }
38
// Replacement for the removed inline block above. Presumably it writes the
// same length-prefixed Schema message into final_buffer — TODO confirm against
// the helper's definition (likely serialize.hpp, included by this change).
+ details::serialize_schema_message (arrow_schema, arr.metadata (), final_buffer);
101
39
102
40
// II - Serialize the RecordBatch message
103
- {
104
- flatbuffers::FlatBufferBuilder batch_builder;
105
-
106
- // The FieldNode describes the layout (length and null count).
107
- // For a null_array, length and null_count are always equal.
108
- org::apache::arrow::flatbuf::FieldNode field_node_struct (arrow_arr.length , arrow_arr.null_count );
109
- auto fb_nodes_vector = batch_builder.CreateVectorOfStructs (&field_node_struct, 1 );
110
-
111
- // A null_array has no buffers. The ArrowArray struct reports n_buffers = 0,
112
- // so we create an empty vector of buffers for the Flatbuffers message.
113
- auto fb_buffers_vector = batch_builder.CreateVectorOfStructs <org::apache::arrow::flatbuf::Buffer>({});
114
-
115
- auto record_batch_offset = org::apache::arrow::flatbuf::CreateRecordBatch (batch_builder, arrow_arr.length , fb_nodes_vector, fb_buffers_vector);
116
-
117
- // The bodyLength is 0 because there are no data buffers.
118
- auto batch_message_offset = org::apache::arrow::flatbuf::CreateMessage (
119
- batch_builder,
120
- org::apache::arrow::flatbuf::MetadataVersion::V5,
121
- org::apache::arrow::flatbuf::MessageHeader::RecordBatch,
122
- record_batch_offset.Union (),
123
- 0 // bodyLength
124
- );
125
- batch_builder.Finish (batch_message_offset);
126
-
// (removed code) The RecordBatch metadata is appended after the existing
// contents of final_buffer: a uint32_t holding the unpadded batch_meta_len,
// then the flatbuffer bytes, then zero bytes up to the size returned by
// utils::align_to_8 (batch_meta_len).
127
- uint32_t batch_meta_len = batch_builder.GetSize ();
128
- int64_t aligned_batch_meta_len = utils::align_to_8 (batch_meta_len);
129
-
130
- size_t current_size = final_buffer.size ();
131
- // Resize for the RecordBatch metadata. There is no body to append.
132
- final_buffer.resize (current_size + sizeof (uint32_t ) + aligned_batch_meta_len);
133
- uint8_t * dst = final_buffer.data () + current_size;
134
-
135
- *(reinterpret_cast <uint32_t *>(dst)) = batch_meta_len;
136
- dst += sizeof (uint32_t );
137
- memcpy (dst, batch_builder.GetBufferPointer (), batch_meta_len);
138
- memset (dst + batch_meta_len, 0 , aligned_batch_meta_len - batch_meta_len);
139
- }
41
// Replacement for the removed inline block above. Presumably it appends the
// RecordBatch message to final_buffer as the removed code did — TODO confirm
// against the helper's definition.
+ details::serialize_record_batch_message (arrow_arr, final_buffer);
140
42
43
+ // Return the final buffer containing the complete IPC stream
141
44
return final_buffer;
142
45
}
143
46
0 commit comments