taco-db  0.1.0
Schema.h
Go to the documentation of this file.
1 // catalog/Schema.h
2 #ifndef CATALOG_SCHEMA_H
3 #define CATALOG_SCHEMA_H
4 
5 #include "tdb.h"
6 
7 #include <vector>
8 
9 namespace taco {
10 
11 class BootstrapCatCache;
12 
// A Schema object stores the information for accessing an ordered set of
// typed fields.
//
// NOTE(review): this is a rendered source listing. The original doc comments
// were stripped and a few declaration lines are absent (pointed out below
// where they occur). Descriptions are taken from the listing's symbol index
// and from the visible code only.
39 class Schema {
40 private:
    // Per-field information kept by the schema.
    // NOTE(review): the index also lists m_typid (the type ID of this field),
    // m_nullbit_id (null-bit information in the payload layout) and m_offset
    // (how to find this field in the payload layout) as FieldInfo members;
    // their declaration lines are not present in this listing.
41  struct FieldInfo {
44 
    // Cached copy of the typlen.
46  int16_t m_typlen;
47 
    // Cached copy of typalign.
49  int8_t m_typalign;
50 
    // Whether this field is passed by ref.
55  bool m_typbyref;
56 
    // The type parameter of this field.
58  uint64_t m_typparam;
59 
71 
86  };
87 
    // Private constructor taking per-field type ids, type parameters,
    // nullability flags and field names (field_names is taken by value).
    // Declared in the private section; the public Create() overloads below
    // take the same kinds of arguments.
99  Schema(const std::vector<Oid> &typid,
100  const std::vector<uint64_t> &typparam,
101  const std::vector<bool> &nullable,
102  std::vector<std::string> field_names);
103 
    // Private constructor from already-built FieldInfo entries plus field
    // names.
104  Schema(const std::vector<FieldInfo>& fields,
105  const std::vector<std::string>& field_names);
106 
    // The default constructor for constructing a fake schema.
110  Schema() = default;
111 
112 public:
    // Creates a new schema with the given types (this overload takes no
    // field names). Returns a raw Schema pointer.
    // NOTE(review): ownership of the returned pointer is not visible from
    // this header -- confirm in Schema.cpp.
120  static Schema *Create(const std::vector<Oid> &typid,
121  const std::vector<uint64_t> &typparam,
122  const std::vector<bool> &nullable);
123 
    // Creates a new schema with the given types and field names.
133  static Schema *Create(const std::vector<Oid> &typid,
134  const std::vector<uint64_t> &typparam,
135  const std::vector<bool> &nullable,
136  std::vector<std::string> field_names);
137 
    // Combines two schemas into one.
141  static Schema *Combine(const Schema* left, const Schema* right);
142 
    // Returns whether the two schemas are identical (the index describes
    // this as, among other things, having the same number of fields; the
    // full criteria are truncated in the listing).
148  static bool Identical(const Schema* left, const Schema* right);
149 
    // Returns whether the schema on the right is union compatible with the
    // left (the full criteria are truncated in the listing).
156  static bool Compatible(const Schema* left, const Schema* right);
157 
158 private:
    // The generic implementation of schema layout computation with some
    // catalog cache class CCache.
    // NOTE(review): cache_typinfo_only presumably restricts the work to
    // collecting type info without computing the layout -- confirm in
    // Schema.cpp.
164  template<class CCache>
165  void ComputeLayoutImpl(CCache *catcache, bool cache_typinfo_only);
166 
167 public:
    // Computes the layout of the record payload with this schema, using the
    // global catalog cache.
172  void ComputeLayout();
173 
    // Overload taking an explicit BootstrapCatCache.
    // NOTE(review): presumably the same computation as ComputeLayout() but
    // against the supplied bootstrap catalog cache -- confirm in Schema.cpp.
180  void ComputeLayout(BootstrapCatCache *catcache);
181 
    // Collects the type info only without computing the record payload
    // layout.
188  void CollectTypeInfo();
189 
    // IsTypeInfoCollected() const: returns the m_type_info_collected flag.
    // (The declarator line, Schema.h:191 per the index, is missing from this
    // listing.)
190  bool
192  return m_type_info_collected;
193  }
194 
    // IsLayoutComputed() const: returns the m_layout_computed flag.
    // (The declarator line, Schema.h:196 per the index, is missing from this
    // listing.)
195  bool
197  return m_layout_computed;
198  }
199 
200 private:
201 
    // Logs a kFatal message if the layout has not been computed yet.
202  void EnsureLayoutComputed() const {
203  if (!IsLayoutComputed()) {
204  LOG(kFatal, "Schema::ComputeLayout() must be called first "
205  "before querying the on-disk layout of a Schema");
206  }
207  }
208 
    // Logs a kFatal message if the type info has not been collected yet.
209  void EnsureTypeInfoCollected() const {
210  if (!IsTypeInfoCollected()) {
211  LOG(kFatal, "Schema::CollectTypeInfo() or Schema::ComputeLayout() "
212  "must be called first before querying the in-memory "
213  "type info of a schema");
214  }
215  }
216 
217 public:
218 
    // Returns the type Oid of a field. Indexes m_field directly; no bounds
    // check is performed here.
222  inline Oid
223  GetFieldTypeId(FieldId field_id) const {
224  return m_field[field_id].m_typid;
225  }
226 
    // Returns the type parameter of a field. No bounds check is performed
    // here.
230  inline uint64_t
231  GetFieldTypeParam(FieldId field_id) const {
232  return m_field[field_id].m_typparam;
233  }
234 
    // Returns the field name of field field_id, or an empty string when
    // field_id is not below m_field_names.size() (field names are optional,
    // so the vector may be empty).
239  inline absl::string_view
240  GetFieldName(FieldId field_id) const {
241  if ((size_t) field_id >= m_field_names.size()) {
242  return "";
243  }
244  return m_field_names[field_id];
245  }
246 
    // Returns whether this field is nullable: true exactly when the field's
    // m_nullbit_id is non-negative.
250  inline bool
251  FieldIsNullable(FieldId field_id) const {
252  return m_field[field_id].m_nullbit_id >= 0;
253  }
254 
    // Returns whether this field is passed by reference or by value in
    // memory.
261  inline bool
262  FieldPassByRef(FieldId field_id) const {
263  return m_field[field_id].m_typbyref;
264  }
265 
    // Returns the cached size of a field (the cached m_typlen).
    // (Per the index the return type is FieldOffset; the return-type line,
    // Schema.h:270, and line 272 are missing from this listing.)
271  GetFieldLength(size_t field_id) const {
273  return m_field[field_id].m_typlen;
274  }
275 
    // Returns the total number of fields (m_field.size() cast to FieldId).
279  inline FieldId
280  GetNumFields() const {
281  return (FieldId) m_field.size();
282  }
283 
    // Returns the field ID of the field with the given field_name.
    // NOTE(review): the not-found behavior is not visible from this header
    // -- confirm in Schema.cpp.
289  FieldId GetFieldIdFromFieldName(absl::string_view field_name) const;
290 
    // Returns the offset and the length of a field in this schema as a pair,
    // for the given record payload.
297  std::pair<FieldOffset, FieldOffset> GetOffsetAndLength(
298  FieldId field_id,
299  const char *payload) const;
300 
    // Overload for a vector of Datum. See WritePayloadToBufferImpl().
304  FieldOffset WritePayloadToBuffer(const std::vector<Datum> &data,
305  maxaligned_char_buf &buf) const;
306 
    // Overload for a vector of DatumRef. See WritePayloadToBufferImpl().
310  FieldOffset WritePayloadToBuffer(const std::vector<DatumRef> &data,
311  maxaligned_char_buf &buf) const;
312 
    // Overload for a vector of NullableDatumRef. See
    // WritePayloadToBufferImpl().
316  FieldOffset WritePayloadToBuffer(const std::vector<NullableDatumRef> &data,
317  maxaligned_char_buf &buf) const;
318 
319 
    // Returns whether a field is null or not in a record payload.
323  bool FieldIsNull(FieldId field_id, const char *payload) const;
324 
    // Returns a field in the payload as a Datum.
331  Datum GetField(FieldId field_id, const char *payload) const;
332 
    // Dissembles the payload into a vector of Datums in field order as
    // defined in the schema.
340  std::vector<Datum> DissemblePayload(const char *payload) const;
341 
342 
343 private:
    // Converts the data to bytes in storage layout and appends them to buf
    // without clearing it first. Shared implementation behind the
    // WritePayloadToBuffer() overloads.
    // NOTE(review): the meaning of the returned FieldOffset is not visible
    // from this header -- confirm in Schema.cpp.
360  template<class SomeDatum>
361  FieldOffset WritePayloadToBufferImpl(const std::vector<SomeDatum> &data,
362  maxaligned_char_buf &buf) const;
363 
    // NOTE(review): per the index, the following data members are declared
    // on original lines 365-398 but their declarations are missing from
    // this listing: m_type_info_collected, m_layout_computed,
    // m_has_only_nonnullable_fixedlen_fields,
    // m_num_nonnullable_fixedlen_fields, m_num_nullable_fixedlen_fields,
    // m_num_varlen_fields, m_null_bitmap_begin (offset to the null bitmap),
    // m_varlen_end_array_begin (offset to the beginning of the
    // variable-length field end array) and m_varlen_payload_begin (offset to
    // the beginning of the varlen payload).
366 
368 
370 
372 
374 
376 
381 
387 
399 
    // The order of the fields to be placed in the actual payload.
406  std::vector<FieldId> m_field_reorder_idx;
407 
    // Information about the individual fields.
409  std::vector<FieldInfo> m_field;
410 
    // Optional field names (may be empty or of the same length as m_field).
412  std::vector<std::string> m_field_names;
413 };
414 
415 } // namespace taco
416 
417 #endif // CATALOG_SCHEMA_H
BootstrapCatCache stores hard-coded data needed to bootstrap the entire database catalog.
Definition: BootstrapCatCache.h:21
A Datum stores and possibly manages the memory resource of a read-only value of a plain fixed-length C...
Definition: datum.h:250
A Schema object stores the information for accessing an ordered set of typed fields either from a dis...
Definition: Schema.h:39
bool m_has_only_nonnullable_fixedlen_fields
Definition: Schema.h:369
std::vector< std::string > m_field_names
optional field names (may be empty or of the same length as m_field)
Definition: Schema.h:412
bool FieldIsNullable(FieldId field_id) const
Returns whether this field is nullable.
Definition: Schema.h:251
FieldOffset m_varlen_end_array_begin
The offset to the beginning of the variable-length field end array.
Definition: Schema.h:386
FieldOffset GetFieldLength(size_t field_id) const
Returns the cached size of a field.
Definition: Schema.h:271
bool FieldPassByRef(FieldId field_id) const
Returns whether this field is passed by reference or by value in memory.
Definition: Schema.h:262
Oid GetFieldTypeId(FieldId field_id) const
Returns the type Oid of a field.
Definition: Schema.h:223
FieldOffset WritePayloadToBufferImpl(const std::vector< SomeDatum > &data, maxaligned_char_buf &buf) const
Converts the data to bytes in storage layout and appends them to the buf without clearing it first.
Definition: Schema.cpp:436
FieldId m_num_nonnullable_fixedlen_fields
Definition: Schema.h:371
void CollectTypeInfo()
Collects the type info only without computing the record payload layout.
Definition: Schema.cpp:325
FieldOffset WritePayloadToBuffer(const std::vector< Datum > &data, maxaligned_char_buf &buf) const
See Schema::WritePayloadToBufferImpl().
Definition: Schema.cpp:417
void ComputeLayoutImpl(CCache *catcache, bool cache_typinfo_only)
The generic implementation of schema layout computation with some catalog cache class CCache.
Definition: Schema.cpp:146
FieldOffset m_null_bitmap_begin
The offset to the null bitmap.
Definition: Schema.h:380
static bool Identical(const Schema *left, const Schema *right)
Returns whether the two schemas are identical (i.e., having the same numbers of fields,...
Definition: Schema.cpp:95
absl::string_view GetFieldName(FieldId field_id) const
Returns the field name of field field_id.
Definition: Schema.h:240
std::vector< Datum > DissemblePayload(const char *payload) const
Dissemble the payload into a vector of Datums in field order as defined in the schema.
Definition: Schema.cpp:582
bool IsTypeInfoCollected() const
Definition: Schema.h:191
FieldId GetNumFields() const
Returns the total number of fields.
Definition: Schema.h:280
bool m_layout_computed
Definition: Schema.h:367
static Schema * Create(const std::vector< Oid > &typid, const std::vector< uint64_t > &typparam, const std::vector< bool > &nullable)
Creates a new schema with the given types and field names.
Definition: Schema.cpp:63
Schema()=default
The default constructor for constructing a fake schema.
FieldId GetFieldIdFromFieldName(absl::string_view field_name) const
Returns the field ID of the field with the name `field_name`.
Definition: Schema.cpp:592
uint64_t GetFieldTypeParam(FieldId field_id) const
Returns the type parameter of a field.
Definition: Schema.h:231
void ComputeLayout()
Computes the layout of the record payload with this schema, using the global catalog cache g_db->catc...
Definition: Schema.cpp:315
FieldOffset m_varlen_payload_begin
The offset to the beginning of the varlen payload.
Definition: Schema.h:398
static Schema * Combine(const Schema *left, const Schema *right)
Combines two schemas into one.
Definition: Schema.cpp:76
void EnsureLayoutComputed() const
Definition: Schema.h:202
FieldId m_num_varlen_fields
Definition: Schema.h:375
bool IsLayoutComputed() const
Definition: Schema.h:196
void EnsureTypeInfoCollected() const
Definition: Schema.h:209
std::pair< FieldOffset, FieldOffset > GetOffsetAndLength(FieldId field_id, const char *payload) const
Returns the offset and the length of a field in this schema as a pair.
Definition: Schema.cpp:330
Datum GetField(FieldId field_id, const char *payload) const
Returns a field in the payload as a Datum.
Definition: Schema.cpp:559
FieldId m_num_nullable_fixedlen_fields
Definition: Schema.h:373
bool m_type_info_collected
Whether the type info has been collected.
Definition: Schema.h:365
static bool Compatible(const Schema *left, const Schema *right)
Returns if the schema on the right is union compatible with the left (i.e., having the same number of...
Definition: Schema.cpp:114
std::vector< FieldInfo > m_field
information about the individual fields
Definition: Schema.h:409
bool FieldIsNull(FieldId field_id, const char *payload) const
Returns whether a field is null or not in a record payload.
Definition: Schema.cpp:546
std::vector< FieldId > m_field_reorder_idx
The order of the fields to be placed in the actual payload.
Definition: Schema.h:406
#define LOG(level,...)
LOG(LogSeverity level, const char *fmt, ...)
Definition: logging.h:116
Definition: datum.h:28
constexpr LogSeverity kFatal
Definition: logging.h:22
uint32_t Oid
Definition: tdb_base.h:210
int16_t FieldOffset
Definition: tdb_base.h:211
int16_t FieldId
Definition: tdb_base.h:212
Definition: Schema.h:41
int16_t m_typlen
Cached copy of the typlen.
Definition: Schema.h:46
int8_t m_typalign
Cached copy of typalign.
Definition: Schema.h:49
FieldOffset m_offset
Stores information about how to find this field in the payload layout.
Definition: Schema.h:85
bool m_typbyref
Whether this field is passed by ref.
Definition: Schema.h:55
FieldId m_nullbit_id
Stores information about the null bit of this field in the payload layout.
Definition: Schema.h:70
Oid m_typid
The type ID of this field.
Definition: Schema.h:43
uint64_t m_typparam
The type parameter of this field.
Definition: Schema.h:58
std::vector< char, AlignedAllocImpl::aligned_allocator< 8, char > > maxaligned_char_buf
Definition: tdb_base.h:155