mito2/cache/
cache_size.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! Helpers for estimating the in-memory size of cached values.
16
17use std::mem;
18
19use parquet::file::metadata::{
20    FileMetaData, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, RowGroupMetaData,
21};
22use parquet::file::page_index::index::Index;
23use parquet::format::{ColumnOrder, KeyValue, PageLocation};
24use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
25
26/// Returns estimated size of [ParquetMetaData].
27pub fn parquet_meta_size(meta: &ParquetMetaData) -> usize {
28    // struct size
29    let mut size = mem::size_of::<ParquetMetaData>();
30    // file_metadata
31    size += file_meta_heap_size(meta.file_metadata());
32    // row_groups
33    size += meta
34        .row_groups()
35        .iter()
36        .map(row_group_meta_heap_size)
37        .sum::<usize>();
38    // column_index
39    size += meta
40        .column_index()
41        .map(parquet_column_index_heap_size)
42        .unwrap_or(0);
43    // offset_index
44    size += meta
45        .offset_index()
46        .map(parquet_offset_index_heap_size)
47        .unwrap_or(0);
48
49    size
50}
51
52/// Returns estimated size of [FileMetaData] allocated from heap.
53fn file_meta_heap_size(meta: &FileMetaData) -> usize {
54    // created_by
55    let mut size = meta.created_by().map(|s| s.len()).unwrap_or(0);
56    // key_value_metadata
57    size += meta
58        .key_value_metadata()
59        .map(|kvs| {
60            kvs.iter()
61                .map(|kv| {
62                    kv.key.len()
63                        + kv.value.as_ref().map(|v| v.len()).unwrap_or(0)
64                        + mem::size_of::<KeyValue>()
65                })
66                .sum()
67        })
68        .unwrap_or(0);
69    // schema_descr (It's a ptr so we also add size of SchemaDescriptor).
70    size += mem::size_of::<SchemaDescriptor>();
71    size += schema_descr_heap_size(meta.schema_descr());
72    // column_orders
73    size += meta
74        .column_orders()
75        .map(|orders| orders.len() * mem::size_of::<ColumnOrder>())
76        .unwrap_or(0);
77
78    size
79}
80
81/// Returns estimated size of [SchemaDescriptor] allocated from heap.
82fn schema_descr_heap_size(descr: &SchemaDescriptor) -> usize {
83    // schema
84    let mut size = mem::size_of::<Type>();
85    // leaves
86    size += descr
87        .columns()
88        .iter()
89        .map(|descr| mem::size_of::<ColumnDescriptor>() + column_descr_heap_size(descr))
90        .sum::<usize>();
91    // leaf_to_base
92    size += descr.num_columns() * mem::size_of::<usize>();
93
94    size
95}
96
97/// Returns estimated size of [ColumnDescriptor] allocated from heap.
98fn column_descr_heap_size(descr: &ColumnDescriptor) -> usize {
99    descr.path().parts().iter().map(|s| s.len()).sum()
100}
101
102/// Returns estimated size of [ColumnDescriptor] allocated from heap.
103fn row_group_meta_heap_size(meta: &RowGroupMetaData) -> usize {
104    mem::size_of_val(meta.columns())
105}
106
107/// Returns estimated size of [ParquetColumnIndex] allocated from heap.
108fn parquet_column_index_heap_size(column_index: &ParquetColumnIndex) -> usize {
109    column_index
110        .iter()
111        .map(|row_group| row_group.len() * mem::size_of::<Index>() + mem::size_of_val(row_group))
112        .sum()
113}
114
115/// Returns estimated size of [ParquetOffsetIndex] allocated from heap.
116fn parquet_offset_index_heap_size(offset_index: &ParquetOffsetIndex) -> usize {
117    offset_index
118        .iter()
119        .map(|row_group| {
120            row_group
121                .iter()
122                .map(|column| {
123                    column.page_locations.len() * mem::size_of::<PageLocation>()
124                        + mem::size_of_val(column)
125                })
126                .sum::<usize>()
127                + mem::size_of_val(row_group)
128        })
129        .sum()
130}