mito2/cache/
cache_size.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! Cache size estimation for different cache values.
16
17use std::mem;
18
19use parquet::basic::ColumnOrder;
20use parquet::file::metadata::{
21    FileMetaData, KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex,
22    RowGroupMetaData,
23};
24use parquet::file::page_index::column_index::ColumnIndexMetaData as Index;
25use parquet::file::page_index::offset_index::PageLocation;
26use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
27
28/// Returns estimated size of [ParquetMetaData].
29pub fn parquet_meta_size(meta: &ParquetMetaData) -> usize {
30    // struct size
31    let mut size = mem::size_of::<ParquetMetaData>();
32    // file_metadata
33    size += file_meta_heap_size(meta.file_metadata());
34    // row_groups
35    size += meta
36        .row_groups()
37        .iter()
38        .map(row_group_meta_heap_size)
39        .sum::<usize>();
40    // column_index
41    size += meta
42        .column_index()
43        .map(parquet_column_index_heap_size)
44        .unwrap_or(0);
45    // offset_index
46    size += meta
47        .offset_index()
48        .map(parquet_offset_index_heap_size)
49        .unwrap_or(0);
50
51    size
52}
53
54/// Returns estimated size of [FileMetaData] allocated from heap.
55fn file_meta_heap_size(meta: &FileMetaData) -> usize {
56    // created_by
57    let mut size = meta.created_by().map(|s| s.len()).unwrap_or(0);
58    // key_value_metadata
59    size += meta
60        .key_value_metadata()
61        .map(|kvs| {
62            kvs.iter()
63                .map(|kv| {
64                    kv.key.len()
65                        + kv.value.as_ref().map(|v| v.len()).unwrap_or(0)
66                        + mem::size_of::<KeyValue>()
67                })
68                .sum()
69        })
70        .unwrap_or(0);
71    // schema_descr (It's a ptr so we also add size of SchemaDescriptor).
72    size += mem::size_of::<SchemaDescriptor>();
73    size += schema_descr_heap_size(meta.schema_descr());
74    // column_orders
75    size += meta
76        .column_orders()
77        .map(|orders| orders.len() * mem::size_of::<ColumnOrder>())
78        .unwrap_or(0);
79
80    size
81}
82
83/// Returns estimated size of [SchemaDescriptor] allocated from heap.
84fn schema_descr_heap_size(descr: &SchemaDescriptor) -> usize {
85    // schema
86    let mut size = mem::size_of::<Type>();
87    // leaves
88    size += descr
89        .columns()
90        .iter()
91        .map(|descr| mem::size_of::<ColumnDescriptor>() + column_descr_heap_size(descr))
92        .sum::<usize>();
93    // leaf_to_base
94    size += descr.num_columns() * mem::size_of::<usize>();
95
96    size
97}
98
99/// Returns estimated size of [ColumnDescriptor] allocated from heap.
100fn column_descr_heap_size(descr: &ColumnDescriptor) -> usize {
101    descr.path().parts().iter().map(|s| s.len()).sum()
102}
103
104/// Returns estimated size of [ColumnDescriptor] allocated from heap.
105fn row_group_meta_heap_size(meta: &RowGroupMetaData) -> usize {
106    mem::size_of_val(meta.columns())
107}
108
109/// Returns estimated size of [ParquetColumnIndex] allocated from heap.
110fn parquet_column_index_heap_size(column_index: &ParquetColumnIndex) -> usize {
111    column_index
112        .iter()
113        .map(|row_group| row_group.len() * mem::size_of::<Index>() + mem::size_of_val(row_group))
114        .sum()
115}
116
117/// Returns estimated size of [ParquetOffsetIndex] allocated from heap.
118fn parquet_offset_index_heap_size(offset_index: &ParquetOffsetIndex) -> usize {
119    offset_index
120        .iter()
121        .map(|row_group| {
122            row_group
123                .iter()
124                .map(|column| {
125                    column.page_locations.len() * mem::size_of::<PageLocation>()
126                        + mem::size_of_val(column)
127                })
128                .sum::<usize>()
129                + mem::size_of_val(row_group)
130        })
131        .sum()
132}