mito2/cache/
cache_size.rs1use std::mem;
18
19use parquet::basic::ColumnOrder;
20use parquet::file::metadata::{
21 FileMetaData, KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex,
22 RowGroupMetaData,
23};
24use parquet::file::page_index::column_index::ColumnIndexMetaData as Index;
25use parquet::file::page_index::offset_index::PageLocation;
26use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
27
28pub fn parquet_meta_size(meta: &ParquetMetaData) -> usize {
30 let mut size = mem::size_of::<ParquetMetaData>();
32 size += file_meta_heap_size(meta.file_metadata());
34 size += meta
36 .row_groups()
37 .iter()
38 .map(row_group_meta_heap_size)
39 .sum::<usize>();
40 size += meta
42 .column_index()
43 .map(parquet_column_index_heap_size)
44 .unwrap_or(0);
45 size += meta
47 .offset_index()
48 .map(parquet_offset_index_heap_size)
49 .unwrap_or(0);
50
51 size
52}
53
54fn file_meta_heap_size(meta: &FileMetaData) -> usize {
56 let mut size = meta.created_by().map(|s| s.len()).unwrap_or(0);
58 size += meta
60 .key_value_metadata()
61 .map(|kvs| {
62 kvs.iter()
63 .map(|kv| {
64 kv.key.len()
65 + kv.value.as_ref().map(|v| v.len()).unwrap_or(0)
66 + mem::size_of::<KeyValue>()
67 })
68 .sum()
69 })
70 .unwrap_or(0);
71 size += mem::size_of::<SchemaDescriptor>();
73 size += schema_descr_heap_size(meta.schema_descr());
74 size += meta
76 .column_orders()
77 .map(|orders| orders.len() * mem::size_of::<ColumnOrder>())
78 .unwrap_or(0);
79
80 size
81}
82
83fn schema_descr_heap_size(descr: &SchemaDescriptor) -> usize {
85 let mut size = mem::size_of::<Type>();
87 size += descr
89 .columns()
90 .iter()
91 .map(|descr| mem::size_of::<ColumnDescriptor>() + column_descr_heap_size(descr))
92 .sum::<usize>();
93 size += descr.num_columns() * mem::size_of::<usize>();
95
96 size
97}
98
99fn column_descr_heap_size(descr: &ColumnDescriptor) -> usize {
101 descr.path().parts().iter().map(|s| s.len()).sum()
102}
103
104fn row_group_meta_heap_size(meta: &RowGroupMetaData) -> usize {
106 mem::size_of_val(meta.columns())
107}
108
109fn parquet_column_index_heap_size(column_index: &ParquetColumnIndex) -> usize {
111 column_index
112 .iter()
113 .map(|row_group| row_group.len() * mem::size_of::<Index>() + mem::size_of_val(row_group))
114 .sum()
115}
116
117fn parquet_offset_index_heap_size(offset_index: &ParquetOffsetIndex) -> usize {
119 offset_index
120 .iter()
121 .map(|row_group| {
122 row_group
123 .iter()
124 .map(|column| {
125 column.page_locations.len() * mem::size_of::<PageLocation>()
126 + mem::size_of_val(column)
127 })
128 .sum::<usize>()
129 + mem::size_of_val(row_group)
130 })
131 .sum()
132}