mito2/cache/
cache_size.rs1use std::mem;
18
19use parquet::file::metadata::{
20 FileMetaData, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, RowGroupMetaData,
21};
22use parquet::file::page_index::index::Index;
23use parquet::format::{ColumnOrder, KeyValue, PageLocation};
24use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
25
26pub fn parquet_meta_size(meta: &ParquetMetaData) -> usize {
28 let mut size = mem::size_of::<ParquetMetaData>();
30 size += file_meta_heap_size(meta.file_metadata());
32 size += meta
34 .row_groups()
35 .iter()
36 .map(row_group_meta_heap_size)
37 .sum::<usize>();
38 size += meta
40 .column_index()
41 .map(parquet_column_index_heap_size)
42 .unwrap_or(0);
43 size += meta
45 .offset_index()
46 .map(parquet_offset_index_heap_size)
47 .unwrap_or(0);
48
49 size
50}
51
52fn file_meta_heap_size(meta: &FileMetaData) -> usize {
54 let mut size = meta.created_by().map(|s| s.len()).unwrap_or(0);
56 size += meta
58 .key_value_metadata()
59 .map(|kvs| {
60 kvs.iter()
61 .map(|kv| {
62 kv.key.len()
63 + kv.value.as_ref().map(|v| v.len()).unwrap_or(0)
64 + mem::size_of::<KeyValue>()
65 })
66 .sum()
67 })
68 .unwrap_or(0);
69 size += mem::size_of::<SchemaDescriptor>();
71 size += schema_descr_heap_size(meta.schema_descr());
72 size += meta
74 .column_orders()
75 .map(|orders| orders.len() * mem::size_of::<ColumnOrder>())
76 .unwrap_or(0);
77
78 size
79}
80
81fn schema_descr_heap_size(descr: &SchemaDescriptor) -> usize {
83 let mut size = mem::size_of::<Type>();
85 size += descr
87 .columns()
88 .iter()
89 .map(|descr| mem::size_of::<ColumnDescriptor>() + column_descr_heap_size(descr))
90 .sum::<usize>();
91 size += descr.num_columns() * mem::size_of::<usize>();
93
94 size
95}
96
97fn column_descr_heap_size(descr: &ColumnDescriptor) -> usize {
99 descr.path().parts().iter().map(|s| s.len()).sum()
100}
101
102fn row_group_meta_heap_size(meta: &RowGroupMetaData) -> usize {
104 mem::size_of_val(meta.columns())
105}
106
107fn parquet_column_index_heap_size(column_index: &ParquetColumnIndex) -> usize {
109 column_index
110 .iter()
111 .map(|row_group| row_group.len() * mem::size_of::<Index>() + mem::size_of_val(row_group))
112 .sum()
113}
114
115fn parquet_offset_index_heap_size(offset_index: &ParquetOffsetIndex) -> usize {
117 offset_index
118 .iter()
119 .map(|row_group| {
120 row_group
121 .iter()
122 .map(|column| {
123 column.page_locations.len() * mem::size_of::<PageLocation>()
124 + mem::size_of_val(column)
125 })
126 .sum::<usize>()
127 + mem::size_of_val(row_group)
128 })
129 .sum()
130}