Skip to main content

servers/otlp/metrics/
semantic.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Per-table semantic metadata accumulated during one OTLP metrics encode pass.
16//!
17//! A metric emits one or more tables (histogram and summary fan out into
18//! `_bucket`/`_sum`/`_count` companions). Each emitted table collects the
19//! metric's scalar semantic keys. The resulting index is serialized onto the
20//! `greptime.internal.semantic.per_table_index` context extension and folded
21//! into each table's options at auto-create time.
22//!
23//! Conflict handling follows the RFC: when two sources disagree on a
24//! single-valued key the value collapses to `mixed` (or `unknown` for keys whose
25//! domain has no `mixed`).
26
27use std::collections::{BTreeMap, HashMap};
28
29use table::requests::{SEMANTIC_VALUE_MIXED, SEMANTIC_VALUE_UNKNOWN, validate_semantic_option};
30
/// Index of `{table_name -> {semantic_key -> value}}` built while encoding.
///
/// The derived `Default` yields an index with no tables recorded.
#[derive(Debug, Default)]
pub struct SemanticIndex {
    /// Per-table scalar keys; conflicting values collapse to `mixed`/`unknown`.
    ///
    /// The outer map is keyed by table name. Each inner map is keyed by a
    /// `'static` semantic key and holds the single value currently recorded
    /// for that key on that table.
    tables: HashMap<String, BTreeMap<&'static str, String>>,
}
37
38impl SemanticIndex {
39    pub fn is_empty(&self) -> bool {
40        self.tables.is_empty()
41    }
42
43    /// Records a scalar semantic key for `table`. A value conflicting with one
44    /// already recorded collapses the key to `mixed`/`unknown`; once collapsed
45    /// it stays collapsed.
46    pub fn record_scalar(&mut self, table: &str, key: &'static str, value: &str) {
47        // Avoid allocating the table name (and an empty map) on the common path
48        // where the table is already present.
49        if let Some(scalars) = self.tables.get_mut(table) {
50            match scalars.get(key).map(String::as_str) {
51                Some(existing) if existing == value => {}
52                Some(SEMANTIC_VALUE_MIXED) | Some(SEMANTIC_VALUE_UNKNOWN) => {}
53                Some(_) => {
54                    scalars.insert(key, collapse_value(key));
55                }
56                None => {
57                    scalars.insert(key, value.to_string());
58                }
59            }
60        } else {
61            self.tables.insert(
62                table.to_string(),
63                BTreeMap::from([(key, value.to_string())]),
64            );
65        }
66    }
67
68    /// Serializes to the JSON `{table -> {key -> value}}` carried on the context
69    /// extension, or `None` when nothing was recorded.
70    pub fn encode(&self) -> Option<String> {
71        if self.tables.is_empty() {
72            return None;
73        }
74        serde_json::to_string(&self.tables).ok()
75    }
76
77    #[cfg(test)]
78    fn options_of(&self, table: &str) -> Option<&BTreeMap<&'static str, String>> {
79        self.tables.get(table)
80    }
81}
82
83/// The collapsed value for a conflicting scalar key: `mixed` when the key's
84/// domain accepts it, else `unknown`. Uses the vocabulary validator as the
85/// single source of truth for which keys allow `mixed`.
86fn collapse_value(key: &str) -> String {
87    if validate_semantic_option(key, SEMANTIC_VALUE_MIXED) {
88        SEMANTIC_VALUE_MIXED.to_string()
89    } else {
90        SEMANTIC_VALUE_UNKNOWN.to_string()
91    }
92}
93
#[cfg(test)]
mod tests {
    use table::requests::{
        SEMANTIC_METRIC_METADATA_QUALITY, SEMANTIC_METRIC_TYPE, SEMANTIC_METRIC_UNIT,
    };

    use super::*;

    /// Returns the value currently stored for `key` on `table`, panicking if
    /// the table has never been recorded.
    fn stored<'a>(index: &'a SemanticIndex, table: &str, key: &str) -> Option<&'a str> {
        let scalars = index.options_of(table).unwrap();
        scalars.get(key).map(String::as_str)
    }

    #[test]
    fn test_scalar_recording_keeps_first_then_collapses_on_conflict() {
        let mut index = SemanticIndex::default();

        // Recording the same value twice leaves it untouched.
        for _ in 0..2 {
            index.record_scalar("t", SEMANTIC_METRIC_TYPE, "counter");
        }
        assert_eq!(stored(&index, "t", SEMANTIC_METRIC_TYPE), Some("counter"));

        // Conflict on a key whose domain has `mixed` collapses to `mixed`.
        index.record_scalar("t", SEMANTIC_METRIC_TYPE, "gauge");
        assert_eq!(stored(&index, "t", SEMANTIC_METRIC_TYPE), Some("mixed"));

        // Further writes stay collapsed.
        index.record_scalar("t", SEMANTIC_METRIC_TYPE, "histogram");
        assert_eq!(stored(&index, "t", SEMANTIC_METRIC_TYPE), Some("mixed"));
    }

    #[test]
    fn test_scalar_conflict_without_mixed_domain_collapses_to_unknown() {
        let mut index = SemanticIndex::default();
        for quality in ["declared", "inferred"] {
            index.record_scalar("t", SEMANTIC_METRIC_METADATA_QUALITY, quality);
        }

        // metadata_quality accepts only declared/inferred/unknown, so a conflict
        // is `unknown`.
        assert_eq!(
            stored(&index, "t", SEMANTIC_METRIC_METADATA_QUALITY),
            Some("unknown")
        );
    }

    #[test]
    fn test_encode_is_none_when_empty_and_round_trips() {
        let empty = SemanticIndex::default();
        assert!(empty.is_empty());
        assert_eq!(empty.encode(), None);

        let recorded = [(SEMANTIC_METRIC_TYPE, "counter"), (SEMANTIC_METRIC_UNIT, "By")];
        let mut index = SemanticIndex::default();
        for (key, value) in recorded {
            index.record_scalar("metric_a", key, value);
        }

        let json = index.encode().unwrap();
        let parsed: BTreeMap<String, BTreeMap<String, String>> =
            serde_json::from_str(&json).unwrap();
        let table = parsed.get("metric_a").unwrap();
        for (key, value) in recorded {
            assert_eq!(table.get(key).map(String::as_str), Some(value));
        }
    }
}