Skip to main content

servers/otlp/metrics/
translator.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use ahash::HashMap;
16use lazy_static::lazy_static;
17use otel_arrow_rust::proto::opentelemetry::metrics::v1::Metric;
18use regex::Regex;
19use session::protocol_ctx::{MetricType, OtlpMetricTranslationStrategy};
20
21const UNDERSCORE: &str = "_";
22const DOUBLE_UNDERSCORE: &str = "__";
23const TOTAL: &str = "total";
24const RATIO: &str = "ratio";
25const PER_PREFIX: &str = "per_";
26
27lazy_static! {
28    static ref NON_ALPHA_NUM_CHAR: Regex = Regex::new(r"[^a-zA-Z0-9]").unwrap();
29    static ref UNIT_MAP: HashMap<String, String> = [
30        // Time
31        ("d", "days"),
32        ("h", "hours"),
33        ("min", "minutes"),
34        ("s", "seconds"),
35        ("ms", "milliseconds"),
36        ("us", "microseconds"),
37        ("ns", "nanoseconds"),
38        // Bytes
39        ("By", "bytes"),
40        ("KiBy", "kibibytes"),
41        ("MiBy", "mebibytes"),
42        ("GiBy", "gibibytes"),
43        ("TiBy", "tibibytes"),
44        ("KBy", "kilobytes"),
45        ("MBy", "megabytes"),
46        ("GBy", "gigabytes"),
47        ("TBy", "terabytes"),
48        // SI
49        ("m", "meters"),
50        ("V", "volts"),
51        ("A", "amperes"),
52        ("J", "joules"),
53        ("W", "watts"),
54        ("g", "grams"),
55        // Misc
56        ("Cel", "celsius"),
57        ("Hz", "hertz"),
58        ("1", ""),
59        ("%", "percent"),
60    ]
61    .iter()
62    .map(|(k, v)| (k.to_string(), v.to_string()))
63    .collect();
64    static ref PER_UNIT_MAP: HashMap<String, String> = [
65        ("s", "second"),
66        ("m", "minute"),
67        ("h", "hour"),
68        ("d", "day"),
69        ("w", "week"),
70        ("mo", "month"),
71        ("y", "year"),
72    ]
73    .iter()
74    .map(|(k, v)| (k.to_string(), v.to_string()))
75    .collect();
76}
77
78pub fn translate_metric_name(
79    metric: &Metric,
80    metric_type: &MetricType,
81    strategy: OtlpMetricTranslationStrategy,
82) -> String {
83    match (strategy.should_escape(), strategy.should_add_suffixes()) {
84        (true, true) => normalize_metric_name(metric, metric_type),
85        (true, false) => normalize_metric_name_without_suffixes(&metric.name),
86        (false, true) => build_utf8_metric_name(&metric.name, &metric.unit, metric_type),
87        (false, false) => metric.name.clone(),
88    }
89}
90
91pub fn translate_label_name(name: &str, strategy: OtlpMetricTranslationStrategy) -> String {
92    if strategy.should_escape() {
93        normalize_label_name(name)
94    } else {
95        name.to_string()
96    }
97}
98
99// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_name.go#L55
100pub fn normalize_metric_name(metric: &Metric, metric_type: &MetricType) -> String {
101    normalize_metric_name_with_suffixes(&metric.name, &metric.unit, metric_type)
102}
103
104fn normalize_metric_name_with_suffixes(name: &str, unit: &str, metric_type: &MetricType) -> String {
105    let mut name_tokens = metric_name_tokens(name);
106
107    if !unit.is_empty() {
108        let (main, per) = build_clean_unit_suffix(unit);
109        if let Some(main) = main
110            && !name_tokens.contains(&main)
111        {
112            name_tokens.push(main);
113        }
114        if let Some(per) = per
115            && !name_tokens.contains(&per)
116        {
117            name_tokens.push("per".to_string());
118            name_tokens.push(per);
119        }
120    }
121
122    if matches!(metric_type, MetricType::MonotonicSum) {
123        name_tokens.retain(|t| t != TOTAL);
124        name_tokens.push(TOTAL.to_string());
125    }
126
127    if unit == "1" && matches!(metric_type, MetricType::Gauge) {
128        name_tokens.retain(|t| t != RATIO);
129        name_tokens.push(RATIO.to_string());
130    }
131
132    prefix_digit_metric_name(name_tokens.join(UNDERSCORE))
133}
134
135fn normalize_metric_name_without_suffixes(name: &str) -> String {
136    prefix_digit_metric_name(metric_name_tokens(name).join(UNDERSCORE))
137}
138
139fn metric_name_tokens(name: &str) -> Vec<String> {
140    NON_ALPHA_NUM_CHAR
141        .split(name)
142        .filter_map(|s| {
143            let trimmed = s.trim();
144            if trimmed.is_empty() {
145                None
146            } else {
147                Some(trimmed.to_string())
148            }
149        })
150        .collect()
151}
152
/// Returns `name` with a leading `_` added when its first character is an
/// ASCII digit; any other name (including the empty one) is returned as is.
fn prefix_digit_metric_name(name: String) -> String {
    match name.chars().next() {
        Some(lead) if lead.is_ascii_digit() => format!("_{name}"),
        _ => name,
    }
}
162
163fn build_utf8_metric_name(input_name: &str, unit: &str, metric_type: &MetricType) -> String {
164    let mut name = input_name.to_string();
165
166    let append_ratio = unit == "1" && matches!(metric_type, MetricType::Gauge);
167    if append_ratio {
168        name = trim_suffix_and_delimiter(&name, RATIO);
169    }
170
171    let append_total = matches!(metric_type, MetricType::MonotonicSum);
172    if append_total {
173        name = trim_suffix_and_delimiter(&name, TOTAL);
174    }
175
176    let (main_unit_suffix, per_unit_suffix) = build_unit_suffixes(unit);
177    let append_per = !per_unit_suffix.is_empty();
178    if append_per {
179        name = trim_suffix_and_delimiter(&name, &per_unit_suffix);
180    }
181
182    if !main_unit_suffix.is_empty() && !name.ends_with(&main_unit_suffix) {
183        name.push('_');
184        name.push_str(&main_unit_suffix);
185    }
186    if append_per {
187        name.push('_');
188        name.push_str(&per_unit_suffix);
189    }
190    if append_total {
191        name.push_str("_total");
192    }
193    if append_ratio {
194        name.push_str("_ratio");
195    }
196
197    name
198}
199
/// Removes `suffix` and the `_` right before it from the end of `name`.
///
/// The name is returned unchanged when it does not end with `suffix`, when
/// the suffix is not preceded by `_`, or when nothing would remain.
fn trim_suffix_and_delimiter(name: &str, suffix: &str) -> String {
    let Some(without_suffix) = name.strip_suffix(suffix) else {
        return name.to_owned();
    };

    match without_suffix.strip_suffix('_') {
        Some(stem) if !stem.is_empty() => stem.to_owned(),
        _ => name.to_owned(),
    }
}
207
208fn build_clean_unit_suffix(unit: &str) -> (Option<String>, Option<String>) {
209    let (main, per) = build_unit_suffixes(unit);
210    let main = clean_unit_name(&main);
211    let per = per
212        .strip_prefix(PER_PREFIX)
213        .map(clean_unit_name)
214        .unwrap_or_default();
215
216    (
217        (!main.is_empty()).then_some(main),
218        (!per.is_empty()).then_some(per),
219    )
220}
221
222fn build_unit_suffixes(unit: &str) -> (String, String) {
223    let (main, per) = unit.split_once('/').unwrap_or((unit, ""));
224    let main_unit_suffix = unit_suffix(main, &UNIT_MAP);
225    let per_unit_suffix = unit_suffix(per, &PER_UNIT_MAP);
226
227    if per_unit_suffix.is_empty() {
228        (main_unit_suffix, per_unit_suffix)
229    } else {
230        (main_unit_suffix, format!("{PER_PREFIX}{per_unit_suffix}"))
231    }
232}
233
/// Resolves one unit component to its suffix word.
///
/// The input is trimmed first. Blank units and units containing `{` or `}`
/// produce an empty string. Otherwise the entry from `unit_map` is returned
/// if present, falling back to the trimmed unit itself.
fn unit_suffix(unit_str: &str, unit_map: &HashMap<String, String>) -> String {
    let trimmed = unit_str.trim();
    let has_brace = trimmed.contains('{') || trimmed.contains('}');
    if trimmed.is_empty() || has_brace {
        return String::new();
    }

    match unit_map.get(trimmed) {
        Some(word) => word.clone(),
        None => trimmed.to_owned(),
    }
}
246
247pub(crate) fn clean_unit_name(name: &str) -> String {
248    NON_ALPHA_NUM_CHAR
249        .split(name)
250        .filter(|s| !s.is_empty())
251        .collect::<Vec<&str>>()
252        .join(UNDERSCORE)
253        .trim_matches('_')
254        .to_string()
255}
256
257// See https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/145942706622aba5c276ca47f48df438228bfea4/pkg/translator/prometheus/normalize_label.go#L27
258pub fn normalize_label_name(name: &str) -> String {
259    if name.is_empty() {
260        return name.to_string();
261    }
262
263    let n = NON_ALPHA_NUM_CHAR.replace_all(name, UNDERSCORE);
264    if let Some((_, first)) = n.char_indices().next()
265        && first.is_ascii_digit()
266    {
267        return format!("key_{}", n);
268    }
269    if n.starts_with(UNDERSCORE) && !n.starts_with(DOUBLE_UNDERSCORE) {
270        return format!("key{}", n);
271    }
272    n.to_string()
273}
274
/// Normalizes OTLP instrumentation, metric and attribute names.
///
/// <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument-name-syntax>
/// - names are case-insensitive, so they are lowercased for better SQL
///   usability
/// - every `.` and `-` is replaced with `_`
pub fn legacy_normalize_otlp_name(name: &str) -> String {
    // Lowercase first, then substitute: lowercasing looks at neighbouring characters.
    let lowered = name.to_lowercase();
    lowered
        .chars()
        .map(|ch| if matches!(ch, '.' | '-') { '_' } else { ch })
        .collect()
}
284
285#[cfg(test)]
286mod tests {
287    use otel_arrow_rust::proto::opentelemetry::metrics::v1::Metric;
288    use session::protocol_ctx::OtlpMetricTranslationStrategy::{
289        NoTranslation, NoUtf8EscapingWithSuffixes, UnderscoreEscapingWithSuffixes,
290        UnderscoreEscapingWithoutSuffixes,
291    };
292
293    use super::*;
294
295    #[test]
296    fn test_legacy_normalize_otlp_name() {
297        assert_eq!(
298            legacy_normalize_otlp_name("jvm.memory.free"),
299            "jvm_memory_free"
300        );
301        assert_eq!(
302            legacy_normalize_otlp_name("jvm-memory-free"),
303            "jvm_memory_free"
304        );
305        assert_eq!(
306            legacy_normalize_otlp_name("jvm_memory_free"),
307            "jvm_memory_free"
308        );
309        assert_eq!(
310            legacy_normalize_otlp_name("JVM_MEMORY_FREE"),
311            "jvm_memory_free"
312        );
313        assert_eq!(
314            legacy_normalize_otlp_name("JVM_memory_FREE"),
315            "jvm_memory_free"
316        );
317    }
318
319    #[test]
320    fn test_translate_metric_name_strategies() {
321        let metric = Metric {
322            name: "http.server.duration_total".to_string(),
323            unit: "s".to_string(),
324            ..Default::default()
325        };
326
327        assert_eq!(
328            translate_metric_name(
329                &metric,
330                &MetricType::MonotonicSum,
331                UnderscoreEscapingWithSuffixes
332            ),
333            "http_server_duration_seconds_total"
334        );
335        assert_eq!(
336            translate_metric_name(
337                &metric,
338                &MetricType::MonotonicSum,
339                UnderscoreEscapingWithoutSuffixes,
340            ),
341            "http_server_duration_total"
342        );
343        assert_eq!(
344            translate_metric_name(
345                &metric,
346                &MetricType::MonotonicSum,
347                NoUtf8EscapingWithSuffixes
348            ),
349            "http.server.duration_seconds_total"
350        );
351        assert_eq!(
352            translate_metric_name(&metric, &MetricType::MonotonicSum, NoTranslation),
353            "http.server.duration_total"
354        );
355    }
356
357    #[test]
358    fn test_translate_metric_name_no_utf8_suffix_ordering() {
359        let metric = Metric {
360            name: "request.rate_per_second_total".to_string(),
361            unit: "1/s".to_string(),
362            ..Default::default()
363        };
364        assert_eq!(
365            translate_metric_name(
366                &metric,
367                &MetricType::MonotonicSum,
368                NoUtf8EscapingWithSuffixes
369            ),
370            "request.rate_per_second_total"
371        );
372
373        let metric = Metric {
374            name: "cpu.utilization_ratio".to_string(),
375            unit: "1".to_string(),
376            ..Default::default()
377        };
378        assert_eq!(
379            translate_metric_name(&metric, &MetricType::Gauge, NoUtf8EscapingWithSuffixes),
380            "cpu.utilization_ratio"
381        );
382
383        let metric = Metric {
384            name: "subtotal".to_string(),
385            ..Default::default()
386        };
387        assert_eq!(
388            translate_metric_name(
389                &metric,
390                &MetricType::MonotonicSum,
391                NoUtf8EscapingWithSuffixes
392            ),
393            "subtotal_total"
394        );
395
396        let metric = Metric {
397            name: "utilizationratio".to_string(),
398            unit: "1".to_string(),
399            ..Default::default()
400        };
401        assert_eq!(
402            translate_metric_name(&metric, &MetricType::Gauge, NoUtf8EscapingWithSuffixes),
403            "utilizationratio_ratio"
404        );
405    }
406
407    #[test]
408    fn test_translate_metric_name_prometheus_style_units_for_all_strategies() {
409        let cases = [
410            (
411                Metric {
412                    name: "duration.latency".to_string(),
413                    unit: "ms".to_string(),
414                    ..Default::default()
415                },
416                MetricType::Gauge,
417                [
418                    (
419                        UnderscoreEscapingWithSuffixes,
420                        "duration_latency_milliseconds",
421                    ),
422                    (UnderscoreEscapingWithoutSuffixes, "duration_latency"),
423                    (NoUtf8EscapingWithSuffixes, "duration.latency_milliseconds"),
424                    (NoTranslation, "duration.latency"),
425                ],
426            ),
427            (
428                Metric {
429                    name: "disk.io".to_string(),
430                    unit: "By".to_string(),
431                    ..Default::default()
432                },
433                MetricType::MonotonicSum,
434                [
435                    (UnderscoreEscapingWithSuffixes, "disk_io_bytes_total"),
436                    (UnderscoreEscapingWithoutSuffixes, "disk_io"),
437                    (NoUtf8EscapingWithSuffixes, "disk.io_bytes_total"),
438                    (NoTranslation, "disk.io"),
439                ],
440            ),
441            (
442                Metric {
443                    name: "cpu.utilization".to_string(),
444                    unit: "%".to_string(),
445                    ..Default::default()
446                },
447                MetricType::Gauge,
448                [
449                    (UnderscoreEscapingWithSuffixes, "cpu_utilization_percent"),
450                    (UnderscoreEscapingWithoutSuffixes, "cpu_utilization"),
451                    (NoUtf8EscapingWithSuffixes, "cpu.utilization_percent"),
452                    (NoTranslation, "cpu.utilization"),
453                ],
454            ),
455            (
456                Metric {
457                    name: "request.rate".to_string(),
458                    unit: "1/s".to_string(),
459                    ..Default::default()
460                },
461                MetricType::MonotonicSum,
462                [
463                    (
464                        UnderscoreEscapingWithSuffixes,
465                        "request_rate_per_second_total",
466                    ),
467                    (UnderscoreEscapingWithoutSuffixes, "request_rate"),
468                    (NoUtf8EscapingWithSuffixes, "request.rate_per_second_total"),
469                    (NoTranslation, "request.rate"),
470                ],
471            ),
472            (
473                Metric {
474                    name: "queue.depth".to_string(),
475                    unit: "{items}".to_string(),
476                    ..Default::default()
477                },
478                MetricType::Gauge,
479                [
480                    (UnderscoreEscapingWithSuffixes, "queue_depth"),
481                    (UnderscoreEscapingWithoutSuffixes, "queue_depth"),
482                    (NoUtf8EscapingWithSuffixes, "queue.depth"),
483                    (NoTranslation, "queue.depth"),
484                ],
485            ),
486        ];
487
488        for (metric, metric_type, expectations) in cases {
489            for (strategy, expected) in expectations {
490                assert_eq!(
491                    translate_metric_name(&metric, &metric_type, strategy),
492                    expected,
493                    "metric: {}, unit: {}, type: {:?}, strategy: {:?}",
494                    metric.name,
495                    metric.unit,
496                    metric_type,
497                    strategy
498                );
499            }
500        }
501    }
502
503    #[test]
504    fn test_translate_label_name_strategies() {
505        assert_eq!(
506            translate_label_name("service.name", UnderscoreEscapingWithSuffixes),
507            "service_name"
508        );
509        assert_eq!(
510            translate_label_name("_foo", UnderscoreEscapingWithoutSuffixes),
511            "key_foo"
512        );
513        assert_eq!(
514            translate_label_name("service.name", NoUtf8EscapingWithSuffixes),
515            "service.name"
516        );
517        assert_eq!(translate_label_name("_foo", NoTranslation), "_foo");
518    }
519
520    #[test]
521    fn test_clean_unit_name() {
522        assert_eq!(clean_unit_name("faults"), "faults");
523        assert_eq!(clean_unit_name("{faults}"), "faults");
524        assert_eq!(clean_unit_name("req/sec"), "req_sec");
525        assert_eq!(clean_unit_name("m/s"), "m_s");
526        assert_eq!(clean_unit_name("___test___"), "test");
527        assert_eq!(
528            clean_unit_name("multiple__underscores"),
529            "multiple_underscores"
530        );
531        assert_eq!(clean_unit_name(""), "");
532        assert_eq!(clean_unit_name("___"), "");
533        assert_eq!(clean_unit_name("bytes.per.second"), "bytes_per_second");
534    }
535}