servers/
metrics.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(not(windows))]
16pub(crate) mod jemalloc;
17
18use std::task::{Context, Poll};
19use std::time::Instant;
20
21use axum::extract::{MatchedPath, Request};
22use axum::middleware::Next;
23use axum::response::IntoResponse;
24use lazy_static::lazy_static;
25use prometheus::{
26    register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
27    Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge,
28};
29use tonic::body::BoxBody;
30use tower::{Layer, Service};
31
// Label names (keys) used by the metrics registered in this module.
pub(crate) const METRIC_DB_LABEL: &str = "db";
pub(crate) const METRIC_CODE_LABEL: &str = "code";
pub(crate) const METRIC_TYPE_LABEL: &str = "type";
pub(crate) const METRIC_PROTOCOL_LABEL: &str = "protocol";
pub(crate) const METRIC_METHOD_LABEL: &str = "method";
pub(crate) const METRIC_PATH_LABEL: &str = "path";
pub(crate) const METRIC_RESULT_LABEL: &str = "result";
// MySQL and PostgreSQL deliberately share the same "subprotocol" label name.
pub(crate) const METRIC_MYSQL_SUBPROTOCOL_LABEL: &str = "subprotocol";
pub(crate) const METRIC_POSTGRES_SUBPROTOCOL_LABEL: &str = "subprotocol";

// Label values. NOTE(review): the pairing of each value with a label name below is inferred
// from the constant names; confirm against the call sites.
/// Presumably the `protocol` value reported to the error counter for MySQL.
pub(crate) const METRIC_ERROR_COUNTER_LABEL_MYSQL: &str = "mysql";
/// Presumably `subprotocol` values for MySQL queries.
pub(crate) const METRIC_MYSQL_BINQUERY: &str = "binquery";
pub(crate) const METRIC_MYSQL_TEXTQUERY: &str = "textquery";
/// Presumably `subprotocol` values for PostgreSQL queries.
pub(crate) const METRIC_POSTGRES_SIMPLE_QUERY: &str = "simple";
pub(crate) const METRIC_POSTGRES_EXTENDED_QUERY: &str = "extended";
/// Presumably `result` values.
pub(crate) const METRIC_SUCCESS_VALUE: &str = "success";
pub(crate) const METRIC_FAILURE_VALUE: &str = "failure";
49
50lazy_static! {
51    pub static ref METRIC_ERROR_COUNTER: IntCounterVec = register_int_counter_vec!(
52        "greptime_servers_error",
53        "servers error",
54        &[METRIC_PROTOCOL_LABEL]
55    )
56    .unwrap();
57    /// Http SQL query duration per database.
58    pub static ref METRIC_HTTP_SQL_ELAPSED: HistogramVec = register_histogram_vec!(
59        "greptime_servers_http_sql_elapsed",
60        "servers http sql elapsed",
61        &[METRIC_DB_LABEL],
62        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
63    )
64    .unwrap();
65    /// Http pql query duration per database.
66    pub static ref METRIC_HTTP_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
67        "greptime_servers_http_promql_elapsed",
68        "servers http promql elapsed",
69        &[METRIC_DB_LABEL],
70        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
71    )
72    .unwrap();
73    /// Http logs query duration per database.
74    pub static ref METRIC_HTTP_LOGS_ELAPSED: HistogramVec = register_histogram_vec!(
75        "greptime_servers_http_logs_elapsed",
76        "servers http logs elapsed",
77        &[METRIC_DB_LABEL],
78        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
79    )
80    .unwrap();
81    pub static ref METRIC_AUTH_FAILURE: IntCounterVec = register_int_counter_vec!(
82        "greptime_servers_auth_failure_count",
83        "servers auth failure count",
84        &[METRIC_CODE_LABEL]
85    )
86    .unwrap();
87    /// Http influxdb write duration per database.
88    pub static ref METRIC_HTTP_INFLUXDB_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
89        "greptime_servers_http_influxdb_write_elapsed",
90        "servers http influxdb write elapsed",
91        &[METRIC_DB_LABEL],
92        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
93    )
94    .unwrap();
95    /// Http prometheus write duration per database.
96    pub static ref METRIC_HTTP_PROM_STORE_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
97        "greptime_servers_http_prometheus_write_elapsed",
98        "servers http prometheus write elapsed",
99        &[METRIC_DB_LABEL],
100        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
101    )
102    .unwrap();
103    /// Prometheus remote write codec duration.
104    pub static ref METRIC_HTTP_PROM_STORE_CODEC_ELAPSED: HistogramVec = register_histogram_vec!(
105        "greptime_servers_http_prometheus_codec_elapsed",
106        "servers http prometheus request codec duration",
107        &["type"],
108    )
109    .unwrap();
110    /// Decode duration of prometheus write request.
111    pub static ref METRIC_HTTP_PROM_STORE_DECODE_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
112        .with_label_values(&["decode"]);
113    /// Duration to convert prometheus write request to gRPC request.
114    pub static ref METRIC_HTTP_PROM_STORE_CONVERT_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
115        .with_label_values(&["convert"]);
116        /// The samples count of Prometheus remote write.
117    pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
118        "greptime_servers_prometheus_remote_write_samples",
119        "frontend prometheus remote write samples"
120    )
121    .unwrap();
122    /// Http prometheus read duration per database.
123    pub static ref METRIC_HTTP_PROM_STORE_READ_ELAPSED: HistogramVec = register_histogram_vec!(
124        "greptime_servers_http_prometheus_read_elapsed",
125        "servers http prometheus read elapsed",
126        &[METRIC_DB_LABEL]
127    )
128    .unwrap();
129    /// Http prometheus endpoint query duration per database.
130    pub static ref METRIC_HTTP_PROMETHEUS_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
131        "greptime_servers_http_prometheus_promql_elapsed",
132        "servers http prometheus promql elapsed",
133        &[METRIC_DB_LABEL, METRIC_METHOD_LABEL]
134    )
135    .unwrap();
136    pub static ref METRIC_HTTP_OPENTELEMETRY_METRICS_ELAPSED: HistogramVec =
137        register_histogram_vec!(
138            "greptime_servers_http_otlp_metrics_elapsed",
139            "servers_http_otlp_metrics_elapsed",
140            &[METRIC_DB_LABEL]
141        )
142        .unwrap();
143    pub static ref METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED: HistogramVec =
144        register_histogram_vec!(
145            "greptime_servers_http_otlp_traces_elapsed",
146            "servers http otlp traces elapsed",
147            &[METRIC_DB_LABEL]
148        )
149        .unwrap();
150    pub static ref METRIC_HTTP_OPENTELEMETRY_LOGS_ELAPSED: HistogramVec =
151    register_histogram_vec!(
152        "greptime_servers_http_otlp_logs_elapsed",
153        "servers http otlp logs elapsed",
154        &[METRIC_DB_LABEL]
155    )
156    .unwrap();
157    pub static ref METRIC_HTTP_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
158        "greptime_servers_http_logs_ingestion_counter",
159        "servers http logs ingestion counter",
160        &[METRIC_DB_LABEL]
161    )
162    .unwrap();
163    pub static ref METRIC_HTTP_LOGS_INGESTION_ELAPSED: HistogramVec =
164        register_histogram_vec!(
165            "greptime_servers_http_logs_ingestion_elapsed",
166            "servers http logs ingestion elapsed",
167            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
168        )
169        .unwrap();
170    pub static ref METRIC_LOKI_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
171        "greptime_servers_loki_logs_ingestion_counter",
172        "servers loki logs ingestion counter",
173        &[METRIC_DB_LABEL]
174    )
175    .unwrap();
176    pub static ref METRIC_LOKI_LOGS_INGESTION_ELAPSED: HistogramVec =
177        register_histogram_vec!(
178            "greptime_servers_loki_logs_ingestion_elapsed",
179            "servers loki logs ingestion elapsed",
180            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
181        )
182        .unwrap();
183    pub static ref METRIC_ELASTICSEARCH_LOGS_INGESTION_ELAPSED: HistogramVec =
184        register_histogram_vec!(
185            "greptime_servers_elasticsearch_logs_ingestion_elapsed",
186            "servers elasticsearch logs ingestion elapsed",
187            &[METRIC_DB_LABEL]
188        )
189        .unwrap();
190    pub static ref METRIC_ELASTICSEARCH_LOGS_DOCS_COUNT: IntCounterVec = register_int_counter_vec!(
191        "greptime_servers_elasticsearch_logs_docs_count",
192        "servers elasticsearch logs docs count",
193        &[METRIC_DB_LABEL]
194    )
195    .unwrap();
196
197    pub static ref METRIC_HTTP_LOGS_TRANSFORM_ELAPSED: HistogramVec =
198        register_histogram_vec!(
199            "greptime_servers_http_logs_transform_elapsed",
200            "servers http logs transform elapsed",
201            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
202        )
203        .unwrap();
204    pub static ref METRIC_MYSQL_CONNECTIONS: IntGauge = register_int_gauge!(
205        "greptime_servers_mysql_connection_count",
206        "servers mysql connection count"
207    )
208    .unwrap();
209    pub static ref METRIC_MYSQL_QUERY_TIMER: HistogramVec = register_histogram_vec!(
210        "greptime_servers_mysql_query_elapsed",
211        "servers mysql query elapsed",
212        &[METRIC_MYSQL_SUBPROTOCOL_LABEL, METRIC_DB_LABEL]
213    )
214    .unwrap();
215    pub static ref METRIC_MYSQL_PREPARED_COUNT: IntCounterVec = register_int_counter_vec!(
216        "greptime_servers_mysql_prepared_count",
217        "servers mysql prepared count",
218        &[METRIC_DB_LABEL]
219    )
220    .unwrap();
221    pub static ref METRIC_POSTGRES_CONNECTIONS: IntGauge = register_int_gauge!(
222        "greptime_servers_postgres_connection_count",
223        "servers postgres connection count"
224    )
225    .unwrap();
226    pub static ref METRIC_POSTGRES_QUERY_TIMER: HistogramVec = register_histogram_vec!(
227        "greptime_servers_postgres_query_elapsed",
228        "servers postgres query elapsed",
229        &[METRIC_POSTGRES_SUBPROTOCOL_LABEL, METRIC_DB_LABEL]
230    )
231    .unwrap();
232    pub static ref METRIC_POSTGRES_PREPARED_COUNT: IntCounter = register_int_counter!(
233        "greptime_servers_postgres_prepared_count",
234        "servers postgres prepared count"
235    )
236    .unwrap();
237    pub static ref METRIC_SERVER_GRPC_DB_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
238        "greptime_servers_grpc_db_request_elapsed",
239        "servers grpc db request elapsed",
240        &[METRIC_DB_LABEL, METRIC_TYPE_LABEL, METRIC_CODE_LABEL]
241    )
242    .unwrap();
243    pub static ref METRIC_SERVER_GRPC_PROM_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
244        "greptime_servers_grpc_prom_request_elapsed",
245        "servers grpc prom request elapsed",
246        &[METRIC_DB_LABEL]
247    )
248    .unwrap();
249    pub static ref METRIC_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
250        "greptime_servers_http_requests_total",
251        "servers http requests total",
252        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL]
253    )
254    .unwrap();
255    pub static ref METRIC_HTTP_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
256        "greptime_servers_http_requests_elapsed",
257        "servers http requests elapsed",
258        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL],
259        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
260    )
261    .unwrap();
262    pub static ref METRIC_GRPC_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
263        "greptime_servers_grpc_requests_total",
264        "servers grpc requests total",
265        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL]
266    )
267    .unwrap();
268    pub static ref METRIC_GRPC_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
269        "greptime_servers_grpc_requests_elapsed",
270        "servers grpc requests elapsed",
271        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL],
272        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
273    )
274    .unwrap();
275    pub static ref METRIC_JAEGER_QUERY_ELAPSED: HistogramVec = register_histogram_vec!(
276        "greptime_servers_jaeger_query_elapsed",
277        "servers jaeger query elapsed",
278        &[METRIC_DB_LABEL, METRIC_PATH_LABEL]
279    )
280.unwrap();
281}
282
283// Based on https://github.com/hyperium/tonic/blob/master/examples/src/tower/server.rs
284// See https://github.com/hyperium/tonic/issues/242
285/// A metrics middleware.
286#[derive(Debug, Clone, Default)]
287pub(crate) struct MetricsMiddlewareLayer;
288
289impl<S> Layer<S> for MetricsMiddlewareLayer {
290    type Service = MetricsMiddleware<S>;
291
292    fn layer(&self, service: S) -> Self::Service {
293        MetricsMiddleware { inner: service }
294    }
295}
296
297#[derive(Debug, Clone)]
298pub(crate) struct MetricsMiddleware<S> {
299    inner: S,
300}
301
302impl<S> Service<http::Request<BoxBody>> for MetricsMiddleware<S>
303where
304    S: Service<http::Request<BoxBody>, Response = http::Response<BoxBody>> + Clone + Send + 'static,
305    S::Future: Send + 'static,
306{
307    type Response = S::Response;
308    type Error = S::Error;
309    type Future = futures::future::BoxFuture<'static, Result<Self::Response, Self::Error>>;
310
311    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
312        self.inner.poll_ready(cx)
313    }
314
315    fn call(&mut self, req: http::Request<BoxBody>) -> Self::Future {
316        // This is necessary because tonic internally uses `tower::buffer::Buffer`.
317        // See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149
318        // for details on why this is necessary
319        let clone = self.inner.clone();
320        let mut inner = std::mem::replace(&mut self.inner, clone);
321
322        Box::pin(async move {
323            let start = Instant::now();
324            let path = req.uri().path().to_string();
325
326            // Do extra async work here...
327            let response = inner.call(req).await?;
328
329            let latency = start.elapsed().as_secs_f64();
330            let status = response.status().as_u16().to_string();
331
332            let labels = [path.as_str(), status.as_str()];
333            METRIC_GRPC_REQUESTS_TOTAL.with_label_values(&labels).inc();
334            METRIC_GRPC_REQUESTS_ELAPSED
335                .with_label_values(&labels)
336                .observe(latency);
337
338            Ok(response)
339        })
340    }
341}
342
343/// A middleware to record metrics for HTTP.
344// Based on https://github.com/tokio-rs/axum/blob/axum-v0.6.16/examples/prometheus-metrics/src/main.rs
345pub(crate) async fn http_metrics_layer(req: Request, next: Next) -> impl IntoResponse {
346    let start = Instant::now();
347    let path = if let Some(matched_path) = req.extensions().get::<MatchedPath>() {
348        matched_path.as_str().to_owned()
349    } else {
350        req.uri().path().to_owned()
351    };
352    let method = req.method().clone();
353
354    let response = next.run(req).await;
355
356    let latency = start.elapsed().as_secs_f64();
357    let status = response.status().as_u16().to_string();
358    let method_str = method.to_string();
359
360    let labels = [method_str.as_str(), path.as_str(), status.as_str()];
361    METRIC_HTTP_REQUESTS_TOTAL.with_label_values(&labels).inc();
362    METRIC_HTTP_REQUESTS_ELAPSED
363        .with_label_values(&labels)
364        .observe(latency);
365
366    response
367}