servers/
metrics.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(not(windows))]
16pub(crate) mod jemalloc;
17
18use std::task::{Context, Poll};
19use std::time::Instant;
20
21use axum::extract::{MatchedPath, Request};
22use axum::middleware::Next;
23use axum::response::IntoResponse;
24use lazy_static::lazy_static;
25use prometheus::{
26    register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
27    register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge,
28};
29use tonic::body::BoxBody;
30use tower::{Layer, Service};
31
// ---- Label names -----------------------------------------------------------

/// Label name carrying the database a request targets.
pub(crate) const METRIC_DB_LABEL: &str = "db";
/// Label name carrying a status / error code.
pub(crate) const METRIC_CODE_LABEL: &str = "code";
/// Label name carrying a request or operation type.
pub(crate) const METRIC_TYPE_LABEL: &str = "type";
/// Label name carrying the serving protocol.
pub(crate) const METRIC_PROTOCOL_LABEL: &str = "protocol";
/// Label name carrying the request method.
pub(crate) const METRIC_METHOD_LABEL: &str = "method";
/// Label name carrying the request path.
pub(crate) const METRIC_PATH_LABEL: &str = "path";
/// Label name carrying the outcome of an operation.
pub(crate) const METRIC_RESULT_LABEL: &str = "result";
/// Label name carrying the MySQL sub-protocol of a query.
pub(crate) const METRIC_MYSQL_SUBPROTOCOL_LABEL: &str = "subprotocol";
/// Label name carrying the PostgreSQL sub-protocol of a query.
pub(crate) const METRIC_POSTGRES_SUBPROTOCOL_LABEL: &str = "subprotocol";

// ---- Label values ----------------------------------------------------------

// NOTE(review): presumably the `protocol` label value used for MySQL errors;
// the usage is outside this file.
pub(crate) const METRIC_ERROR_COUNTER_LABEL_MYSQL: &str = "mysql";
// NOTE(review): presumably values of the MySQL `subprotocol` label; confirm
// against the MySQL handler.
pub(crate) const METRIC_MYSQL_BINQUERY: &str = "binquery";
pub(crate) const METRIC_MYSQL_TEXTQUERY: &str = "textquery";
// NOTE(review): presumably values of the PostgreSQL `subprotocol` label;
// confirm against the PostgreSQL handler.
pub(crate) const METRIC_POSTGRES_SIMPLE_QUERY: &str = "simple";
pub(crate) const METRIC_POSTGRES_EXTENDED_QUERY: &str = "extended";
// NOTE(review): presumably values of the `result` label; confirm against callers.
pub(crate) const METRIC_SUCCESS_VALUE: &str = "success";
pub(crate) const METRIC_FAILURE_VALUE: &str = "failure";
49
50lazy_static! {
51    pub static ref METRIC_ERROR_COUNTER: IntCounterVec = register_int_counter_vec!(
52        "greptime_servers_error",
53        "servers error",
54        &[METRIC_PROTOCOL_LABEL]
55    )
56    .unwrap();
57    /// Http SQL query duration per database.
58    pub static ref METRIC_HTTP_SQL_ELAPSED: HistogramVec = register_histogram_vec!(
59        "greptime_servers_http_sql_elapsed",
60        "servers http sql elapsed",
61        &[METRIC_DB_LABEL],
62        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
63    )
64    .unwrap();
65    /// Http pql query duration per database.
66    pub static ref METRIC_HTTP_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
67        "greptime_servers_http_promql_elapsed",
68        "servers http promql elapsed",
69        &[METRIC_DB_LABEL],
70        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
71    )
72    .unwrap();
73    /// Http logs query duration per database.
74    pub static ref METRIC_HTTP_LOGS_ELAPSED: HistogramVec = register_histogram_vec!(
75        "greptime_servers_http_logs_elapsed",
76        "servers http logs elapsed",
77        &[METRIC_DB_LABEL],
78        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
79    )
80    .unwrap();
81    pub static ref METRIC_AUTH_FAILURE: IntCounterVec = register_int_counter_vec!(
82        "greptime_servers_auth_failure_count",
83        "servers auth failure count",
84        &[METRIC_CODE_LABEL]
85    )
86    .unwrap();
87    /// Http influxdb write duration per database.
88    pub static ref METRIC_HTTP_INFLUXDB_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
89        "greptime_servers_http_influxdb_write_elapsed",
90        "servers http influxdb write elapsed",
91        &[METRIC_DB_LABEL],
92        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
93    )
94    .unwrap();
95    /// Http prometheus write duration per database.
96    pub static ref METRIC_HTTP_PROM_STORE_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
97        "greptime_servers_http_prometheus_write_elapsed",
98        "servers http prometheus write elapsed",
99        &[METRIC_DB_LABEL],
100        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
101    )
102    .unwrap();
103    /// Prometheus remote write codec duration.
104    pub static ref METRIC_HTTP_PROM_STORE_CODEC_ELAPSED: HistogramVec = register_histogram_vec!(
105        "greptime_servers_http_prometheus_codec_elapsed",
106        "servers http prometheus request codec duration",
107        &["type"],
108    )
109    .unwrap();
110    /// Decode duration of prometheus write request.
111    pub static ref METRIC_HTTP_PROM_STORE_DECODE_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
112        .with_label_values(&["decode"]);
113    /// Duration to convert prometheus write request to gRPC request.
114    pub static ref METRIC_HTTP_PROM_STORE_CONVERT_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
115        .with_label_values(&["convert"]);
116        /// The samples count of Prometheus remote write.
117    pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
118        "greptime_servers_prometheus_remote_write_samples",
119        "frontend prometheus remote write samples"
120    )
121    .unwrap();
122    /// Http prometheus read duration per database.
123    pub static ref METRIC_HTTP_PROM_STORE_READ_ELAPSED: HistogramVec = register_histogram_vec!(
124        "greptime_servers_http_prometheus_read_elapsed",
125        "servers http prometheus read elapsed",
126        &[METRIC_DB_LABEL]
127    )
128    .unwrap();
129    /// Http prometheus endpoint query duration per database.
130    pub static ref METRIC_HTTP_PROMETHEUS_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
131        "greptime_servers_http_prometheus_promql_elapsed",
132        "servers http prometheus promql elapsed",
133        &[METRIC_DB_LABEL, METRIC_METHOD_LABEL]
134    )
135    .unwrap();
136    pub static ref METRIC_HTTP_OPENTELEMETRY_METRICS_ELAPSED: HistogramVec =
137        register_histogram_vec!(
138            "greptime_servers_http_otlp_metrics_elapsed",
139            "servers_http_otlp_metrics_elapsed",
140            &[METRIC_DB_LABEL]
141        )
142        .unwrap();
143    pub static ref METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED: HistogramVec =
144        register_histogram_vec!(
145            "greptime_servers_http_otlp_traces_elapsed",
146            "servers http otlp traces elapsed",
147            &[METRIC_DB_LABEL]
148        )
149        .unwrap();
150    pub static ref METRIC_HTTP_OPENTELEMETRY_LOGS_ELAPSED: HistogramVec =
151    register_histogram_vec!(
152        "greptime_servers_http_otlp_logs_elapsed",
153        "servers http otlp logs elapsed",
154        &[METRIC_DB_LABEL]
155    )
156    .unwrap();
157    pub static ref METRIC_HTTP_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
158        "greptime_servers_http_logs_ingestion_counter",
159        "servers http logs ingestion counter",
160        &[METRIC_DB_LABEL]
161    )
162    .unwrap();
163    pub static ref METRIC_HTTP_LOGS_INGESTION_ELAPSED: HistogramVec =
164        register_histogram_vec!(
165            "greptime_servers_http_logs_ingestion_elapsed",
166            "servers http logs ingestion elapsed",
167            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
168        )
169        .unwrap();
170    pub static ref METRIC_LOKI_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
171        "greptime_servers_loki_logs_ingestion_counter",
172        "servers loki logs ingestion counter",
173        &[METRIC_DB_LABEL]
174    )
175    .unwrap();
176    pub static ref METRIC_LOKI_LOGS_INGESTION_ELAPSED: HistogramVec =
177        register_histogram_vec!(
178            "greptime_servers_loki_logs_ingestion_elapsed",
179            "servers loki logs ingestion elapsed",
180            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
181        )
182        .unwrap();
183    pub static ref METRIC_ELASTICSEARCH_LOGS_INGESTION_ELAPSED: HistogramVec =
184        register_histogram_vec!(
185            "greptime_servers_elasticsearch_logs_ingestion_elapsed",
186            "servers elasticsearch logs ingestion elapsed",
187            &[METRIC_DB_LABEL]
188        )
189        .unwrap();
190    pub static ref METRIC_ELASTICSEARCH_LOGS_DOCS_COUNT: IntCounterVec = register_int_counter_vec!(
191        "greptime_servers_elasticsearch_logs_docs_count",
192        "servers elasticsearch logs docs count",
193        &[METRIC_DB_LABEL]
194    )
195    .unwrap();
196
197    pub static ref METRIC_HTTP_LOGS_TRANSFORM_ELAPSED: HistogramVec =
198        register_histogram_vec!(
199            "greptime_servers_http_logs_transform_elapsed",
200            "servers http logs transform elapsed",
201            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
202        )
203        .unwrap();
204    pub static ref METRIC_MYSQL_CONNECTIONS: IntGauge = register_int_gauge!(
205        "greptime_servers_mysql_connection_count",
206        "servers mysql connection count"
207    )
208    .unwrap();
209    pub static ref METRIC_MYSQL_QUERY_TIMER: HistogramVec = register_histogram_vec!(
210        "greptime_servers_mysql_query_elapsed",
211        "servers mysql query elapsed",
212        &[METRIC_MYSQL_SUBPROTOCOL_LABEL, METRIC_DB_LABEL],
213        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
214    )
215    .unwrap();
216    pub static ref METRIC_MYSQL_PREPARED_COUNT: IntCounterVec = register_int_counter_vec!(
217        "greptime_servers_mysql_prepared_count",
218        "servers mysql prepared count",
219        &[METRIC_DB_LABEL]
220    )
221    .unwrap();
222    pub static ref METRIC_POSTGRES_CONNECTIONS: IntGauge = register_int_gauge!(
223        "greptime_servers_postgres_connection_count",
224        "servers postgres connection count"
225    )
226    .unwrap();
227    pub static ref METRIC_POSTGRES_QUERY_TIMER: HistogramVec = register_histogram_vec!(
228        "greptime_servers_postgres_query_elapsed",
229        "servers postgres query elapsed",
230        &[METRIC_POSTGRES_SUBPROTOCOL_LABEL, METRIC_DB_LABEL],
231        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
232    )
233    .unwrap();
234    pub static ref METRIC_POSTGRES_PREPARED_COUNT: IntCounter = register_int_counter!(
235        "greptime_servers_postgres_prepared_count",
236        "servers postgres prepared count"
237    )
238    .unwrap();
239    pub static ref METRIC_SERVER_GRPC_DB_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
240        "greptime_servers_grpc_db_request_elapsed",
241        "servers grpc db request elapsed",
242        &[METRIC_DB_LABEL, METRIC_TYPE_LABEL, METRIC_CODE_LABEL]
243    )
244    .unwrap();
245    pub static ref METRIC_SERVER_GRPC_PROM_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
246        "greptime_servers_grpc_prom_request_elapsed",
247        "servers grpc prom request elapsed",
248        &[METRIC_DB_LABEL],
249        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
250    )
251    .unwrap();
252    pub static ref METRIC_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
253        "greptime_servers_http_requests_total",
254        "servers http requests total",
255        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL]
256    )
257    .unwrap();
258    pub static ref METRIC_HTTP_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
259        "greptime_servers_http_requests_elapsed",
260        "servers http requests elapsed",
261        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL],
262        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
263    )
264    .unwrap();
265    pub static ref METRIC_GRPC_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
266        "greptime_servers_grpc_requests_total",
267        "servers grpc requests total",
268        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL]
269    )
270    .unwrap();
271    pub static ref METRIC_GRPC_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
272        "greptime_servers_grpc_requests_elapsed",
273        "servers grpc requests elapsed",
274        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL],
275        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
276    )
277    .unwrap();
278    pub static ref METRIC_JAEGER_QUERY_ELAPSED: HistogramVec = register_histogram_vec!(
279        "greptime_servers_jaeger_query_elapsed",
280        "servers jaeger query elapsed",
281        &[METRIC_DB_LABEL, METRIC_PATH_LABEL]
282    ).unwrap();
283
284    pub static ref GRPC_BULK_INSERT_ELAPSED: Histogram = register_histogram!(
285        "greptime_servers_bulk_insert_elapsed",
286        "servers handle bulk insert elapsed",
287    ).unwrap();
288}
289
290// Based on https://github.com/hyperium/tonic/blob/master/examples/src/tower/server.rs
291// See https://github.com/hyperium/tonic/issues/242
292/// A metrics middleware.
293#[derive(Debug, Clone, Default)]
294pub(crate) struct MetricsMiddlewareLayer;
295
296impl<S> Layer<S> for MetricsMiddlewareLayer {
297    type Service = MetricsMiddleware<S>;
298
299    fn layer(&self, service: S) -> Self::Service {
300        MetricsMiddleware { inner: service }
301    }
302}
303
304#[derive(Debug, Clone)]
305pub(crate) struct MetricsMiddleware<S> {
306    inner: S,
307}
308
309impl<S> Service<http::Request<BoxBody>> for MetricsMiddleware<S>
310where
311    S: Service<http::Request<BoxBody>, Response = http::Response<BoxBody>> + Clone + Send + 'static,
312    S::Future: Send + 'static,
313{
314    type Response = S::Response;
315    type Error = S::Error;
316    type Future = futures::future::BoxFuture<'static, Result<Self::Response, Self::Error>>;
317
318    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
319        self.inner.poll_ready(cx)
320    }
321
322    fn call(&mut self, req: http::Request<BoxBody>) -> Self::Future {
323        // This is necessary because tonic internally uses `tower::buffer::Buffer`.
324        // See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149
325        // for details on why this is necessary
326        let clone = self.inner.clone();
327        let mut inner = std::mem::replace(&mut self.inner, clone);
328
329        Box::pin(async move {
330            let start = Instant::now();
331            let path = req.uri().path().to_string();
332
333            // Do extra async work here...
334            let response = inner.call(req).await?;
335
336            let latency = start.elapsed().as_secs_f64();
337            let status = response.status().as_u16().to_string();
338
339            let labels = [path.as_str(), status.as_str()];
340            METRIC_GRPC_REQUESTS_TOTAL.with_label_values(&labels).inc();
341            METRIC_GRPC_REQUESTS_ELAPSED
342                .with_label_values(&labels)
343                .observe(latency);
344
345            Ok(response)
346        })
347    }
348}
349
350/// A middleware to record metrics for HTTP.
351// Based on https://github.com/tokio-rs/axum/blob/axum-v0.6.16/examples/prometheus-metrics/src/main.rs
352pub(crate) async fn http_metrics_layer(req: Request, next: Next) -> impl IntoResponse {
353    let start = Instant::now();
354    let path = if let Some(matched_path) = req.extensions().get::<MatchedPath>() {
355        matched_path.as_str().to_owned()
356    } else {
357        req.uri().path().to_owned()
358    };
359    let method = req.method().clone();
360
361    let response = next.run(req).await;
362
363    let latency = start.elapsed().as_secs_f64();
364    let status = response.status().as_u16().to_string();
365    let method_str = method.to_string();
366
367    let labels = [method_str.as_str(), path.as_str(), status.as_str()];
368    METRIC_HTTP_REQUESTS_TOTAL.with_label_values(&labels).inc();
369    METRIC_HTTP_REQUESTS_ELAPSED
370        .with_label_values(&labels)
371        .observe(latency);
372
373    response
374}