servers/
metrics.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(not(windows))]
16pub(crate) mod jemalloc;
17
18use std::task::{Context, Poll};
19use std::time::Instant;
20
21use axum::extract::{MatchedPath, Request};
22use axum::middleware::Next;
23use axum::response::IntoResponse;
24use lazy_static::lazy_static;
25use prometheus::{
26    register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
27    register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge,
28};
29use session::context::QueryContext;
30use tonic::body::BoxBody;
31use tower::{Layer, Service};
32
// ---- Label names -----------------------------------------------------------

/// Label name carrying the database a metric sample belongs to.
pub(crate) const METRIC_DB_LABEL: &str = "db";
/// Label name carrying a status/result code.
pub(crate) const METRIC_CODE_LABEL: &str = "code";
/// Label name carrying a request or operation type.
pub(crate) const METRIC_TYPE_LABEL: &str = "type";
/// Label name carrying the protocol.
pub(crate) const METRIC_PROTOCOL_LABEL: &str = "protocol";
/// Label name carrying the request method.
pub(crate) const METRIC_METHOD_LABEL: &str = "method";
/// Label name carrying the request path.
pub(crate) const METRIC_PATH_LABEL: &str = "path";
/// Label name carrying the outcome of an operation.
pub(crate) const METRIC_RESULT_LABEL: &str = "result";
/// Label name distinguishing MySQL sub-protocols.
pub(crate) const METRIC_MYSQL_SUBPROTOCOL_LABEL: &str = "subprotocol";
/// Label name distinguishing PostgreSQL sub-protocols (same text as the MySQL one).
pub(crate) const METRIC_POSTGRES_SUBPROTOCOL_LABEL: &str = "subprotocol";

// ---- Label values ----------------------------------------------------------

// NOTE(review): presumably the `protocol` label value used when counting MySQL
// errors; confirm at the call sites.
pub(crate) const METRIC_ERROR_COUNTER_LABEL_MYSQL: &str = "mysql";
// NOTE(review): presumably values of the MySQL `subprotocol` label; confirm at the call sites.
pub(crate) const METRIC_MYSQL_BINQUERY: &str = "binquery";
pub(crate) const METRIC_MYSQL_TEXTQUERY: &str = "textquery";
// NOTE(review): presumably values of the PostgreSQL `subprotocol` label; confirm at the call sites.
pub(crate) const METRIC_POSTGRES_SIMPLE_QUERY: &str = "simple";
pub(crate) const METRIC_POSTGRES_EXTENDED_QUERY: &str = "extended";

// NOTE(review): presumably values of the `result` label; confirm at the call sites.
pub(crate) const METRIC_SUCCESS_VALUE: &str = "success";
pub(crate) const METRIC_FAILURE_VALUE: &str = "failure";
50
51lazy_static! {
52
53    pub static ref HTTP_REQUEST_COUNTER: IntCounterVec = register_int_counter_vec!(
54        "greptime_servers_http_request_counter",
55        "servers http request counter",
56        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
57    ).unwrap();
58
59    pub static ref METRIC_ERROR_COUNTER: IntCounterVec = register_int_counter_vec!(
60        "greptime_servers_error",
61        "servers error",
62        &[METRIC_PROTOCOL_LABEL]
63    )
64    .unwrap();
65    /// Http SQL query duration per database.
66    pub static ref METRIC_HTTP_SQL_ELAPSED: HistogramVec = register_histogram_vec!(
67        "greptime_servers_http_sql_elapsed",
68        "servers http sql elapsed",
69        &[METRIC_DB_LABEL],
70        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
71    )
72    .unwrap();
73    /// Http pql query duration per database.
74    pub static ref METRIC_HTTP_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
75        "greptime_servers_http_promql_elapsed",
76        "servers http promql elapsed",
77        &[METRIC_DB_LABEL],
78        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
79    )
80    .unwrap();
81    /// Http logs query duration per database.
82    pub static ref METRIC_HTTP_LOGS_ELAPSED: HistogramVec = register_histogram_vec!(
83        "greptime_servers_http_logs_elapsed",
84        "servers http logs elapsed",
85        &[METRIC_DB_LABEL],
86        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
87    )
88    .unwrap();
89    pub static ref METRIC_AUTH_FAILURE: IntCounterVec = register_int_counter_vec!(
90        "greptime_servers_auth_failure_count",
91        "servers auth failure count",
92        &[METRIC_CODE_LABEL]
93    )
94    .unwrap();
95    /// Http influxdb write duration per database.
96    pub static ref METRIC_HTTP_INFLUXDB_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
97        "greptime_servers_http_influxdb_write_elapsed",
98        "servers http influxdb write elapsed",
99        &[METRIC_DB_LABEL],
100        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
101    )
102    .unwrap();
103    /// Http prometheus write duration per database.
104    pub static ref METRIC_HTTP_PROM_STORE_WRITE_ELAPSED: HistogramVec = register_histogram_vec!(
105        "greptime_servers_http_prometheus_write_elapsed",
106        "servers http prometheus write elapsed",
107        &[METRIC_DB_LABEL],
108        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
109    )
110    .unwrap();
111    /// Prometheus remote write codec duration.
112    pub static ref METRIC_HTTP_PROM_STORE_CODEC_ELAPSED: HistogramVec = register_histogram_vec!(
113        "greptime_servers_http_prometheus_codec_elapsed",
114        "servers http prometheus request codec duration",
115        &["type"],
116    )
117    .unwrap();
118    /// Decode duration of prometheus write request.
119    pub static ref METRIC_HTTP_PROM_STORE_DECODE_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
120        .with_label_values(&["decode"]);
121    /// Duration to convert prometheus write request to gRPC request.
122    pub static ref METRIC_HTTP_PROM_STORE_CONVERT_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
123        .with_label_values(&["convert"]);
124        /// The samples count of Prometheus remote write.
125    pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounterVec = register_int_counter_vec!(
126        "greptime_servers_prometheus_remote_write_samples",
127        "frontend prometheus remote write samples",
128        &[METRIC_DB_LABEL]
129    )
130    .unwrap();
131    /// Http prometheus read duration per database.
132    pub static ref METRIC_HTTP_PROM_STORE_READ_ELAPSED: HistogramVec = register_histogram_vec!(
133        "greptime_servers_http_prometheus_read_elapsed",
134        "servers http prometheus read elapsed",
135        &[METRIC_DB_LABEL]
136    )
137    .unwrap();
138    /// Http prometheus endpoint query duration per database.
139    pub static ref METRIC_HTTP_PROMETHEUS_PROMQL_ELAPSED: HistogramVec = register_histogram_vec!(
140        "greptime_servers_http_prometheus_promql_elapsed",
141        "servers http prometheus promql elapsed",
142        &[METRIC_DB_LABEL, METRIC_METHOD_LABEL]
143    )
144    .unwrap();
145    pub static ref METRIC_HTTP_OPENTELEMETRY_METRICS_ELAPSED: HistogramVec =
146        register_histogram_vec!(
147            "greptime_servers_http_otlp_metrics_elapsed",
148            "servers_http_otlp_metrics_elapsed",
149            &[METRIC_DB_LABEL]
150        )
151        .unwrap();
152    pub static ref METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED: HistogramVec =
153        register_histogram_vec!(
154            "greptime_servers_http_otlp_traces_elapsed",
155            "servers http otlp traces elapsed",
156            &[METRIC_DB_LABEL]
157        )
158        .unwrap();
159    pub static ref METRIC_HTTP_OPENTELEMETRY_LOGS_ELAPSED: HistogramVec =
160    register_histogram_vec!(
161        "greptime_servers_http_otlp_logs_elapsed",
162        "servers http otlp logs elapsed",
163        &[METRIC_DB_LABEL]
164    )
165    .unwrap();
166    pub static ref METRIC_HTTP_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
167        "greptime_servers_http_logs_ingestion_counter",
168        "servers http logs ingestion counter",
169        &[METRIC_DB_LABEL]
170    )
171    .unwrap();
172    pub static ref METRIC_HTTP_LOGS_INGESTION_ELAPSED: HistogramVec =
173        register_histogram_vec!(
174            "greptime_servers_http_logs_ingestion_elapsed",
175            "servers http logs ingestion elapsed",
176            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
177        )
178        .unwrap();
179
180    /// Count of logs ingested into Loki.
181    pub static ref METRIC_LOKI_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
182        "greptime_servers_loki_logs_ingestion_counter",
183        "servers loki logs ingestion counter",
184        &[METRIC_DB_LABEL]
185    )
186    .unwrap();
187    pub static ref METRIC_LOKI_LOGS_INGESTION_ELAPSED: HistogramVec =
188        register_histogram_vec!(
189            "greptime_servers_loki_logs_ingestion_elapsed",
190            "servers loki logs ingestion elapsed",
191            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
192        )
193        .unwrap();
194    pub static ref METRIC_ELASTICSEARCH_LOGS_INGESTION_ELAPSED: HistogramVec =
195        register_histogram_vec!(
196            "greptime_servers_elasticsearch_logs_ingestion_elapsed",
197            "servers elasticsearch logs ingestion elapsed",
198            &[METRIC_DB_LABEL]
199        )
200        .unwrap();
201
202    /// Count of documents ingested into Elasticsearch logs.
203    pub static ref METRIC_ELASTICSEARCH_LOGS_DOCS_COUNT: IntCounterVec = register_int_counter_vec!(
204        "greptime_servers_elasticsearch_logs_docs_count",
205        "servers elasticsearch ingest logs docs count",
206        &[METRIC_DB_LABEL]
207    )
208    .unwrap();
209
210    pub static ref METRIC_HTTP_LOGS_TRANSFORM_ELAPSED: HistogramVec =
211        register_histogram_vec!(
212            "greptime_servers_http_logs_transform_elapsed",
213            "servers http logs transform elapsed",
214            &[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
215        )
216        .unwrap();
217    pub static ref METRIC_MYSQL_CONNECTIONS: IntGauge = register_int_gauge!(
218        "greptime_servers_mysql_connection_count",
219        "servers mysql connection count"
220    )
221    .unwrap();
222    pub static ref METRIC_MYSQL_QUERY_TIMER: HistogramVec = register_histogram_vec!(
223        "greptime_servers_mysql_query_elapsed",
224        "servers mysql query elapsed",
225        &[METRIC_MYSQL_SUBPROTOCOL_LABEL, METRIC_DB_LABEL],
226        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
227    )
228    .unwrap();
229    pub static ref METRIC_MYSQL_PREPARED_COUNT: IntCounterVec = register_int_counter_vec!(
230        "greptime_servers_mysql_prepared_count",
231        "servers mysql prepared count",
232        &[METRIC_DB_LABEL]
233    )
234    .unwrap();
235    pub static ref METRIC_POSTGRES_CONNECTIONS: IntGauge = register_int_gauge!(
236        "greptime_servers_postgres_connection_count",
237        "servers postgres connection count"
238    )
239    .unwrap();
240    pub static ref METRIC_POSTGRES_QUERY_TIMER: HistogramVec = register_histogram_vec!(
241        "greptime_servers_postgres_query_elapsed",
242        "servers postgres query elapsed",
243        &[METRIC_POSTGRES_SUBPROTOCOL_LABEL, METRIC_DB_LABEL],
244        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
245    )
246    .unwrap();
247    pub static ref METRIC_POSTGRES_PREPARED_COUNT: IntCounter = register_int_counter!(
248        "greptime_servers_postgres_prepared_count",
249        "servers postgres prepared count"
250    )
251    .unwrap();
252    pub static ref METRIC_SERVER_GRPC_DB_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
253        "greptime_servers_grpc_db_request_elapsed",
254        "servers grpc db request elapsed",
255        &[METRIC_DB_LABEL, METRIC_TYPE_LABEL, METRIC_CODE_LABEL]
256    )
257    .unwrap();
258    pub static ref METRIC_SERVER_GRPC_PROM_REQUEST_TIMER: HistogramVec = register_histogram_vec!(
259        "greptime_servers_grpc_prom_request_elapsed",
260        "servers grpc prom request elapsed",
261        &[METRIC_DB_LABEL],
262        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
263    )
264    .unwrap();
265    pub static ref METRIC_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
266        "greptime_servers_http_requests_total",
267        "servers http requests total",
268        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
269    )
270    .unwrap();
271    pub static ref METRIC_HTTP_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
272        "greptime_servers_http_requests_elapsed",
273        "servers http requests elapsed",
274        &[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL],
275        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
276    )
277    .unwrap();
278    pub static ref METRIC_GRPC_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
279        "greptime_servers_grpc_requests_total",
280        "servers grpc requests total",
281        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL]
282    )
283    .unwrap();
284    pub static ref METRIC_GRPC_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
285        "greptime_servers_grpc_requests_elapsed",
286        "servers grpc requests elapsed",
287        &[METRIC_PATH_LABEL, METRIC_CODE_LABEL],
288        vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
289    )
290    .unwrap();
291    pub static ref METRIC_JAEGER_QUERY_ELAPSED: HistogramVec = register_histogram_vec!(
292        "greptime_servers_jaeger_query_elapsed",
293        "servers jaeger query elapsed",
294        &[METRIC_DB_LABEL, METRIC_PATH_LABEL]
295    ).unwrap();
296
297    pub static ref GRPC_BULK_INSERT_ELAPSED: Histogram = register_histogram!(
298        "greptime_servers_bulk_insert_elapsed",
299        "servers handle bulk insert elapsed",
300    ).unwrap();
301}
302
303// Based on https://github.com/hyperium/tonic/blob/master/examples/src/tower/server.rs
304// See https://github.com/hyperium/tonic/issues/242
305/// A metrics middleware.
306#[derive(Debug, Clone, Default)]
307pub(crate) struct MetricsMiddlewareLayer;
308
309impl<S> Layer<S> for MetricsMiddlewareLayer {
310    type Service = MetricsMiddleware<S>;
311
312    fn layer(&self, service: S) -> Self::Service {
313        MetricsMiddleware { inner: service }
314    }
315}
316
317#[derive(Debug, Clone)]
318pub(crate) struct MetricsMiddleware<S> {
319    inner: S,
320}
321
322impl<S> Service<http::Request<BoxBody>> for MetricsMiddleware<S>
323where
324    S: Service<http::Request<BoxBody>, Response = http::Response<BoxBody>> + Clone + Send + 'static,
325    S::Future: Send + 'static,
326{
327    type Response = S::Response;
328    type Error = S::Error;
329    type Future = futures::future::BoxFuture<'static, Result<Self::Response, Self::Error>>;
330
331    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
332        self.inner.poll_ready(cx)
333    }
334
335    fn call(&mut self, req: http::Request<BoxBody>) -> Self::Future {
336        // This is necessary because tonic internally uses `tower::buffer::Buffer`.
337        // See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149
338        // for details on why this is necessary
339        let clone = self.inner.clone();
340        let mut inner = std::mem::replace(&mut self.inner, clone);
341
342        Box::pin(async move {
343            let start = Instant::now();
344            let path = req.uri().path().to_string();
345
346            // Do extra async work here...
347            let response = inner.call(req).await?;
348
349            let latency = start.elapsed().as_secs_f64();
350            let status = response.status().as_u16().to_string();
351
352            let labels = [path.as_str(), status.as_str()];
353            METRIC_GRPC_REQUESTS_TOTAL.with_label_values(&labels).inc();
354            METRIC_GRPC_REQUESTS_ELAPSED
355                .with_label_values(&labels)
356                .observe(latency);
357
358            Ok(response)
359        })
360    }
361}
362
363/// A middleware to record metrics for HTTP.
364// Based on https://github.com/tokio-rs/axum/blob/axum-v0.6.16/examples/prometheus-metrics/src/main.rs
365pub(crate) async fn http_metrics_layer(req: Request, next: Next) -> impl IntoResponse {
366    let start = Instant::now();
367    let path = if let Some(matched_path) = req.extensions().get::<MatchedPath>() {
368        matched_path.as_str().to_string()
369    } else {
370        req.uri().path().to_string()
371    };
372    let method = req.method().clone();
373
374    let db = req
375        .extensions()
376        .get::<QueryContext>()
377        .map(|ctx| ctx.get_db_string())
378        .unwrap_or_else(|| "unknown".to_string());
379
380    let response = next.run(req).await;
381
382    let latency = start.elapsed().as_secs_f64();
383    let status = response.status();
384    let status = status.as_str();
385    let method_str = method.as_str();
386
387    let labels = [method_str, &path, status, db.as_str()];
388    METRIC_HTTP_REQUESTS_TOTAL.with_label_values(&labels).inc();
389    METRIC_HTTP_REQUESTS_ELAPSED
390        .with_label_values(&labels)
391        .observe(latency);
392
393    response
394}