frontend/instance/
prom_store.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::sync::Arc;
17
18use api::prom_store::remote::read_request::ResponseType;
19use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse};
20use api::v1::RowInsertRequests;
21use async_trait::async_trait;
22use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
23use client::OutputData;
24use common_catalog::format_full_table_name;
25use common_error::ext::BoxedError;
26use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
27use common_query::Output;
28use common_recordbatch::RecordBatches;
29use common_telemetry::{debug, tracing};
30use operator::insert::InserterRef;
31use operator::statement::StatementExecutor;
32use prost::Message;
33use servers::error::{self, AuthSnafu, Result as ServerResult};
34use servers::http::header::{collect_plan_metrics, CONTENT_ENCODING_SNAPPY, CONTENT_TYPE_PROTOBUF};
35use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
36use servers::interceptor::{PromStoreProtocolInterceptor, PromStoreProtocolInterceptorRef};
37use servers::prom_store::{self, Metrics};
38use servers::query_handler::{
39    PromStoreProtocolHandler, PromStoreProtocolHandlerRef, PromStoreResponse,
40};
41use session::context::QueryContextRef;
42use snafu::{OptionExt, ResultExt};
43
44use crate::error::{
45    CatalogSnafu, ExecLogicalPlanSnafu, PromStoreRemoteQueryPlanSnafu, ReadTableSnafu, Result,
46    TableNotFoundSnafu,
47};
48use crate::instance::Instance;
49
50const SAMPLES_RESPONSE_TYPE: i32 = ResponseType::Samples as i32;
51
52#[inline]
53fn is_supported(response_type: i32) -> bool {
54    // Only supports samples response right now
55    response_type == SAMPLES_RESPONSE_TYPE
56}
57
58/// Negotiating the content type of the remote read response.
59///
60/// Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is
61/// implemented by server, error is returned.
62/// For request that do not contain `accepted_response_types` field the SAMPLES response type will be used.
63fn negotiate_response_type(accepted_response_types: &[i32]) -> ServerResult<ResponseType> {
64    if accepted_response_types.is_empty() {
65        return Ok(ResponseType::Samples);
66    }
67
68    let response_type = accepted_response_types
69        .iter()
70        .find(|t| is_supported(**t))
71        .with_context(|| error::NotSupportedSnafu {
72            feat: format!(
73                "server does not support any of the requested response types: {accepted_response_types:?}",
74            ),
75        })?;
76
77    // It's safe to unwrap here, we known that it should be SAMPLES_RESPONSE_TYPE
78    Ok(ResponseType::try_from(*response_type).unwrap())
79}
80
81async fn to_query_result(table_name: &str, output: Output) -> ServerResult<QueryResult> {
82    let OutputData::Stream(stream) = output.data else {
83        unreachable!()
84    };
85    let recordbatches = RecordBatches::try_collect(stream)
86        .await
87        .context(error::CollectRecordbatchSnafu)?;
88    Ok(QueryResult {
89        timeseries: prom_store::recordbatches_to_timeseries(table_name, recordbatches)?,
90    })
91}
92
93impl Instance {
94    #[tracing::instrument(skip_all)]
95    async fn handle_remote_query(
96        &self,
97        ctx: &QueryContextRef,
98        catalog_name: &str,
99        schema_name: &str,
100        table_name: &str,
101        query: &Query,
102    ) -> Result<Output> {
103        let table = self
104            .catalog_manager
105            .table(catalog_name, schema_name, table_name, Some(ctx))
106            .await
107            .context(CatalogSnafu)?
108            .with_context(|| TableNotFoundSnafu {
109                table_name: format_full_table_name(catalog_name, schema_name, table_name),
110            })?;
111
112        let dataframe = self
113            .query_engine
114            .read_table(table)
115            .with_context(|_| ReadTableSnafu {
116                table_name: format_full_table_name(catalog_name, schema_name, table_name),
117            })?;
118
119        let logical_plan =
120            prom_store::query_to_plan(dataframe, query).context(PromStoreRemoteQueryPlanSnafu)?;
121
122        debug!(
123            "Prometheus remote read, table: {}, logical plan: {}",
124            table_name,
125            logical_plan.display_indent(),
126        );
127
128        self.query_engine
129            .execute(logical_plan, ctx.clone())
130            .await
131            .context(ExecLogicalPlanSnafu)
132    }
133
134    #[tracing::instrument(skip_all)]
135    async fn handle_remote_queries(
136        &self,
137        ctx: QueryContextRef,
138        queries: &[Query],
139    ) -> ServerResult<Vec<(String, Output)>> {
140        let mut results = Vec::with_capacity(queries.len());
141
142        let catalog_name = ctx.current_catalog();
143        let schema_name = ctx.current_schema();
144
145        for query in queries {
146            let table_name = prom_store::table_name(query)?;
147
148            let output = self
149                .handle_remote_query(&ctx, catalog_name, &schema_name, &table_name, query)
150                .await
151                .map_err(BoxedError::new)
152                .context(error::ExecuteQuerySnafu)?;
153
154            results.push((table_name, output));
155        }
156        Ok(results)
157    }
158}
159
160#[async_trait]
161impl PromStoreProtocolHandler for Instance {
162    async fn write(
163        &self,
164        request: RowInsertRequests,
165        ctx: QueryContextRef,
166        with_metric_engine: bool,
167    ) -> ServerResult<Output> {
168        self.plugins
169            .get::<PermissionCheckerRef>()
170            .as_ref()
171            .check_permission(ctx.current_user(), PermissionReq::PromStoreWrite)
172            .context(AuthSnafu)?;
173        let interceptor_ref = self
174            .plugins
175            .get::<PromStoreProtocolInterceptorRef<servers::error::Error>>();
176        interceptor_ref.pre_write(&request, ctx.clone())?;
177
178        let _guard = if let Some(limiter) = &self.limiter {
179            Some(
180                limiter
181                    .limit_row_inserts(&request)
182                    .await
183                    .map_err(BoxedError::new)
184                    .context(error::OtherSnafu)?,
185            )
186        } else {
187            None
188        };
189
190        let output = if with_metric_engine {
191            let physical_table = ctx
192                .extension(PHYSICAL_TABLE_PARAM)
193                .unwrap_or(GREPTIME_PHYSICAL_TABLE)
194                .to_string();
195            self.handle_metric_row_inserts(request, ctx.clone(), physical_table.to_string())
196                .await
197                .map_err(BoxedError::new)
198                .context(error::ExecuteGrpcQuerySnafu)?
199        } else {
200            self.handle_row_inserts(request, ctx.clone(), true, true)
201                .await
202                .map_err(BoxedError::new)
203                .context(error::ExecuteGrpcQuerySnafu)?
204        };
205
206        Ok(output)
207    }
208
209    async fn read(
210        &self,
211        request: ReadRequest,
212        ctx: QueryContextRef,
213    ) -> ServerResult<PromStoreResponse> {
214        self.plugins
215            .get::<PermissionCheckerRef>()
216            .as_ref()
217            .check_permission(ctx.current_user(), PermissionReq::PromStoreRead)
218            .context(AuthSnafu)?;
219        let interceptor_ref = self
220            .plugins
221            .get::<PromStoreProtocolInterceptorRef<servers::error::Error>>();
222        interceptor_ref.pre_read(&request, ctx.clone())?;
223
224        let response_type = negotiate_response_type(&request.accepted_response_types)?;
225
226        // TODO(dennis): use read_hints to speedup query if possible
227        let results = self.handle_remote_queries(ctx, &request.queries).await?;
228
229        match response_type {
230            ResponseType::Samples => {
231                let mut query_results = Vec::with_capacity(results.len());
232                let mut map = HashMap::new();
233                for (table_name, output) in results {
234                    let plan = output.meta.plan.clone();
235                    query_results.push(to_query_result(&table_name, output).await?);
236                    if let Some(ref plan) = plan {
237                        collect_plan_metrics(plan, &mut [&mut map]);
238                    }
239                }
240
241                let response = ReadResponse {
242                    results: query_results,
243                };
244
245                let resp_metrics = map
246                    .into_iter()
247                    .map(|(k, v)| (k, v.into()))
248                    .collect::<HashMap<_, _>>();
249
250                // TODO(dennis): may consume too much memory, adds flow control
251                Ok(PromStoreResponse {
252                    content_type: CONTENT_TYPE_PROTOBUF.clone(),
253                    content_encoding: CONTENT_ENCODING_SNAPPY.clone(),
254                    resp_metrics,
255                    body: prom_store::snappy_compress(&response.encode_to_vec())?,
256                })
257            }
258            ResponseType::StreamedXorChunks => error::NotSupportedSnafu {
259                feat: "streamed remote read",
260            }
261            .fail(),
262        }
263    }
264
265    async fn ingest_metrics(&self, _metrics: Metrics) -> ServerResult<()> {
266        todo!();
267    }
268}
269
270/// This handler is mainly used for `frontend` or `standalone` to directly import
271/// the metrics collected by itself, thereby avoiding importing metrics through the network,
272/// thus reducing compression and network transmission overhead,
273/// so only implement `PromStoreProtocolHandler::write` method.
274pub struct ExportMetricHandler {
275    inserter: InserterRef,
276    statement_executor: Arc<StatementExecutor>,
277}
278
279impl ExportMetricHandler {
280    pub fn new_handler(
281        inserter: InserterRef,
282        statement_executor: Arc<StatementExecutor>,
283    ) -> PromStoreProtocolHandlerRef {
284        Arc::new(Self {
285            inserter,
286            statement_executor,
287        })
288    }
289}
290
291#[async_trait]
292impl PromStoreProtocolHandler for ExportMetricHandler {
293    async fn write(
294        &self,
295        request: RowInsertRequests,
296        ctx: QueryContextRef,
297        _: bool,
298    ) -> ServerResult<Output> {
299        self.inserter
300            .handle_metric_row_inserts(
301                request,
302                ctx,
303                &self.statement_executor,
304                GREPTIME_PHYSICAL_TABLE.to_string(),
305            )
306            .await
307            .map_err(BoxedError::new)
308            .context(error::ExecuteGrpcQuerySnafu)
309    }
310
311    async fn read(
312        &self,
313        _request: ReadRequest,
314        _ctx: QueryContextRef,
315    ) -> ServerResult<PromStoreResponse> {
316        unreachable!();
317    }
318
319    async fn ingest_metrics(&self, _metrics: Metrics) -> ServerResult<()> {
320        unreachable!();
321    }
322}