catalog/system_schema/information_schema/
cluster_info.rsuse std::sync::{Arc, Weak};
use std::time::Duration;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::cluster::NodeInfo;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::timestamp::Timestamp;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{
Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
};
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::CLUSTER_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;
const PEER_ID: &str = "peer_id";
const PEER_TYPE: &str = "peer_type";
const PEER_ADDR: &str = "peer_addr";
const VERSION: &str = "version";
const GIT_COMMIT: &str = "git_commit";
const START_TIME: &str = "start_time";
const UPTIME: &str = "uptime";
const ACTIVE_TIME: &str = "active_time";
const INIT_CAPACITY: usize = 42;
pub(super) struct InformationSchemaClusterInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaClusterInfo {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(PEER_ID, ConcreteDataType::int64_datatype(), false),
ColumnSchema::new(PEER_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(VERSION, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(GIT_COMMIT, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
START_TIME,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
ColumnSchema::new(UPTIME, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(ACTIVE_TIME, ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaClusterInfoBuilder {
InformationSchemaClusterInfoBuilder::new(self.schema.clone(), self.catalog_manager.clone())
}
}
impl InformationTable for InformationSchemaClusterInfo {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID
}
fn table_name(&self) -> &'static str {
CLUSTER_INFO
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_cluster_info(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaClusterInfoBuilder {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
peer_ids: Int64VectorBuilder,
peer_types: StringVectorBuilder,
peer_addrs: StringVectorBuilder,
versions: StringVectorBuilder,
git_commits: StringVectorBuilder,
start_times: TimestampMillisecondVectorBuilder,
uptimes: StringVectorBuilder,
active_times: StringVectorBuilder,
}
impl InformationSchemaClusterInfoBuilder {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
peer_ids: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
versions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
git_commits: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
uptimes: StringVectorBuilder::with_capacity(INIT_CAPACITY),
active_times: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
async fn make_cluster_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let information_extension = utils::information_extension(&self.catalog_manager)?;
let node_infos = information_extension.nodes().await?;
for node_info in node_infos {
self.add_node_info(&predicates, node_info);
}
self.finish()
}
fn add_node_info(&mut self, predicates: &Predicates, node_info: NodeInfo) {
let peer_type = node_info.status.role_name();
let row = [
(PEER_ID, &Value::from(node_info.peer.id)),
(PEER_TYPE, &Value::from(peer_type)),
(PEER_ADDR, &Value::from(node_info.peer.addr.as_str())),
(VERSION, &Value::from(node_info.version.as_str())),
(GIT_COMMIT, &Value::from(node_info.git_commit.as_str())),
];
if !predicates.eval(&row) {
return;
}
if peer_type == "FRONTEND" || peer_type == "METASRV" {
self.peer_ids.push(Some(-1));
} else {
self.peer_ids.push(Some(node_info.peer.id as i64));
}
self.peer_types.push(Some(peer_type));
self.peer_addrs.push(Some(&node_info.peer.addr));
self.versions.push(Some(&node_info.version));
self.git_commits.push(Some(&node_info.git_commit));
if node_info.start_time_ms > 0 {
self.start_times
.push(Some(TimestampMillisecond(Timestamp::new_millisecond(
node_info.start_time_ms as i64,
))));
self.uptimes.push(Some(
Self::format_duration_since(node_info.start_time_ms).as_str(),
));
} else {
self.start_times.push(None);
self.uptimes.push(None);
}
if node_info.last_activity_ts > 0 {
self.active_times.push(Some(
Self::format_duration_since(node_info.last_activity_ts as u64).as_str(),
));
} else {
self.active_times.push(None);
}
}
fn format_duration_since(ts: u64) -> String {
let now = common_time::util::current_time_millis() as u64;
let duration_since = now - ts;
humantime::format_duration(Duration::from_millis(duration_since)).to_string()
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.peer_ids.finish()),
Arc::new(self.peer_types.finish()),
Arc::new(self.peer_addrs.finish()),
Arc::new(self.versions.finish()),
Arc::new(self.git_commits.finish()),
Arc::new(self.start_times.finish()),
Arc::new(self.uptimes.finish()),
Arc::new(self.active_times.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaClusterInfo {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_cluster_info(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}