use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use common_catalog::consts::DEFAULT_SCHEMA_NAME;
use common_error::ext::BoxedError;
use common_telemetry::{error, info, warn};
use snafu::{OptionExt, ResultExt};
use tokio::sync::Semaphore;
use tokio::time::Instant;
use crate::database::{parse_proxy_opts, DatabaseClient};
use crate::error::{Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
use crate::{database, Tool};
/// Selects which part of an import to perform (the `--target` / `-t` option).
#[derive(Debug, Default, Clone, ValueEnum)]
enum ImportTarget {
    /// Import only the schema (runs `create_database.sql` and `create_tables.sql`).
    Schema,
    /// Import only the data (runs `copy_from.sql`).
    Data,
    /// Import the schema first, then the data.
    #[default]
    All,
}
/// Import schema and/or data into a database server from a directory of SQL files.
#[derive(Debug, Default, Parser)]
pub struct ImportCommand {
    /// Address of the server to connect to.
    #[clap(long)]
    addr: String,

    /// Directory holding the files to import.
    ///
    /// SQL files are read from `<input_dir>/<catalog>/<schema>/`.
    #[clap(long)]
    input_dir: String,

    /// Database to import, split into a catalog and an optional schema.
    // NOTE(review): the split is done by `database::split_database`; the `*` in
    // the default presumably means "every schema" -- confirm against that helper.
    #[clap(long, default_value = "greptime-*")]
    database: String,

    /// Number of databases imported concurrently.
    #[clap(long, short = 'j', default_value = "1")]
    import_jobs: usize,

    /// Retry limit for each job.
    // NOTE(review): this value is parsed but not read by `build` in the visible
    // code, so it currently has no effect here -- confirm whether it should be
    // forwarded to the database client.
    #[clap(long, default_value = "3")]
    max_retry: usize,

    /// What to import: `schema`, `data`, or `all`.
    #[clap(long, short = 't', value_enum, default_value = "all")]
    target: ImportTarget,

    /// Basic authentication credentials passed to the database client.
    #[clap(long)]
    auth_basic: Option<String>,

    /// Timeout handed to the database client, in humantime syntax (e.g. `30s`).
    ///
    /// When omitted, a zero duration is passed to the client.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy server address to use for the connection.
    #[clap(long)]
    proxy: Option<String>,

    /// Do not use a proxy server.
    #[clap(long, default_value = "false")]
    no_proxy: bool,
}
impl ImportCommand {
    /// Turns the parsed command-line options into a runnable [`Import`] tool.
    ///
    /// The `database` option is split into a catalog and an optional schema,
    /// the proxy options are resolved, and a [`DatabaseClient`] is created for
    /// the catalog.
    ///
    /// # Errors
    ///
    /// Returns a [`BoxedError`] when the database name cannot be split or the
    /// proxy options are rejected by `parse_proxy_opts`.
    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
        let (catalog, schema) = match database::split_database(&self.database) {
            Ok(parts) => parts,
            Err(e) => return Err(BoxedError::new(e)),
        };
        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;

        // A missing `--timeout` is passed on as the default (zero) duration.
        let timeout = self.timeout.unwrap_or_default();
        let database_client = DatabaseClient::new(
            self.addr.clone(),
            catalog.clone(),
            self.auth_basic.clone(),
            timeout,
            proxy,
        );

        let tool: Box<dyn Tool> = Box::new(Import {
            catalog,
            schema,
            database_client,
            input_dir: self.input_dir.clone(),
            parallelism: self.import_jobs,
            target: self.target.clone(),
        });
        Ok(tool)
    }
}
/// State of an import run, built by `ImportCommand::build`.
pub struct Import {
    /// Catalog name; also the sub-directory of `input_dir` that is scanned.
    catalog: String,
    /// Single schema to import; `None` imports every directory found under the catalog.
    schema: Option<String>,
    /// Client used to execute the SQL read from the input files.
    database_client: DatabaseClient,
    /// Root directory of the files to import.
    input_dir: String,
    /// Number of permits of the semaphore that limits concurrent jobs.
    parallelism: usize,
    /// Which part (schema, data, or both) `do_work` imports.
    target: ImportTarget,
}
impl Import {
    /// Imports the schema: runs `create_database.sql`, then `create_tables.sql`,
    /// for every selected database.
    ///
    /// # Errors
    ///
    /// Propagates the error of [`Self::do_sql_job`].
    async fn import_create_table(&self) -> Result<()> {
        // `create_database.sql` is executed against the default schema instead
        // of the schema being imported (presumably because the latter may not
        // exist yet).
        self.do_sql_job("create_database.sql", Some(DEFAULT_SCHEMA_NAME))
            .await?;
        self.do_sql_job("create_tables.sql", None).await
    }

    /// Imports the data: runs `copy_from.sql` for every selected database.
    ///
    /// # Errors
    ///
    /// Propagates the error of [`Self::do_sql_job`].
    async fn import_database_data(&self) -> Result<()> {
        self.do_sql_job("copy_from.sql", None).await
    }

    /// Executes the SQL file `filename` of every selected database.
    ///
    /// For each database, `<input_dir>/<catalog>/<database>/<filename>` is read
    /// and, unless it is empty, sent to the database client. The statement is
    /// executed against `exec_db` when given, otherwise against the database
    /// the file belongs to. At most `parallelism` jobs (but always at least
    /// one) run at the same time.
    ///
    /// # Errors
    ///
    /// Fails only when the list of databases cannot be determined. A failure of
    /// an individual job is logged and shows up in the summary log line, but
    /// does not make this function return an error.
    async fn do_sql_job(&self, filename: &str, exec_db: Option<&str>) -> Result<()> {
        let timer = Instant::now();
        // A semaphore with zero permits would never hand one out, so a
        // parallelism of 0 would make every job wait forever. Always allow at
        // least one job to run.
        let semaphore = Arc::new(Semaphore::new(self.parallelism.max(1)));
        let db_names = self.get_db_names().await?;
        let db_count = db_names.len();
        let mut tasks = Vec::with_capacity(db_count);
        for schema in db_names {
            let semaphore_moved = semaphore.clone();
            tasks.push(async move {
                let _permit = semaphore_moved
                    .acquire()
                    .await
                    .expect("the semaphore is never closed");
                let database_input_dir = self.catalog_path().join(&schema);
                let sql_file = database_input_dir.join(filename);
                let sql = tokio::fs::read_to_string(sql_file)
                    .await
                    .context(FileIoSnafu)?;
                if sql.is_empty() {
                    info!("Empty `{filename}` {database_input_dir:?}");
                } else {
                    let db = exec_db.unwrap_or(&schema);
                    self.database_client.sql(&sql, db).await?;
                    info!("Imported `{filename}` for database {schema}");
                }

                Ok::<(), Error>(())
            })
        }

        // Count the successful jobs; failed ones are only logged.
        let success = futures::future::join_all(tasks)
            .await
            .into_iter()
            .filter(|r| match r {
                Ok(_) => true,
                Err(e) => {
                    error!(e; "import {filename} job failed");
                    false
                }
            })
            .count();
        let elapsed = timer.elapsed();
        info!("Success {success}/{db_count} `{filename}` jobs, cost: {elapsed:?}");

        Ok(())
    }

    /// Returns `<input_dir>/<catalog>`, the directory scanned for databases.
    fn catalog_path(&self) -> PathBuf {
        PathBuf::from(&self.input_dir).join(&self.catalog)
    }

    /// Returns the names of the databases to import.
    ///
    /// Without a configured schema this is every directory found by
    /// [`Self::all_db_names`]. With one, it is the single directory whose name
    /// equals the schema when both are lowercased.
    ///
    /// # Errors
    ///
    /// Fails when the catalog directory cannot be listed, or with a
    /// schema-not-found error when no directory matches the configured schema.
    async fn get_db_names(&self) -> Result<Vec<String>> {
        let db_names = self.all_db_names().await?;
        let Some(schema) = &self.schema else {
            return Ok(db_names);
        };

        // Lowercase the wanted name once instead of once per candidate.
        let wanted = schema.to_lowercase();
        db_names
            .into_iter()
            .find(|db_name| db_name.to_lowercase() == wanted)
            .map(|name| vec![name])
            .context(SchemaNotFoundSnafu {
                catalog: &self.catalog,
                schema,
            })
    }

    /// Lists the names of all directories directly under [`Self::catalog_path`].
    ///
    /// Entries that are not directories are skipped. Names that are not valid
    /// UTF-8 are converted lossily.
    ///
    /// # Errors
    ///
    /// Fails when the catalog directory cannot be opened or iterated.
    async fn all_db_names(&self) -> Result<Vec<String>> {
        let mut db_names = vec![];
        let path = self.catalog_path();
        let mut entries = tokio::fs::read_dir(path).await.context(FileIoSnafu)?;
        while let Some(entry) = entries.next_entry().await.context(FileIoSnafu)? {
            let path = entry.path();
            // Same semantics as `Path::is_dir` (symlinks are followed, any error
            // counts as "not a directory"), but without a blocking filesystem
            // call inside async code.
            let is_dir = tokio::fs::metadata(&path)
                .await
                .map(|meta| meta.is_dir())
                .unwrap_or(false);
            if !is_dir {
                continue;
            }
            let Some(name) = path.file_name() else {
                warn!("Failed to get the file name of {:?}", path);
                continue;
            };
            db_names.push(name.to_string_lossy().to_string());
        }
        Ok(db_names)
    }
}
#[async_trait]
impl Tool for Import {
    /// Runs the import phases selected by `target`: the schema phase, the data
    /// phase, or the schema phase followed by the data phase.
    ///
    /// The first failing phase aborts the run and its error is returned boxed.
    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
        // Decode the target into the phases it implies.
        let (with_schema, with_data) = match self.target {
            ImportTarget::Schema => (true, false),
            ImportTarget::Data => (false, true),
            ImportTarget::All => (true, true),
        };

        if with_schema {
            self.import_create_table().await.map_err(BoxedError::new)?;
        }
        if with_data {
            self.import_database_data().await.map_err(BoxedError::new)?;
        }
        Ok(())
    }
}