1use std::collections::HashMap;
16use std::fmt::Display;
17use std::fs::OpenOptions;
18use std::io;
19use std::io::Write;
20use std::path::{Path, PathBuf};
21use std::process::{Child, Command};
22use std::sync::atomic::{AtomicU32, Ordering};
23use std::sync::{Arc, Mutex};
24use std::time::Duration;
25
26use async_trait::async_trait;
27use sqlness::{Database, EnvController, QueryContext};
28use tokio::sync::Mutex as TokioMutex;
29
30use crate::client::MultiProtocolClient;
31use crate::cmd::bare::ServerAddr;
32use crate::formatter::{ErrorFormatter, MysqlFormatter, OutputFormatter, PostgresqlFormatter};
33use crate::protocol_interceptor::{MYSQL, PROTOCOL_KEY};
34use crate::server_mode::ServerMode;
35use crate::util;
36use crate::util::{PROGRAM, get_workspace_root, maybe_pull_binary};
37
// Indices into `GreptimeDBContext::server_modes`, used so a restart can
// recover the exact mode (ports, args) each component was started with.
//
// A context is either standalone OR distributed, never both, so the
// standalone slot and the metasrv slot can safely share index 0.
const SERVER_MODE_STANDALONE_IDX: usize = 0;
const SERVER_MODE_METASRV_IDX: usize = 0;
// Distributed mode starts three datanodes occupying indices 1..=3.
const SERVER_MODE_DATANODE_START_IDX: usize = 1;
const SERVER_MODE_FRONTEND_IDX: usize = 4;
const SERVER_MODE_FLOWNODE_IDX: usize = 5;
45
/// Write-ahead-log backend used by the spawned servers.
///
/// `Debug` is derived for parity with [`ServiceProvider`] and for easier
/// diagnostics in test output.
#[derive(Debug, Clone)]
pub enum WalConfig {
    /// Local raft-engine based WAL.
    RaftEngine,
    /// Remote WAL backed by Kafka.
    Kafka {
        /// Whether the runner must spin up its own Kafka cluster first.
        needs_kafka_cluster: bool,
        /// Kafka broker endpoints, as "host:port" strings.
        broker_endpoints: Vec<String>,
    },
}
56
/// Where an external kv-backend service (PostgreSQL/MySQL) comes from.
#[derive(Debug, Clone)]
pub(crate) enum ServiceProvider {
    /// The runner creates (and owns) the service itself.
    Create,
    /// Reuse an already-running service at the given address.
    External(String),
}

impl From<&str> for ServiceProvider {
    /// An empty string means "create the service locally"; any other value
    /// is treated as the address of an external instance.
    fn from(value: &str) -> Self {
        match value {
            "" => Self::Create,
            addr => Self::External(addr.to_string()),
        }
    }
}
72
/// Configuration of the metadata store backend used by metasrv.
#[derive(Clone)]
pub struct StoreConfig {
    // Addresses of the backing store (etcd / PostgreSQL / MySQL), "host:port".
    pub store_addrs: Vec<String>,
    // Whether the runner should launch an etcd cluster itself.
    pub setup_etcd: bool,
    // `Some(Create)` starts a local PostgreSQL; `Some(External(addr))` reuses one.
    pub(crate) setup_pg: Option<ServiceProvider>,
    // Same as `setup_pg`, but for MySQL.
    pub(crate) setup_mysql: Option<ServiceProvider>,
    // NOTE(review): not read in this file; presumably toggles the flat
    // storage format in server args — confirm in `ServerMode::get_args`.
    pub enable_flat_format: bool,
}
81
/// Controller that builds, spawns and manages GreptimeDB processes for
/// sqlness test runs.
#[derive(Clone)]
pub struct Env {
    // Working directory for data, configs and per-component log files.
    sqlness_home: PathBuf,
    // Externally provided server addresses; when set, nothing is spawned.
    server_addrs: ServerAddr,
    wal: WalConfig,

    // Directory containing the `greptime` binary; `None` until built/supplied.
    bins_dir: Arc<Mutex<Option<PathBuf>>>,
    // Binary directories keyed by version string ("latest", tag names, ...).
    versioned_bins_dirs: Arc<Mutex<HashMap<String, PathBuf>>>,
    // Whether a missing versioned binary may be downloaded on demand.
    pull_version_on_need: bool,
    // Metadata store configuration handed to metasrv.
    store_config: StoreConfig,
    // Extra CLI arguments appended to every spawned server command line.
    extra_args: Vec<String>,
}
101
102#[async_trait]
103impl EnvController for Env {
104 type DB = GreptimeDB;
105
106 async fn start(&self, mode: &str, id: usize, _config: Option<&Path>) -> Self::DB {
107 if self.server_addrs.server_addr.is_some() && id > 0 {
108 panic!("Parallel test mode is not supported when server address is already set.");
109 }
110
111 unsafe {
112 std::env::set_var("SQLNESS_HOME", self.sqlness_home.display().to_string());
113 }
114 match mode {
115 "standalone" => self.start_standalone(id).await,
116 "distributed" => self.start_distributed(id).await,
117 _ => panic!("Unexpected mode: {mode}"),
118 }
119 }
120
121 async fn stop(&self, _mode: &str, mut database: Self::DB) {
123 database.stop();
124 }
125}
126
127impl Env {
128 pub fn new(
129 data_home: PathBuf,
130 server_addrs: ServerAddr,
131 wal: WalConfig,
132 pull_version_on_need: bool,
133 bins_dir: Option<PathBuf>,
134 store_config: StoreConfig,
135 extra_args: Vec<String>,
136 ) -> Self {
137 Self {
138 sqlness_home: data_home,
139 server_addrs,
140 wal,
141 pull_version_on_need,
142 bins_dir: Arc::new(Mutex::new(bins_dir.clone())),
143 versioned_bins_dirs: Arc::new(Mutex::new(HashMap::from_iter([(
144 "latest".to_string(),
145 bins_dir.clone().unwrap_or(util::get_binary_dir("debug")),
146 )]))),
147 store_config,
148 extra_args,
149 }
150 }
151
    /// Starts (or connects to) a standalone GreptimeDB instance for test
    /// slot `id`.
    ///
    /// If an external server address was supplied, simply connects to it;
    /// otherwise builds the binary if needed, prepares the WAL, spawns a
    /// standalone server on random ports and connects to it.
    async fn start_standalone(&self, id: usize) -> GreptimeDB {
        println!("Starting standalone instance id: {id}");

        if self.server_addrs.server_addr.is_some() {
            // Remote mode: reuse the externally managed server.
            self.connect_db(&self.server_addrs, id).await
        } else {
            self.build_db();
            self.setup_wal();
            let mut db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone());

            // Record the mode so a later restart can reuse the same ports.
            let server_mode = ServerMode::random_standalone();
            db_ctx.set_server_mode(server_mode.clone(), SERVER_MODE_STANDALONE_IDX);
            let server_addr = server_mode.server_addr().unwrap();
            let server_process = self.start_server(server_mode, &db_ctx, id, true).await;

            // Hand ownership of the process and context to the DB handle.
            let mut greptimedb = self.connect_db(&server_addr, id).await;
            greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![server_process])));
            greptimedb.is_standalone = true;
            greptimedb.ctx = db_ctx;

            greptimedb
        }
    }
175
    /// Starts a full distributed deployment for test slot `id`: one metasrv,
    /// three datanodes, one frontend and one flownode — or connects to an
    /// external server when an address was supplied.
    async fn start_distributed(&self, id: usize) -> GreptimeDB {
        if self.server_addrs.server_addr.is_some() {
            // Remote mode: reuse the externally managed server.
            self.connect_db(&self.server_addrs, id).await
        } else {
            self.build_db();
            self.setup_wal();
            self.setup_etcd();
            self.setup_pg();
            self.setup_mysql().await;
            let mut db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone());

            // Metasrv goes first; every other component registers against it,
            // so its RPC port is extracted for the other modes.
            let meta_server_mode = ServerMode::random_metasrv();
            let metasrv_port = match &meta_server_mode {
                ServerMode::Metasrv {
                    rpc_server_addr, ..
                } => rpc_server_addr
                    .split(':')
                    .nth(1)
                    .unwrap()
                    .parse::<u16>()
                    .unwrap(),
                _ => panic!(
                    "metasrv mode not set, maybe running in remote mode which doesn't support restart?"
                ),
            };
            db_ctx.set_server_mode(meta_server_mode.clone(), SERVER_MODE_METASRV_IDX);
            let meta_server = self.start_server(meta_server_mode, &db_ctx, id, true).await;

            // Three datanodes stored at consecutive indices so a restart can
            // recover each one's mode.
            let datanode_1_mode = ServerMode::random_datanode(metasrv_port, 0);
            db_ctx.set_server_mode(datanode_1_mode.clone(), SERVER_MODE_DATANODE_START_IDX);
            let datanode_1 = self.start_server(datanode_1_mode, &db_ctx, id, true).await;
            let datanode_2_mode = ServerMode::random_datanode(metasrv_port, 1);
            db_ctx.set_server_mode(datanode_2_mode.clone(), SERVER_MODE_DATANODE_START_IDX + 1);
            let datanode_2 = self.start_server(datanode_2_mode, &db_ctx, id, true).await;
            let datanode_3_mode = ServerMode::random_datanode(metasrv_port, 2);
            db_ctx.set_server_mode(datanode_3_mode.clone(), SERVER_MODE_DATANODE_START_IDX + 2);
            let datanode_3 = self.start_server(datanode_3_mode, &db_ctx, id, true).await;

            // The frontend is what test clients actually connect to.
            let frontend_mode = ServerMode::random_frontend(metasrv_port);
            let server_addr = frontend_mode.server_addr().unwrap();
            db_ctx.set_server_mode(frontend_mode.clone(), SERVER_MODE_FRONTEND_IDX);
            let frontend = self.start_server(frontend_mode, &db_ctx, id, true).await;

            let flownode_mode = ServerMode::random_flownode(metasrv_port, 0);
            db_ctx.set_server_mode(flownode_mode.clone(), SERVER_MODE_FLOWNODE_IDX);
            let flownode = self.start_server(flownode_mode, &db_ctx, id, true).await;

            let mut greptimedb = self.connect_db(&server_addr, id).await;

            // Hand all process handles to the DB handle so it can stop them.
            greptimedb.metasrv_process = Some(meta_server).into();
            greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![
                datanode_1, datanode_2, datanode_3,
            ])));
            greptimedb.frontend_process = Some(frontend).into();
            greptimedb.flownode_process = Some(flownode).into();
            greptimedb.is_standalone = false;
            greptimedb.ctx = db_ctx;

            greptimedb
        }
    }
238
239 async fn connect_db(&self, server_addr: &ServerAddr, id: usize) -> GreptimeDB {
240 let grpc_server_addr = server_addr.server_addr.as_ref().unwrap();
241 let pg_server_addr = server_addr.pg_server_addr.as_ref().unwrap();
242 let mysql_server_addr = server_addr.mysql_server_addr.as_ref().unwrap();
243
244 let client =
245 MultiProtocolClient::connect(grpc_server_addr, pg_server_addr, mysql_server_addr).await;
246 GreptimeDB {
247 client: TokioMutex::new(client),
248 server_processes: None,
249 metasrv_process: None.into(),
250 frontend_process: None.into(),
251 flownode_process: None.into(),
252 ctx: GreptimeDBContext {
253 time: 0,
254 datanode_id: Default::default(),
255 wal: self.wal.clone(),
256 store_config: self.store_config.clone(),
257 server_modes: Vec::new(),
258 },
259 is_standalone: false,
260 env: self.clone(),
261 id,
262 }
263 }
264
265 fn stop_server(process: &mut Child) {
266 let _ = process.kill();
267 let _ = process.wait();
268 }
269
    /// Spawns a single server process of the given `mode`.
    ///
    /// Stdout is redirected to a per-component log file under
    /// `sqlness_home` — truncated on fresh starts (`truncate_log == true`)
    /// and appended to on restarts. Panics if any of the mode's ports is
    /// already taken, and waits up to 10 seconds for the ports to come up
    /// after spawning.
    async fn start_server(
        &self,
        mode: ServerMode,
        db_ctx: &GreptimeDBContext,
        id: usize,
        truncate_log: bool,
    ) -> Child {
        let log_file_name = match mode {
            ServerMode::Datanode { node_id, .. } => {
                // Each datanode bumps the shared counter so per-node
                // resources stay distinct.
                db_ctx.incr_datanode_id();
                format!("greptime-{}-sqlness-datanode-{}.log", id, node_id)
            }
            ServerMode::Flownode { .. } => format!("greptime-{}-sqlness-flownode.log", id),
            ServerMode::Frontend { .. } => format!("greptime-{}-sqlness-frontend.log", id),
            ServerMode::Metasrv { .. } => format!("greptime-{}-sqlness-metasrv.log", id),
            ServerMode::Standalone { .. } => format!("greptime-{}-sqlness-standalone.log", id),
        };
        let stdout_file_name = self.sqlness_home.join(log_file_name).display().to_string();

        println!("DB instance {id} log file at {stdout_file_name}");

        // Truncate for a fresh start, append across restarts.
        let stdout_file = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(truncate_log)
            .append(!truncate_log)
            .open(stdout_file_name)
            .unwrap();

        let args = mode.get_args(&self.sqlness_home, self, db_ctx, id);
        let check_ip_addrs = mode.check_addrs();

        // Fail fast if any required port is already occupied.
        for check_ip_addr in &check_ip_addrs {
            if util::check_port(check_ip_addr.parse().unwrap(), Duration::from_secs(1)).await {
                panic!(
                    "Port {check_ip_addr} is already in use, please check and retry.",
                    check_ip_addr = check_ip_addr
                );
            }
        }

        let program = PROGRAM;

        let bins_dir = self.bins_dir.lock().unwrap().clone().expect(
            "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. Or you may call `self.build_db()` beforehand.",
        );

        // Canonicalize so the spawn works regardless of the current dir.
        let abs_bins_dir = bins_dir
            .canonicalize()
            .expect("Failed to canonicalize bins_dir");

        let mut process = Command::new(abs_bins_dir.join(program))
            .current_dir(bins_dir.clone())
            .env("TZ", "UTC")
            .args(args)
            .stdout(stdout_file)
            .spawn()
            .unwrap_or_else(|error| {
                panic!(
                    "Failed to start the DB with subcommand {}, Error: {error}, path: {:?}",
                    mode.name(),
                    bins_dir.join(program)
                );
            });

        // Wait for the server to come up; kill it and bail if it doesn't.
        for check_ip_addr in &check_ip_addrs {
            if !util::check_port(check_ip_addr.parse().unwrap(), Duration::from_secs(10)).await {
                Env::stop_server(&mut process);
                panic!("{} doesn't up in 10 seconds, quit.", mode.name())
            }
        }

        process
    }
344
    /// Restarts the deployment behind `db`, reusing the server modes (ports,
    /// args) recorded in its context.
    ///
    /// With `is_full_restart == false`, only the datanodes (or the
    /// standalone server) and the flownode are bounced; with `true`, metasrv
    /// and frontend are restarted as well.
    async fn restart_server(&self, db: &GreptimeDB, is_full_restart: bool) {
        {
            // Stop phase: datanodes/standalone first, then (optionally)
            // metasrv + frontend, and always the flownode.
            if let Some(server_process) = db.server_processes.clone() {
                let mut server_processes = server_process.lock().unwrap();
                for server_process in server_processes.iter_mut() {
                    Env::stop_server(server_process);
                }
            }

            if is_full_restart {
                if let Some(mut metasrv_process) =
                    db.metasrv_process.lock().expect("poisoned lock").take()
                {
                    Env::stop_server(&mut metasrv_process);
                }
                if let Some(mut frontend_process) =
                    db.frontend_process.lock().expect("poisoned lock").take()
                {
                    Env::stop_server(&mut frontend_process);
                }
            }

            if let Some(mut flownode_process) =
                db.flownode_process.lock().expect("poisoned lock").take()
            {
                Env::stop_server(&mut flownode_process);
            }
        }

        // Start phase: relaunch with truncate_log = false so logs append.
        let new_server_processes = if db.is_standalone {
            let server_mode = db
                .ctx
                .get_server_mode(SERVER_MODE_STANDALONE_IDX)
                .cloned()
                .unwrap();
            let server_addr = server_mode.server_addr().unwrap();
            let new_server_process = self.start_server(server_mode, &db.ctx, db.id, false).await;

            // MySQL/PostgreSQL connections don't survive a restart;
            // reconnect them explicitly.
            let mut client = db.client.lock().await;
            client
                .reconnect_mysql_client(&server_addr.mysql_server_addr.unwrap())
                .await;
            client
                .reconnect_pg_client(&server_addr.pg_server_addr.unwrap())
                .await;
            vec![new_server_process]
        } else {
            // Datanode counter starts over so per-node state lines up again.
            db.ctx.reset_datanode_id();
            if is_full_restart {
                let metasrv_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_METASRV_IDX)
                    .cloned()
                    .unwrap();
                let metasrv = self.start_server(metasrv_mode, &db.ctx, db.id, false).await;
                db.metasrv_process
                    .lock()
                    .expect("lock poisoned")
                    .replace(metasrv);

                // Give metasrv time to become ready before datanodes register.
                tokio::time::sleep(Duration::from_secs(5)).await;
            }

            let mut processes = vec![];
            for i in 0..3 {
                let datanode_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_DATANODE_START_IDX + i)
                    .cloned()
                    .unwrap();
                let new_server_process = self
                    .start_server(datanode_mode, &db.ctx, db.id, false)
                    .await;
                processes.push(new_server_process);
            }

            if is_full_restart {
                let frontend_mode = db
                    .ctx
                    .get_server_mode(SERVER_MODE_FRONTEND_IDX)
                    .cloned()
                    .unwrap();
                let frontend = self
                    .start_server(frontend_mode, &db.ctx, db.id, false)
                    .await;
                db.frontend_process
                    .lock()
                    .expect("lock poisoned")
                    .replace(frontend);
            }

            let flownode_mode = db
                .ctx
                .get_server_mode(SERVER_MODE_FLOWNODE_IDX)
                .cloned()
                .unwrap();
            let flownode = self
                .start_server(flownode_mode, &db.ctx, db.id, false)
                .await;
            db.flownode_process
                .lock()
                .expect("lock poisoned")
                .replace(flownode);

            processes
        };

        // Swap the new datanode/standalone handles into the shared slot.
        if let Some(server_processes) = db.server_processes.clone() {
            let mut server_processes = server_processes.lock().unwrap();
            *server_processes = new_server_processes;
        }
    }
461
462 fn setup_wal(&self) {
464 if matches!(self.wal, WalConfig::Kafka { needs_kafka_cluster, .. } if needs_kafka_cluster) {
465 util::setup_wal();
466 }
467 }
468
469 fn setup_etcd(&self) {
471 if self.store_config.setup_etcd {
472 let client_ports = self
473 .store_config
474 .store_addrs
475 .iter()
476 .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
477 .collect::<Vec<_>>();
478 util::setup_etcd(client_ports, None, None);
479 }
480 }
481
482 fn setup_pg(&self) {
484 if matches!(self.store_config.setup_pg, Some(ServiceProvider::Create)) {
485 let client_ports = self
486 .store_config
487 .store_addrs
488 .iter()
489 .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
490 .collect::<Vec<_>>();
491 let client_port = client_ports.first().unwrap_or(&5432);
492 util::setup_pg(*client_port, None);
493 }
494 }
495
496 async fn setup_mysql(&self) {
498 if matches!(self.store_config.setup_mysql, Some(ServiceProvider::Create)) {
499 let client_ports = self
500 .store_config
501 .store_addrs
502 .iter()
503 .map(|s| s.split(':').nth(1).unwrap().parse::<u16>().unwrap())
504 .collect::<Vec<_>>();
505 let client_port = client_ports.first().unwrap_or(&3306);
506 util::setup_mysql(*client_port, None);
507
508 tokio::time::sleep(Duration::from_secs(10)).await;
510 }
511 }
512
513 fn build_db(&self) {
515 if self.bins_dir.lock().unwrap().is_some() {
516 return;
517 }
518
519 println!("Going to build the DB...");
520 let output = Command::new("cargo")
521 .current_dir(util::get_workspace_root())
522 .args([
523 "build",
524 "--bin",
525 "greptime",
526 "--features",
527 "pg_kvbackend,mysql_kvbackend",
528 ])
529 .output()
530 .expect("Failed to start GreptimeDB");
531 if !output.status.success() {
532 println!("Failed to build GreptimeDB, {}", output.status);
533 println!("Cargo build stdout:");
534 io::stdout().write_all(&output.stdout).unwrap();
535 println!("Cargo build stderr:");
536 io::stderr().write_all(&output.stderr).unwrap();
537 panic!();
538 }
539
540 let _ = self
541 .bins_dir
542 .lock()
543 .unwrap()
544 .insert(util::get_binary_dir("debug"));
545 }
546
    /// Extra CLI arguments appended to every spawned server command line.
    pub(crate) fn extra_args(&self) -> &Vec<String> {
        &self.extra_args
    }
550}
551
/// A running GreptimeDB deployment together with the clients connected to it.
pub struct GreptimeDB {
    // The standalone server, or the datanodes in distributed mode.
    server_processes: Option<Arc<Mutex<Vec<Child>>>>,
    // Distributed-mode components; `None` in standalone or remote mode.
    metasrv_process: Mutex<Option<Child>>,
    frontend_process: Mutex<Option<Child>>,
    flownode_process: Mutex<Option<Child>>,
    // Bundle of gRPC / PostgreSQL / MySQL clients.
    client: TokioMutex<MultiProtocolClient>,
    // Per-deployment bookkeeping (server modes, datanode counter, ...).
    ctx: GreptimeDBContext,
    is_standalone: bool,
    // The environment that created this instance; needed for restarts.
    env: Env,
    // Parallel-test slot id.
    id: usize,
}
563
impl GreptimeDB {
    /// Runs `query` over the PostgreSQL wire protocol, formatting successful
    /// results and displaying errors as-is.
    async fn postgres_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
        let mut client = self.client.lock().await;

        match client.postgres_query(&query).await {
            Ok(rows) => Box::new(PostgresqlFormatter::from(rows)),
            Err(e) => Box::new(e),
        }
    }

    /// Runs `query` over the MySQL wire protocol, formatting successful
    /// results and displaying errors as-is.
    async fn mysql_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
        let mut client = self.client.lock().await;

        match client.mysql_query(&query).await {
            Ok(res) => Box::new(MysqlFormatter::from(res)),
            Err(e) => Box::new(e),
        }
    }

    /// Runs `query` over gRPC (the default protocol); both results and
    /// errors go through dedicated formatters.
    async fn grpc_query(&self, _ctx: QueryContext, query: String) -> Box<dyn Display> {
        let mut client = self.client.lock().await;

        match client.grpc_query(&query).await {
            Ok(rows) => Box::new(OutputFormatter::from(rows)),
            Err(e) => Box::new(ErrorFormatter::from(e)),
        }
    }
}
592
593#[async_trait]
594impl Database for GreptimeDB {
595 async fn query(&self, ctx: QueryContext, query: String) -> Box<dyn Display> {
596 if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() {
597 self.env.restart_server(self, false).await;
598 } else if let Some(version) = ctx.context.get("version") {
599 let version_bin_dir = self
600 .env
601 .versioned_bins_dirs
602 .lock()
603 .expect("lock poison")
604 .get(version.as_str())
605 .cloned();
606
607 match version_bin_dir {
608 Some(path) if path.clone().join(PROGRAM).is_file() => {
609 *self.env.bins_dir.lock().unwrap() = Some(path.clone());
611 }
612 _ => {
613 maybe_pull_binary(version, self.env.pull_version_on_need).await;
615 let root = get_workspace_root();
616 let new_path = PathBuf::from_iter([&root, version]);
617 *self.env.bins_dir.lock().unwrap() = Some(new_path);
618 }
619 }
620
621 self.env.restart_server(self, true).await;
622 tokio::time::sleep(Duration::from_secs(5)).await;
624 }
625
626 if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) {
627 if protocol == MYSQL {
629 self.mysql_query(ctx, query).await
630 } else {
631 self.postgres_query(ctx, query).await
632 }
633 } else {
634 self.grpc_query(ctx, query).await
635 }
636 }
637}
638
impl GreptimeDB {
    /// Kills every process this instance spawned — datanodes/standalone
    /// first, then metasrv, frontend and flownode — and tears down a
    /// runner-owned Kafka cluster when the WAL config created one.
    fn stop(&mut self) {
        if let Some(server_processes) = self.server_processes.clone() {
            let mut server_processes = server_processes.lock().unwrap();
            for mut server_process in server_processes.drain(..) {
                Env::stop_server(&mut server_process);
                println!(
                    "Standalone or Datanode (pid = {}) is stopped",
                    server_process.id()
                );
            }
        }
        if let Some(mut metasrv) = self
            .metasrv_process
            .lock()
            .expect("someone else panic when holding lock")
            .take()
        {
            Env::stop_server(&mut metasrv);
            println!("Metasrv (pid = {}) is stopped", metasrv.id());
        }
        if let Some(mut frontend) = self
            .frontend_process
            .lock()
            .expect("someone else panic when holding lock")
            .take()
        {
            Env::stop_server(&mut frontend);
            println!("Frontend (pid = {}) is stopped", frontend.id());
        }
        if let Some(mut flownode) = self
            .flownode_process
            .lock()
            .expect("someone else panic when holding lock")
            .take()
        {
            Env::stop_server(&mut flownode);
            println!("Flownode (pid = {}) is stopped", flownode.id());
        }
        // Only tear down Kafka if this run created the cluster itself.
        if matches!(self.ctx.wal, WalConfig::Kafka { needs_kafka_cluster, .. } if needs_kafka_cluster)
        {
            util::teardown_wal();
        }
    }
}
684
685impl Drop for GreptimeDB {
686 fn drop(&mut self) {
687 if self.env.server_addrs.server_addr.is_none() {
688 self.stop();
689 }
690 }
691}
692
/// Mutable bookkeeping shared by the servers of one deployment.
pub struct GreptimeDBContext {
    // Creation timestamp in milliseconds. NOTE(review): exposed via `time()`;
    // presumably used to namespace per-run resources — confirm in
    // `ServerMode::get_args`.
    time: i64,
    // Counts datanodes started so far; reset before a restart.
    datanode_id: AtomicU32,
    wal: WalConfig,
    store_config: StoreConfig,
    // Server modes indexed by the `SERVER_MODE_*_IDX` constants so restarts
    // reuse the same ports and arguments.
    server_modes: Vec<ServerMode>,
}
701
702impl GreptimeDBContext {
703 pub fn new(wal: WalConfig, store_config: StoreConfig) -> Self {
704 Self {
705 time: common_time::util::current_time_millis(),
706 datanode_id: AtomicU32::new(0),
707 wal,
708 store_config,
709 server_modes: Vec::new(),
710 }
711 }
712
713 pub(crate) fn time(&self) -> i64 {
714 self.time
715 }
716
717 pub fn is_raft_engine(&self) -> bool {
718 matches!(self.wal, WalConfig::RaftEngine)
719 }
720
721 pub fn kafka_wal_broker_endpoints(&self) -> String {
722 match &self.wal {
723 WalConfig::RaftEngine => String::new(),
724 WalConfig::Kafka {
725 broker_endpoints, ..
726 } => serde_json::to_string(&broker_endpoints).unwrap(),
727 }
728 }
729
730 fn incr_datanode_id(&self) {
731 let _ = self.datanode_id.fetch_add(1, Ordering::Relaxed);
732 }
733
734 fn reset_datanode_id(&self) {
735 self.datanode_id.store(0, Ordering::Relaxed);
736 }
737
738 pub(crate) fn store_config(&self) -> StoreConfig {
739 self.store_config.clone()
740 }
741
742 fn set_server_mode(&mut self, mode: ServerMode, idx: usize) {
743 if idx >= self.server_modes.len() {
744 self.server_modes.resize(idx + 1, mode.clone());
745 }
746 self.server_modes[idx] = mode;
747 }
748
749 fn get_server_mode(&self, idx: usize) -> Option<&ServerMode> {
750 self.server_modes.get(idx)
751 }
752}