SQL Reference

Complete reference for SQL queries in DBX.

Table of contents

  1. Overview
    1. Supported Features
  2. Basic Queries
    1. SELECT Statement
    2. WHERE Clause
    3. ORDER BY Clause
    4. LIMIT Clause
  3. Aggregate Functions
    1. COUNT
    2. SUM
    3. AVG
    4. MIN / MAX
  4. GROUP BY
    1. Basic Grouping
    2. HAVING Clause
  5. JOIN Operations
    1. INNER JOIN
    2. LEFT JOIN
    3. RIGHT JOIN
    4. FULL OUTER JOIN
    5. Multiple Joins
  6. Scalar Functions
    1. String Functions
    2. Math Functions
    3. Date Functions
  7. Advanced Queries
    1. Subqueries
    2. CASE Expressions
    3. Window Functions
  8. Query Optimization
    1. Projection Pushdown
    2. Predicate Pushdown
    3. Vectorized Execution
  9. Working with RecordBatch
    1. Registering Tables
    2. Processing Results
  10. GPU Acceleration
    1. Enabling GPU for SQL
  11. Performance Tips
    1. 1. Use Specific Columns
    2. 2. Filter Early
    3. 3. Use Appropriate Indexes
    4. 4. Batch Queries
  12. Prepared Statements
    1. Creating Prepared Statements
    2. Benefits
  13. Error Handling
    1. Common SQL Errors
  14. SQL Examples
    1. Analytics Query
    2. User Segmentation
    3. Top N per Group
  15. Next Steps

Overview

DBX supports standard SQL queries through Apache Arrow and DataFusion integration. SQL queries operate on the Columnar Cache layer for optimal analytical performance.

Supported Features

  • SELECT - Column projection and filtering
  • WHERE - Predicate filtering
  • JOIN - Inner, Left, Right, Full Outer joins
  • GROUP BY - Aggregation and grouping
  • ORDER BY - Result sorting
  • LIMIT - Result limiting
  • Aggregate Functions - SUM, COUNT, MIN, MAX, AVG
  • Scalar Functions - String, math, date functions

Basic Queries

SELECT Statement

Select all columns:

use dbx_core::Database;

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    // ... register table with data ...
    
    let results = db.execute_sql("SELECT * FROM users")?;
    
    Ok(())
}

Select specific columns:

let results = db.execute_sql("SELECT id, name, email FROM users")?;

Column aliases:

let results = db.execute_sql(
    "SELECT id AS user_id, name AS full_name FROM users"
)?;

WHERE Clause

Basic filtering:

let results = db.execute_sql(
    "SELECT * FROM users WHERE age > 30"
)?;

Multiple conditions:

let results = db.execute_sql(
    "SELECT * FROM users WHERE age > 30 AND city = 'Seoul'"
)?;

Comparison operators:

// Equal
"SELECT * FROM users WHERE status = 'active'"

// Not equal
"SELECT * FROM users WHERE status != 'deleted'"

// Greater than / Less than
"SELECT * FROM orders WHERE amount > 1000"
"SELECT * FROM orders WHERE amount <= 500"

// LIKE pattern matching
"SELECT * FROM users WHERE email LIKE '%@gmail.com'"

// IN operator
"SELECT * FROM users WHERE city IN ('Seoul', 'Busan', 'Incheon')"

// BETWEEN
"SELECT * FROM orders WHERE created_at BETWEEN '2024-01-01' AND '2024-12-31'"

ORDER BY Clause

Ascending order:

let results = db.execute_sql(
    "SELECT * FROM users ORDER BY age ASC"
)?;

Descending order:

let results = db.execute_sql(
    "SELECT * FROM users ORDER BY created_at DESC"
)?;

Multiple columns:

let results = db.execute_sql(
    "SELECT * FROM users ORDER BY city ASC, age DESC"
)?;

LIMIT Clause

Limit results:

let results = db.execute_sql(
    "SELECT * FROM users LIMIT 10"
)?;

With offset:

let results = db.execute_sql(
    "SELECT * FROM users LIMIT 10 OFFSET 20"
)?;

Aggregate Functions

COUNT

Count all rows:

let results = db.execute_sql(
    "SELECT COUNT(*) FROM users"
)?;

Count non-null values:

let results = db.execute_sql(
    "SELECT COUNT(email) FROM users"
)?;

Count distinct:

let results = db.execute_sql(
    "SELECT COUNT(DISTINCT city) FROM users"
)?;

SUM

Sum numeric column:

let results = db.execute_sql(
    "SELECT SUM(amount) FROM orders"
)?;

AVG

Average value:

let results = db.execute_sql(
    "SELECT AVG(age) FROM users"
)?;

MIN / MAX

Minimum and maximum:

let results = db.execute_sql(
    "SELECT MIN(age), MAX(age) FROM users"
)?;

GROUP BY

Basic Grouping

Group by single column:

let results = db.execute_sql(
    "SELECT city, COUNT(*) FROM users GROUP BY city"
)?;

Group by multiple columns:

let results = db.execute_sql(
    "SELECT city, status, COUNT(*) 
     FROM users 
     GROUP BY city, status"
)?;

HAVING Clause

Filter grouped results:

let results = db.execute_sql(
    "SELECT city, COUNT(*) as user_count
     FROM users 
     GROUP BY city
     HAVING user_count > 100"
)?;

Complex aggregations:

let results = db.execute_sql(
    "SELECT 
        city,
        COUNT(*) as total_users,
        AVG(age) as avg_age,
        SUM(order_count) as total_orders
     FROM users
     GROUP BY city
     HAVING total_users > 50 AND avg_age > 25"
)?;

JOIN Operations

INNER JOIN

Join two tables:

let results = db.execute_sql(
    "SELECT u.id, u.name, o.order_id, o.amount
     FROM users u
     INNER JOIN orders o ON u.id = o.user_id"
)?;

LEFT JOIN

Include all rows from left table:

let results = db.execute_sql(
    "SELECT u.id, u.name, o.order_id
     FROM users u
     LEFT JOIN orders o ON u.id = o.user_id"
)?;

RIGHT JOIN

Include all rows from right table:

let results = db.execute_sql(
    "SELECT u.id, u.name, o.order_id
     FROM users u
     RIGHT JOIN orders o ON u.id = o.user_id"
)?;

FULL OUTER JOIN

Include all rows from both tables:

let results = db.execute_sql(
    "SELECT u.id, u.name, o.order_id
     FROM users u
     FULL OUTER JOIN orders o ON u.id = o.user_id"
)?;

Multiple Joins

Join multiple tables:

let results = db.execute_sql(
    "SELECT 
        u.name,
        o.order_id,
        p.product_name,
        p.price
     FROM users u
     INNER JOIN orders o ON u.id = o.user_id
     INNER JOIN products p ON o.product_id = p.id"
)?;

Scalar Functions

String Functions

// UPPER / LOWER
"SELECT UPPER(name), LOWER(email) FROM users"

// LENGTH
"SELECT name, LENGTH(name) as name_length FROM users"

// SUBSTRING
"SELECT SUBSTRING(email, 1, 10) FROM users"

// CONCAT
"SELECT CONCAT(first_name, ' ', last_name) as full_name FROM users"

// TRIM
"SELECT TRIM(name) FROM users"

Math Functions

// ABS
"SELECT ABS(balance) FROM accounts"

// ROUND
"SELECT ROUND(price, 2) FROM products"

// FLOOR / CEIL
"SELECT FLOOR(rating), CEIL(rating) FROM reviews"

// POWER
"SELECT POWER(value, 2) FROM measurements"

Date Functions

// CURRENT_DATE
"SELECT CURRENT_DATE()"

// CURRENT_TIMESTAMP
"SELECT CURRENT_TIMESTAMP()"

// DATE_TRUNC
"SELECT DATE_TRUNC('day', created_at) FROM orders"

// EXTRACT
"SELECT EXTRACT(YEAR FROM created_at) as year FROM orders"

Advanced Queries

Subqueries

Subquery in WHERE:

let results = db.execute_sql(
    "SELECT * FROM users
     WHERE age > (SELECT AVG(age) FROM users)"
)?;

Subquery in FROM:

let results = db.execute_sql(
    "SELECT city, avg_age
     FROM (
         SELECT city, AVG(age) as avg_age
         FROM users
         GROUP BY city
     ) AS city_stats
     WHERE avg_age > 30"
)?;

CASE Expressions

Simple CASE:

let results = db.execute_sql(
    "SELECT 
        name,
        CASE 
            WHEN age < 18 THEN 'Minor'
            WHEN age < 65 THEN 'Adult'
            ELSE 'Senior'
        END as age_group
     FROM users"
)?;

CASE with aggregation:

let results = db.execute_sql(
    "SELECT 
        city,
        COUNT(CASE WHEN status = 'active' THEN 1 END) as active_users,
        COUNT(CASE WHEN status = 'inactive' THEN 1 END) as inactive_users
     FROM users
     GROUP BY city"
)?;

Window Functions

(Note: Window function support depends on DataFusion version)

// ROW_NUMBER
"SELECT 
    name,
    age,
    ROW_NUMBER() OVER (ORDER BY age DESC) as rank
 FROM users"

// RANK
"SELECT 
    name,
    score,
    RANK() OVER (ORDER BY score DESC) as rank
 FROM students"

// Partitioned window
"SELECT 
    name,
    city,
    age,
    AVG(age) OVER (PARTITION BY city) as city_avg_age
 FROM users"

Query Optimization

Projection Pushdown

DBX automatically pushes column selection down to storage:

// Only reads 'id' and 'name' columns from storage
let results = db.execute_sql(
    "SELECT id, name FROM users"
)?;

Predicate Pushdown

Filters are applied at the storage layer:

// Filter applied during scan, not after
let results = db.execute_sql(
    "SELECT * FROM users WHERE age > 30"
)?;

Vectorized Execution

Queries use SIMD vectorization automatically:

// Vectorized aggregation
let results = db.execute_sql(
    "SELECT SUM(amount) FROM orders"
)?;

Working with RecordBatch

Registering Tables

Register Arrow RecordBatch as a table:

use dbx_core::Database;
use arrow::array::{Int32Array, StringArray, RecordBatch};
use arrow::datatypes::{DataType, Field, Schema};
use std::sync::Arc;

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    // Create schema
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
        Field::new("age", DataType::Int32, false),
    ]));
    
    // Create data
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(Int32Array::from(vec![1, 2, 3])),
            Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])),
            Arc::new(Int32Array::from(vec![25, 30, 35])),
        ],
    ).unwrap();
    
    // Register table
    db.register_table("users", vec![batch]);
    
    // Now you can query it
    let results = db.execute_sql("SELECT * FROM users WHERE age > 28")?;
    
    Ok(())
}

Processing Results

Iterate through results:

use arrow::array::AsArray;

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    // ... register table ...
    
    let results = db.execute_sql("SELECT id, name FROM users")?;
    
    for batch in results {
        let id_array = batch.column(0).as_primitive::<arrow::datatypes::Int32Type>();
        let name_array = batch.column(1).as_string::<i32>();
        
        for i in 0..batch.num_rows() {
            let id = id_array.value(i);
            let name = name_array.value(i);
            println!("ID: {}, Name: {}", id, name);
        }
    }
    
    Ok(())
}

GPU Acceleration

Enabling GPU for SQL

When GPU features are enabled, certain operations automatically use GPU:

use dbx_core::Database;

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    // ... register table ...
    
    // Sync to GPU cache
    db.sync_gpu_cache("users")?;
    
    // These operations may use GPU acceleration:
    // - SUM, COUNT, MIN, MAX, AVG
    // - Filtering (WHERE clauses)
    // - GROUP BY operations
    // - Hash joins
    
    let results = db.execute_sql(
        "SELECT city, SUM(amount) 
         FROM orders 
         GROUP BY city"
    )?;
    
    Ok(())
}

Performance Tips

1. Use Specific Columns

// Good: Only select needed columns
"SELECT id, name FROM users"

// Avoid: Select all columns
"SELECT * FROM users"

2. Filter Early

// Good: Filter before join
"SELECT u.name, o.amount
 FROM users u
 INNER JOIN (
     SELECT * FROM orders WHERE amount > 1000
 ) o ON u.id = o.user_id"

3. Use Appropriate Indexes

// Ensure Bloom filters are updated
db.rebuild_index("users")?;

4. Batch Queries

// Good: Single query with aggregation
"SELECT city, COUNT(*), AVG(age) FROM users GROUP BY city"

// Avoid: Multiple separate queries
// "SELECT COUNT(*) FROM users WHERE city = 'Seoul'"
// "SELECT COUNT(*) FROM users WHERE city = 'Busan'"
// ...

Prepared Statements

Creating Prepared Statements

use dbx_core::Database;

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    // Prepare statement
    let stmt = db.prepare("SELECT * FROM users WHERE age > ?")?;
    
    // Execute with parameters
    let results = stmt.execute(&[30])?;
    
    Ok(())
}

Benefits

  • Performance: Query is parsed once, executed multiple times
  • Security: Prevents SQL injection
  • Type Safety: Parameter binding with type checking

Error Handling

Common SQL Errors

use dbx_core::{Database, DbxError};

fn main() -> dbx_core::DbxResult<()> {
    let db = Database::open_in_memory()?;
    
    match db.execute_sql("SELECT * FROM nonexistent_table") {
        Ok(results) => {
            // Process results
        }
        Err(DbxError::TableNotFound) => {
            println!("Table does not exist");
        }
        Err(DbxError::SqlParseError(msg)) => {
            println!("SQL syntax error: {}", msg);
        }
        Err(e) => {
            eprintln!("Query failed: {}", e);
        }
    }
    
    Ok(())
}

SQL Examples

Analytics Query

let results = db.execute_sql(
    "SELECT 
        DATE_TRUNC('month', order_date) as month,
        city,
        COUNT(*) as order_count,
        SUM(amount) as total_revenue,
        AVG(amount) as avg_order_value
     FROM orders
     WHERE order_date >= '2024-01-01'
     GROUP BY month, city
     HAVING total_revenue > 10000
     ORDER BY month DESC, total_revenue DESC
     LIMIT 100"
)?;

User Segmentation

let results = db.execute_sql(
    "SELECT 
        CASE 
            WHEN order_count = 0 THEN 'New'
            WHEN order_count < 5 THEN 'Occasional'
            WHEN order_count < 20 THEN 'Regular'
            ELSE 'VIP'
        END as segment,
        COUNT(*) as user_count,
        AVG(lifetime_value) as avg_ltv
     FROM users
     GROUP BY segment
     ORDER BY avg_ltv DESC"
)?;

Top N per Group

let results = db.execute_sql(
    "SELECT *
     FROM (
         SELECT 
             product_name,
             category,
             sales,
             ROW_NUMBER() OVER (
                 PARTITION BY category 
                 ORDER BY sales DESC
             ) as rank
         FROM products
     ) ranked
     WHERE rank <= 5"
)?;

Next Steps


Copyright © 2026 ByteLogicCore. MIT OR Apache-2.0 License.

This site uses Just the Docs, a documentation theme for Jekyll.