frcrawler-scripts/schemas/10_schema-crawler.clickhouse.sql
Gaël Berthaud-Müller 38679a009a add all files
2024-03-06 14:49:11 +01:00

31 lines
1.1 KiB
SQL

-- Crawler results
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.raw_results_data ON CLUSTER ${CH_CLUSTER} (
job_id UUID,
result String,
) ENGINE = MergeTree
ORDER BY (JSONExtractString(result, 'batch_id'), job_id)
TTL parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) + INTERVAL 12 MONTH;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.raw_results ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.raw_results_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, raw_results_data, murmurHash3_32(JSONExtractString(result, 'job_id')));
-- Batch metadata
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_metadata_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
scheduled_at DateTime,
queue String,
datasource String,
storage String,
job_name String,
scheduled_job_count UInt64
) ENGINE = MergeTree
ORDER BY (batch_id)
TTL scheduled_at + INTERVAL 12 MONTH;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_metadata ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.batches_metadata_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, batches_metadata_data, murmurHash3_32(toString(batch_id)));