mirror of
https://gitlab.rd.nic.fr/labs/frcrawler/scripts.git
synced 2025-04-04 19:45:48 +02:00
31 lines
1.1 KiB
SQL
31 lines
1.1 KiB
SQL
-- Crawler results
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.raw_results_data ON CLUSTER ${CH_CLUSTER} (
|
|
job_id UUID,
|
|
result String,
|
|
) ENGINE = MergeTree
|
|
ORDER BY (JSONExtractString(result, 'batch_id'), job_id)
|
|
TTL parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) + INTERVAL 12 MONTH;
|
|
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.raw_results ON CLUSTER ${CH_CLUSTER}
|
|
AS ${CH_DATABASE}.raw_results_data
|
|
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, raw_results_data, murmurHash3_32(JSONExtractString(result, 'job_id')));
|
|
|
|
-- Batch metadata
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_metadata_data ON CLUSTER ${CH_CLUSTER} (
|
|
batch_id UUID,
|
|
scheduled_at DateTime,
|
|
queue String,
|
|
datasource String,
|
|
storage String,
|
|
job_name String,
|
|
scheduled_job_count UInt64
|
|
) ENGINE = MergeTree
|
|
ORDER BY (batch_id)
|
|
TTL scheduled_at + INTERVAL 12 MONTH;
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_metadata ON CLUSTER ${CH_CLUSTER}
|
|
AS ${CH_DATABASE}.batches_metadata_data
|
|
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, batches_metadata_data, murmurHash3_32(toString(batch_id)));
|