mirror of
https://gitlab.rd.nic.fr/labs/frcrawler/scripts.git
synced 2025-04-04 19:45:48 +02:00
35 lines
1.4 KiB
SQL
35 lines
1.4 KiB
SQL
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_data ON CLUSTER ${CH_CLUSTER} (
|
|
batch_id UUID,
|
|
job_name String,
|
|
datasource String,
|
|
started_at SimpleAggregateFunction(min, DateTime),
|
|
ended_at SimpleAggregateFunction(min, DateTime),
|
|
job_count AggregateFunction(uniq, UUID),
|
|
) ENGINE=AggregatingMergeTree ORDER BY (started_at)
|
|
TTL started_at + INTERVAL 12 MONTH;
|
|
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.batches_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.batches_data AS
|
|
SELECT
|
|
toUUID(JSONExtractString(result, 'batch_id')) as batch_id,
|
|
JSONExtractString(result, 'job_name') as job_name,
|
|
JSONExtractString(result, 'datasource') as datasource,
|
|
min(parseDateTimeBestEffort(JSONExtractString(result, 'started_at'))) as started_at,
|
|
max(parseDateTimeBestEffort(JSONExtractString(result, 'started_at'))) as ended_at,
|
|
uniqState(job_id) as job_count
|
|
FROM ${CH_DATABASE}.raw_results_data
|
|
GROUP BY batch_id, job_name, datasource;
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.batches_all ON CLUSTER ${CH_CLUSTER}
|
|
AS ${CH_DATABASE}.batches_mv
|
|
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, batches_mv);
|
|
|
|
CREATE VIEW IF NOT EXISTS ${CH_DATABASE}.batches ON CLUSTER ${CH_CLUSTER} AS
|
|
SELECT
|
|
batch_id,
|
|
job_name,
|
|
datasource,
|
|
min(started_at) as started_at,
|
|
max(ended_at) as ended_at,
|
|
uniqMerge(job_count) as job_count
|
|
FROM ${CH_DATABASE}.batches_all
|
|
GROUP BY batch_id, job_name, datasource;
|