mirror of
https://gitlab.rd.nic.fr/labs/frcrawler/scripts.git
synced 2025-04-04 19:45:48 +02:00
31 lines
1.2 KiB
SQL
31 lines
1.2 KiB
SQL
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.siren_data ON CLUSTER ${CH_CLUSTER} (
|
|
batch_id UUID,
|
|
tld String,
|
|
metadata String,
|
|
job_id UUID,
|
|
domain String,
|
|
started_at DateTime,
|
|
siren Nullable(String),
|
|
url Nullable(String),
|
|
input Nullable(String)
|
|
) ENGINE = MergeTree
|
|
ORDER BY (batch_id, tld, domain)
|
|
TTL started_at + INTERVAL 12 MONTH;
|
|
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.siren_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.siren_data AS
|
|
SELECT
|
|
toUUID(JSONExtractString(result, 'batch_id')) as batch_id,
|
|
splitByChar('.', domain)[-1] as tld,
|
|
JSONExtractString(result, '_metadata') as metadata,
|
|
job_id,
|
|
JSONExtractString(result, 'domain') as domain,
|
|
parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) as started_at,
|
|
JSONExtract(result, 'siren', 'siren', 'Nullable(String)') as siren,
|
|
JSONExtract(result, 'siren', 'url', 'Nullable(String)') as url,
|
|
JSONExtract(result, 'siren', 'input', 'Nullable(String)') as input
|
|
FROM ${CH_DATABASE}.raw_results_data
|
|
WHERE siren IS NOT NULL;
|
|
|
|
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.siren ON CLUSTER ${CH_CLUSTER}
|
|
AS ${CH_DATABASE}.siren_mv
|
|
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, siren_mv);
|