frcrawler-scripts/schemas/20-view_dnssec.clickhouse.sql
Gaël Berthaud-Müller 38679a009a add all files
2024-03-06 14:49:11 +01:00

198 lines
7.6 KiB
SQL

CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_signed_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
tld String,
metadata String,
job_id UUID,
domain String,
started_at DateTime,
signed Bool,
error String
) ENGINE=MergeTree
ORDER BY (batch_id, tld, metadata)
TTL started_at + INTERVAL 12 MONTH;
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_signed_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.dnssec_signed_data AS
SELECT
JSONExtractString(result, 'batch_id') as batch_id,
splitByChar('.', domain)[-1] as tld,
JSONExtractString(result, '_metadata') as metadata,
job_id,
JSONExtractString(result, 'domain') as domain,
parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) as started_at,
JSONExtractBool(result, 'dnssec', 'signed') as signed,
JSONExtractString(result, 'dnssec', 'error') as error
FROM ${CH_DATABASE}.raw_results_data;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_signed ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.dnssec_signed_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, dnssec_signed_data);
-- aggregated view on metadata and errors
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_summary_signed_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
tld String,
metadata String,
started_at SimpleAggregateFunction(min, DateTime),
signed Bool,
error String,
domain_count AggregateFunction(uniq, UUID)
) ENGINE=AggregatingMergeTree ORDER BY (batch_id, tld, metadata, signed, error)
TTL started_at + INTERVAL 12 MONTH;
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_summary_signed_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.dnssec_summary_signed_data AS
SELECT
JSONExtractString(result, 'batch_id') as batch_id,
splitByChar('.', JSONExtractString(result, 'domain'))[-1] as tld,
JSONExtractString(result, '_metadata') as metadata,
min(parseDateTimeBestEffort(JSONExtractString(result, 'started_at'))) as started_at,
JSONExtractBool(result, 'dnssec', 'signed') as signed,
JSONExtractString(result, 'dnssec', 'error') as error,
uniqState(job_id) as domain_count
FROM ${CH_DATABASE}.raw_results_data
GROUP BY batch_id, tld, metadata, signed, error;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_summary_signed_all ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.dnssec_summary_signed_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, dnssec_summary_signed_data);
CREATE VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_summary_signed ON CLUSTER ${CH_CLUSTER} AS
SELECT
batch_id,
tld,
metadata,
min(started_at) as started_at,
signed,
error,
uniqMerge(domain_count) as domain_count
FROM ${CH_DATABASE}.dnssec_summary_signed_all
GROUP BY batch_id, tld, metadata, signed, error;
-- informations sur les signatures (type couvert, durée, algo, est valide, keytag)
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_signatures_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
tld String,
metadata String,
job_id UUID,
domain String,
started_at DateTime,
signed Bool,
covers String,
algorithm String,
inception DateTime,
expiration DateTime,
signer String,
key_tag UInt16,
valid Bool,
) ENGINE = MergeTree
ORDER BY (batch_id, tld, domain)
TTL started_at + INTERVAL 12 MONTH;
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_signatures_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.dnssec_signatures_data AS
SELECT
JSONExtractString(result, 'batch_id') as batch_id,
splitByChar('.', domain)[-1] as tld,
JSONExtractString(result, '_metadata') as metadata,
job_id,
JSONExtractString(result, 'domain') as domain,
parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) as started_at,
JSONExtractBool(result, 'dnssec', 'signed') as signed,
JSONExtractString(signature, 'covers') as covers,
JSONExtractString(signature, 'algorithm') as algorithm,
parseDateTimeBestEffort(JSONExtractString(signature, 'inception')) as inception,
parseDateTimeBestEffort(JSONExtractString(signature, 'expiration')) as expiration,
JSONExtractString(signature, 'signer') as signer,
JSONExtractString(signature, 'key_tag') as key_tag,
JSONExtractBool(signature, 'valid') as valid
FROM
${CH_DATABASE}.raw_results_data
ARRAY JOIN JSONExtractArrayRaw(result, 'dnssec', 'signatures') as signature;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_signatures ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.dnssec_signatures_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, dnssec_signatures_data);
-- informations sur les clés (flags, est associée a un DS, algo)
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_keys_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
tld String,
metadata String,
job_id UUID,
domain String,
started_at DateTime,
signed Bool,
algorithm String,
flags UInt16,
friendly_flags Array(String),
key_tag UInt16,
protocol UInt8,
has_ds Bool,
key String,
) ENGINE = MergeTree
ORDER BY (batch_id, tld, domain)
TTL started_at + INTERVAL 12 MONTH;
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_keys_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.dnssec_keys_data AS
SELECT
JSONExtractString(result, 'batch_id') as batch_id,
splitByChar('.', domain)[-1] as tld,
JSONExtractString(result, '_metadata') as metadata,
job_id,
JSONExtractString(result, 'domain') as domain,
parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) as started_at,
JSONExtractBool(result, 'dnssec', 'signed') as signed,
JSONExtractString(key_object, 'algorithm') as algorithm,
JSONExtractUInt(key_object, 'flags') as flags,
JSONExtract(key_object, 'friendly_flags', 'Array(String)') as friendly_flags,
JSONExtractUInt(key_object, 'key_tag') as key_tag,
JSONExtractUInt(key_object, 'protocol') as protocol,
JSONExtractBool(key_object, 'has_ds') as has_ds,
JSONExtractString(key_object, 'key') as key
FROM
${CH_DATABASE}.raw_results_data
ARRAY JOIN JSONExtractArrayRaw(result, 'dnssec', 'keys') as key_object;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_keys ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.dnssec_keys_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, dnssec_keys_data);
-- informations sur les DS (type d'algo de hash)
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_ds_data ON CLUSTER ${CH_CLUSTER} (
batch_id UUID,
tld String,
metadata String,
job_id UUID,
domain String,
started_at DateTime,
signed Bool,
digest_type String,
algorithm String,
digest String,
) ENGINE = MergeTree
ORDER BY (batch_id, tld, domain)
TTL started_at + INTERVAL 12 MONTH;
CREATE MATERIALIZED VIEW IF NOT EXISTS ${CH_DATABASE}.dnssec_ds_mv ON CLUSTER ${CH_CLUSTER} TO ${CH_DATABASE}.dnssec_ds_data AS
SELECT
JSONExtractString(result, 'batch_id') as batch_id,
splitByChar('.', domain)[-1] as tld,
JSONExtractString(result, '_metadata') as metadata,
job_id,
JSONExtractString(result, 'domain') as domain,
parseDateTimeBestEffort(JSONExtractString(result, 'started_at')) as started_at,
JSONExtractBool(result, 'dnssec', 'signed') as signed,
JSONExtractString(ds_object, 'digest_type') as digest_type,
JSONExtractString(ds_object, 'algorithm') as algorithm,
JSONExtractUInt(ds_object, 'key_tag') as key_tag,
JSONExtractString(ds_object, 'digest') as digest
FROM
${CH_DATABASE}.raw_results_data
ARRAY JOIN JSONExtractArrayRaw(result, 'dnssec', 'delegation_signers') as ds_object;
CREATE TABLE IF NOT EXISTS ${CH_DATABASE}.dnssec_ds ON CLUSTER ${CH_CLUSTER}
AS ${CH_DATABASE}.dnssec_ds_data
ENGINE = Distributed(${CH_CLUSTER}, ${CH_DATABASE}, dnssec_ds_data);