Change table TTLs
You can modify the TTL of a table in ClickHouse by using ALTER TABLE...MODIFY TTL. For example:
ALTER TABLE database.table MODIFY TTL event_date + INTERVAL 30 DAY;
However, ClickHouse will rewrite all table partitions, including those not impacted by the TTL change. This can be a very expensive operation, especially for large tables.
To avoid impacting the performance of our database, we can instead set materialize_ttl_after_modify to 0 and clear up old partitions manually.
This avoids the huge performance impact of rewriting all table partitions, but does mean there is additional manual effort.
For example:
set materialize_ttl_after_modify=0;
ALTER TABLE database.table MODIFY TTL event_date + INTERVAL 30 DAY;
ALTER TABLE database.table DROP PARTITION 202205;
ALTER TABLE database.table DROP PARTITION 202206;
ALTER TABLE database.table DROP PARTITION 202207;
Analyze local files with clickhouse-local
clickhouse-local is like running a temporary ClickHouse server that only lasts for your session. It's great for exploring local files to quickly experiment with data, without needing to set up a proper ClickHouse deployment.
It's possible to use to clickhouse-local to analyze files of structured data directly from the local file system.
SELECT count() FROM file('final.ndjson');
SELECT count()
FROM file('final.ndjson')
Query id: a0a1f4b5-40cb-4125-b68b-4ed978c41576
┌─count()─┐
│ 100000 │
└─────────┘
1 row in set. Elapsed: 0.659 sec. Processed 55.38 thousand rows, 96.97 MB (84.04 thousand rows/s., 147.16 MB/s.)
SELECT countDistinct(public_ip) FROM file('final.ndjson');
SELECT countDistinct(public_ip)
FROM file('final.ndjson')
Query id: 21df7ca5-e3bf-4010-b2a0-bf8b854502d2
┌─uniqExact(public_ip)─┐
│ 71 │
└──────────────────────┘
1 row in set. Elapsed: 0.225 sec. Processed 77.53 thousand rows, 96.45 MB (345.22 thousand rows/s., 429.46 MB/s.)
You can create tables from the local file if you want to do more than one analysis on the data. The table is destroyed when your clickhouse-local session ends.
CREATE TABLE auxiliar Engine=MergeTree() ORDER BY tuple() AS SELECT * FROM file('final.ndjson');
CREATE TABLE auxiliar
ENGINE = MergeTree
ORDER BY tuple() AS
SELECT *
FROM file('final.ndjson')
Query id: a1732be5-a912-41a5-bf8e-e524db8f12f4
Ok.
0 rows in set. Elapsed: 0.486 sec. Processed 100.00 thousand rows, 161.88 MB (205.73 thousand rows/s., 333.03 MB/s.)
SHOW CREATE TABLE auxiliar;
SHOW CREATE TABLE auxiliar
Query id: dffbcd4b-2c08-4d07-916c-b8e1b668c202
│ CREATE TABLE _local.auxiliar
(
`timestamp_iso8601` Nullable(DateTime64(9)),
`host` Nullable(String),
`public_ip` Nullable(String),
`request_method` Nullable(String),
`request_path` Nullable(String),
`status` Nullable(Int64),
`body_bytes_sent` Nullable(Int64),
`request_length` Nullable(Int64),
`first_byte` Nullable(Float64),
`request_time` Nullable(Float64),
`lambda_name` Nullable(String),
`lambda_region` Nullable(String),
`path_type` Nullable(String),
`hit_level` Nullable(String),
`hit_state` Nullable(String),
`error_details` Nullable(String),
`owner_id` Nullable(String),
`project_id` Nullable(String),
`target_path` Nullable(String),
`deployment_plan` Nullable(String),
`lambda_duration` Nullable(Float64),
`lambda_billed_duration` Nullable(Int64),
`lambda_memory_size` Nullable(Int64),
`http_user_agent` Nullable(String),
`full_vercel_id` Nullable(String),
`dc` Nullable(String),
`public_ip_country` Nullable(String),
`public_ip_city` Nullable(String),
`asn_id` Nullable(String),
`asn_name` Nullable(String)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 8192 │
1 row in set. Elapsed: 0.001 sec.
SELECT count(), status - status % 100 AS status_range FROM auxiliar GROUP BY status_range;
SELECT
count(),
status - (status % 100) AS status_range
FROM auxiliar
GROUP BY status_range
Query id: 2685e0d4-827a-4306-8598-5d6e589dbd15
┌─count()─┬─status_range─┐
│ 74000 │ 200 │
│ 5000 │ 400 │
│ 21000 │ 300 │
└─────────┴──────────────┘
3 rows in set. Elapsed: 0.015 sec.
Add a default value for new columns
When you add a new column to a table, ClickHouse will add it with the default value:
CREATE TABLE local
ENGINE = MergeTree
ORDER BY number AS
SELECT *
FROM numbers(1000000);
ALTER TABLE local
ADD COLUMN IF NOT EXISTS `date` DateTime;
OPTIMIZE TABLE local FINAL; -- To speed up the mutation / lazy way to know it has finished
SELECT *
FROM local
LIMIT 10
Query id: b5fedb97-a1c8-475f-a674-0b1658c8e889
┌─number─┬────────────────date─┐
│ 0 │ 1970-01-01 01:00:00 │
│ 1 │ 1970-01-01 01:00:00 │
│ 2 │ 1970-01-01 01:00:00 │
│ 3 │ 1970-01-01 01:00:00 │
│ 4 │ 1970-01-01 01:00:00 │
│ 5 │ 1970-01-01 01:00:00 │
│ 6 │ 1970-01-01 01:00:00 │
│ 7 │ 1970-01-01 01:00:00 │
│ 8 │ 1970-01-01 01:00:00 │
│ 9 │ 1970-01-01 01:00:00 │
└────────┴─────────────────────┘
To change the default value for old rows you need to declare the default in the column definition:
ALTER TABLE local
ADD COLUMN IF NOT EXISTS `new_date` DateTime DEFAULT now();
OPTIMIZE TABLE local FINAL;
SELECT *
FROM local
LIMIT 10
Query id: b5ff3afd-78f7-4ea3-8d43-adc7fe14f0a0
┌─number─┬────────────────date─┬────────────new_date─┐
│ 0 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 1 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 2 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 3 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 4 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 5 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 6 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 7 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 8 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
│ 9 │ 1970-01-01 01:00:00 │ 2022-09-23 13:53:38 │
└────────┴─────────────────────┴─────────────────────┘
10 rows in set. Elapsed: 0.002 sec.
Note that this means that new rows will also get the default value if it's not declared on insertion.
ALTER TABLE local
MODIFY COLUMN `new_date` DateTime DEFAULT yesterday();
INSERT INTO local(number) VALUES (999999999);
SELECT *
FROM local
WHERE number = 999999999
Query id: 02527ad6-4644-42ff-8755-8869a9df30fa
┌────number─┬────────────────date─┬────────────new_date─┐
│ 999999999 │ 1970-01-01 01:00:00 │ 2022-09-22 00:00:00 │
└───────────┴─────────────────────┴─────────────────────┘
Top comments (0)