应用场景 #
Duckdb是一进程内分析数据库,它可以在无需维护分布式多服务器系统的情况下处理出人意料的大型数据集
使用方法 #
- 首先您需要安装Duckdb的包
pip install duckdb
- 新建连接
在连接地址填写完整的路径名即可, 如D:/abc/my.duckdb
- duckdb采用类postgrep的SQL语法, 也有一些特殊的语法, 比如:
从csv文件导入数据并建表
CREATE TABLE 'cars' AS SELECT * FROM read_csv_auto('cars.csv');
CREATE TEMP TABLE t1 AS SELECT * FROM read_csv('path/file.csv');
COPY lineitem FROM 'lineitem.csv';
交叉表查询
CREATE TABLE Cities (
Country VARCHAR, Name VARCHAR, Year INTEGER, Population INTEGER
);
INSERT INTO Cities VALUES
('NL', 'Amsterdam', 2000, 1005),
('NL', 'Amsterdam', 2010, 1065),
('NL', 'Amsterdam', 2020, 1158),
('US', 'Seattle', 2000, 564),
('US', 'Seattle', 2010, 608),
('US', 'Seattle', 2020, 738),
('US', 'New York City', 2000, 8015),
('US', 'New York City', 2010, 8175),
('US', 'New York City', 2020, 8772);
PIVOT Cities
ON Year
USING sum(Population);
PIVOT Cities
ON Year
USING sum(Population)
GROUP BY Country;
PIVOT Cities
ON Year IN (2000, 2010)
USING sum(Population)
GROUP BY Country;
PIVOT Cities
ON Country, Name
USING sum(Population);
PIVOT Cities
ON Year
USING sum(Population) AS total, max(Population) AS max
GROUP BY Country;
PIVOT Cities
ON Year
USING sum(Population)
GROUP BY Country, Name;