專案分析1


資料集



CREATE table users (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(30) NOT NULL,
    email VARCHAR(120) NOT NULL,
    age INT NOT NULL,
    created_at DATETIME
);

INSERT INTO users (name, email, age, created_at)
VALUES
('Jack Hung', 'jackh32@gmail.com', 20, '2019-07-24 17:11:01'),
('Tony Liu', 'tonykk@gmail.com', 62, '2020-06-03 17:11:01'),
('Amy Chang', 'amychang@gmail.com', 32, '2020-05-11 17:11:01');

CREATE table products (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(30) NOT NULL,
    price INT NOT NULL,
    weight DECIMAL(5, 2),
    category VARCHAR(20),
    created_at DATETIME
);

INSERT INTO products (name, price, weight, category, created_at)
VALUES 
('華速 intel i3 筆電', 20000, 2.12, 'NB', '2020-05-11 17:11:01'),
('Mac Pro 筆電', 62000, 1.4, 'NB', '2020-05-11 17:11:01'),
('微興電競筆電', 32000, 3.00, 'NB', '2020-05-11 17:11:01'),
('戈林冰箱', 22000, 13.78, '3C', '2020-05-11 17:11:01'),
('三力冰箱', 52000, 23.18, '3C', '2020-05-11 17:11:01'),
('C 語言入門', 420, 0.31, 'Book', '2020-05-11 17:11:01'),
('python3 實戰', 580, 0.28, 'Book', '2020-05-11 17:11:01'),
('JavaScript 英雄', 1000, 0.12, 'Book', '2020-05-11 17:11:01'),
('Java 資料分析', 340, -1, 'Book', '2020-05-11 17:11:01'),
('python 資料分析', 640, -0.43, 'Book', '2020-05-11 17:11:01');

CREATE table orders (
    id INT PRIMARY KEY AUTO_INCREMENT,
    amount INT NOT NULL,
    -- 先宣告欄位,再設定外鍵 FOREIGN KEY REFERENCES
    customer_id INT,
    created_at DATETIME,
    FOREIGN KEY (customer_id) REFERENCES users(id)
);


INSERT INTO orders (amount, customer_id, created_at)
VALUES
(188420, 1, '2020-05-11 17:11:01'),
(54000, 3, '2019-04-13 17:11:01'),
(104420, 2, '2020-06-21 17:11:01'),
(52420, 2, '2020-05-01 17:11:01'),
(104000, 2, '2020-07-11 17:11:01'),
(32420, 2, '2019-03-24 17:11:01');


CREATE table order_details (
    id INT PRIMARY KEY AUTO_INCREMENT,
    -- 先宣告欄位,再設定外鍵 FOREIGN KEY REFERENCES
    order_id INT,
    product_id INT,
    created_at DATETIME,
    FOREIGN KEY (order_id) REFERENCES orders(id),
    FOREIGN KEY (product_id) REFERENCES products(id)
);

INSERT INTO order_details (order_id, product_id, created_at)
VALUES
(1, 1, '2020-05-11 17:11:01'),
(1, 2, '2020-05-11 17:11:01'),
(1, 3, '2020-05-11 17:11:01'),
(1, 4, '2020-05-11 17:11:01'),
(1, 5, '2020-05-11 17:11:01'),
(1, 6, '2020-05-11 17:11:01'),
(2, 3, '2020-04-13 17:11:01'),
(2, 4, '2020-04-13 17:11:01'),
(3, 5, '2020-06-21 17:11:01'),
(3, 6, '2020-06-21 17:11:01'),
(3, 5, '2020-06-21 17:11:01'),
(4, 5, '2020-05-01 17:11:01'),
(4, 6, '2020-05-01 17:11:01'),
(5, 5, '2020-07-11 17:11:01'),
(5, 5, '2020-07-11 17:11:01'),
(6, 6, '2020-03-24 17:11:01'),
(6, 3, '2020-03-24 17:11:01');

視窗函式


視窗函式(Windows function),用來細部分析使用的函式,將資料進行更細緻的細分成一個個窗格(Frame)方便更細部的計算。

視窗函式可以為以下兩種:
1.彙總函式(SUM、AVG 等)
2.專用函式:RANK()排名函式

<視窗函式> OVER (PARTITION BY 欄位 ORDER BY)

透過 category 進行分類並使用 RANK() 和進行排名


> 透過PARTITION將資料分類成3C、BOOK、NB三個FRAMES,分別處理
SELECT name, price, category, RANK() OVER(PARTITION BY category ORDER BY price) FROM products;
![](https://static.coderbridge.com/img/smpss96271/de94266c08ac4a3aa9371eddfcea63fe.png)

不使用 PARTITION 進行總排名


> 把所有資料視為一大個FRAME來做RANK()的排名。
SELECT name, price, category, RANK() OVER(ORDER BY price) FROM products;
![](https://static.coderbridge.com/img/smpss96271/5129e94d005f405dbb7e84ef905863a4.png)

category 當作窗格進行累加價格


> 用category分類並累價格。
SELECT name, price, category, SUM(price) OVER(PARTITION BY category ORDER BY price) AS aggregate_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/72af36ed77ec48dd86c4a65194c5d44e.png)

移動平均計算


> 將tuple的price與前兩筆資料的price進行平均得此筆資料的平均值。另一種寫法:OVER (ORDER BY id ROWS 2 PRECEDING)
SELECT id, name, price, category,
AVG(price) OVER (ORDER BY id ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/ebd5e57814014b72a3148fbbe1b0615c.png) > 將tuple的price與後一筆資料的price進行平均得此筆資料的平均值。
SELECT id, name, price, category,
AVG(price) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS moving_avg_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/89804072120c4bd3a24240f1f4452f1d.png)

小計、總計 GROUP 運算子


>ROLLUP,可以讓我們根據條件進行分組小計和總計的計算。
SELECT name, category, SUM(price) FROM products GROUP BY category, name WITH ROLLUP;
![](https://static.coderbridge.com/img/smpss96271/847f231d420b44a283298f260dfba1c2.png)

常用營收分析日期函式


>搭配時間函式將訂單的資料進行分析,獲得日、月、年營收統計。 >>DATE() 取出日期時間中日期的部分:YYYY-MM-DD >>EXTRACT() 日期函式可以取出 TIMESTAMP 的日期部分(年 YEAR、月 MONTH等)
SELECT DATE('2020-03-24 17:11:01');

日營收分析

SELECT
    DATE(created_at),
    COUNT(*),
    SUM(amount),
    AVG(amount)
FROM orders
GROUP BY DATE(created_at)
ORDER BY DATE(created_at);

月營收平均

SELECT
    EXTRACT(YEAR_MONTH FROM DATE(created_at)),
    COUNT(*),
    SUM(amount),
    AVG(amount)
FROM orders
GROUP BY EXTRACT(MONTH FROM DATE(created_at))
ORDER BY DATE(created_at);

年營收平均

SELECT
    EXTRACT(YEAR FROM DATE(created_at)),
    COUNT(*),
    SUM(amount),
    AVG(amount)
FROM orders
GROUP BY EXTRACT(YEAR FROM DATE(created_at))
ORDER BY DATE(created_at);

類別營收

SELECT products.category,SUM(price) FROM orders,order_details,products 
WHERE orders.id=order_details.order_id and products.id=order_details.product_id 
GROUP BY products.category;






你可能感興趣的文章

圈套

圈套

Ajax/ JSONP/ CORS 相關筆記

Ajax/ JSONP/ CORS 相關筆記

迭代陣列 forEach, for of, for in, map

迭代陣列 forEach, for of, for in, map






留言討論