I have the following table:
province_id | owner_id | game_id | resource_production | valid_from | valid_until | static_province_id |
---|---|---|---|---|---|---|
auto incremented Integer | integer | integer | integer | timestamp | timestamp | integer |
I have two indexes:
- test_valid_from(game_id, owner_id, valid_from, valid_until)
- test_valid_until(game_id, owner_id, valid_until, valid_from)
Every time the production of a province changes the valid_until timestamp of the old provinces gets set to the current timestamp. Also a new province is inserted with the current timestamp in the valid_from field. If the province is the newest province of its static_province_id in this game valid_until is null.
My Goal
I want to have a sub 1 second query that can get me the total resource production for every change in resource production of an owner in a specified game.
My first approach
1. Query
SELECT
a.game_id,
a.valid_from,
a.owner_id,
SUM(b.resource_production)
FROM
(select distinct valid_from, owner_id, game_id from province) a
JOIN province b
ON a.game_id = b.game_id
AND a.owner_id = b.owner_id
AND (
(
b.valid_from < a.valid_from
and a.valid_from < b.valid_until
)
OR a.valid_from = b.valid_from
OR (
b.valid_until IS NULL
AND b.valid_from < a.valid_from))
where a.game_id=1 and a.owner_id = 1
GROUP BY a.game_id, a.owner_id, a.valid_from
Explain
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "1773449.97"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"nested_loop": [
{
"table": {
"table_name": "a",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "b",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"a.game_id",
"a.owner_id"
],
"rows_examined_per_scan": 241,
"rows_produced_per_join": 1146881,
"filtered": "22.66",
"index_condition": "(((`testing`.`b`.`valid_from` < `a`.`valid_from`) and (`a`.`valid_from` < `testing`.`b`.`valid_until`)) or (`testing`.`b`.`valid_from` = `a`.`valid_from`) or ((`testing`.`b`.`valid_until` is null) and (`testing`.`b`.`valid_from` < `a`.`valid_from`)))",
"cost_info": {
"read_cost": "1265065.52",
"eval_cost": "114688.15",
"prefix_cost": "1773449.97",
"data_read_per_join": "70M"
},
"used_columns": [
"province_id",
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id",
"static_province_id"
]
}
}
]
}
}
}
Result
game_id | valid_from | owner_id | SUM(b.resource_production) |
---|---|---|---|
1 | ‘2022-10-25 23:02:13’ | 1 | 7829 |
1 | ‘2022-10-26 00:22:14’ | 1 | 7836 |
… | … | … | … |
The query works, but it is way to slow as it takes about 17 seconds.
My second approach
i have read that the mysql optimizer might not use the index in multiple or conditions as it is the case in my first query with the valid_from and valid_until.
2.Query
select g.game_id, g.owner_id, g.valid_from, sum(g.rp) from (
select a_b.game_id, a_b.owner_id, a.resource_production as rp, a_b.valid_from from(select distinct valid_from, owner_id, game_id from province) a_b
join province a
on a.game_id = a_b.game_id
AND a.owner_id = a_b.owner_id
AND
(
a.valid_from < a_b.valid_from
and a_b.valid_from < a.valid_until
)
Union
select b_b.game_id, b_b.owner_id, b.resource_production as rp, b_b.valid_from from(select distinct valid_from, owner_id, game_id from province) b_b
join province b
on b.game_id = b_b.game_id
AND b.owner_id = b_b.owner_id
AND b.valid_from = b_b.valid_from
Union
select c_b.game_id, c_b.owner_id, c.resource_production as rp, c_b.valid_from from(select distinct valid_from, owner_id, game_id from province) c_b
join province c
on c.game_id = c_b.game_id
AND c.owner_id = c_b.owner_id
AND (
c.valid_until IS NULL
AND c_b.valid_from < c.valid_from )) g
where g.game_id=1 and g.owner_id=1
group by g.game_id, g.owner_id, g.valid_from
Explain
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "76805.35"
},
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "g",
"access_type": "ALL",
"rows_examined_per_scan": 682692,
"rows_produced_per_join": 682692,
"filtered": "100.00",
"cost_info": {
"read_cost": "8536.15",
"eval_cost": "68269.20",
"prefix_cost": "76805.35",
"data_read_per_join": "20M"
},
"used_columns": [
"game_id",
"owner_id",
"rp",
"valid_from"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"union_result": {
"using_temporary_table": true,
"select_id": 8,
"table_name": "<union2,4,6>",
"access_type": "ALL",
"query_specifications": [
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "1773449.97"
},
"nested_loop": [
{
"table": {
"table_name": "a_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "a",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"a_b.game_id",
"a_b.owner_id"
],
"rows_examined_per_scan": 241,
"rows_produced_per_join": 562138,
"filtered": "11.11",
"index_condition": "((`testing`.`a`.`valid_from` < `a_b`.`valid_from`) and (`a_b`.`valid_from` < `testing`.`a`.`valid_until`))",
"cost_info": {
"read_cost": "1265065.52",
"eval_cost": "56213.89",
"prefix_cost": "1773449.97",
"data_read_per_join": "34M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id"
]
}
}
]
}
},
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 4,
"cost_info": {
"query_cost": "33802.62"
},
"nested_loop": [
{
"table": {
"table_name": "b_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 5,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "b",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id",
"valid_from"
],
"key_length": "16",
"ref": [
"b_b.game_id",
"b_b.owner_id",
"b_b.valid_from"
],
"rows_examined_per_scan": 4,
"rows_produced_per_join": 89841,
"filtered": "100.00",
"cost_info": {
"read_cost": "22460.27",
"eval_cost": "8984.11",
"prefix_cost": "33802.62",
"data_read_per_join": "5M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"game_id"
]
}
}
]
}
},
{
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 6,
"cost_info": {
"query_cost": "34611.03"
},
"nested_loop": [
{
"table": {
"table_name": "c_b",
"access_type": "ALL",
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"cost_info": {
"read_cost": "264.25",
"eval_cost": "2094.00",
"prefix_cost": "2358.25",
"data_read_per_join": "490K"
},
"used_columns": [
"valid_from",
"owner_id",
"game_id"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 7,
"cost_info": {
"query_cost": "2189.32"
},
"duplicates_removal": {
"using_filesort": false,
"table": {
"table_name": "province",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_from",
"used_key_parts": [
"game_id",
"owner_id"
],
"key_length": "12",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 20940,
"rows_produced_per_join": 20940,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "95.32",
"eval_cost": "2094.00",
"prefix_cost": "2189.32",
"data_read_per_join": "1M"
},
"used_columns": [
"province_id",
"owner_id",
"valid_from",
"game_id",
"static_province_id"
]
}
}
}
}
}
},
{
"table": {
"table_name": "c",
"access_type": "ref",
"possible_keys": [
"ix_gid_plid_vf_vl",
"test_valid_until",
"test_valid_from"
],
"key": "test_valid_until",
"used_key_parts": [
"game_id",
"owner_id",
"valid_until"
],
"key_length": "17",
"ref": [
"c_b.game_id",
"c_b.owner_id",
"const"
],
"rows_examined_per_scan": 4,
"rows_produced_per_join": 30713,
"filtered": "33.33",
"index_condition": "((`testing`.`c`.`valid_until` is null) and (`c_b`.`valid_from` < `testing`.`c`.`valid_from`))",
"cost_info": {
"read_cost": "23037.70",
"eval_cost": "3071.39",
"prefix_cost": "34611.03",
"data_read_per_join": "1M"
},
"used_columns": [
"owner_id",
"resource_production",
"valid_from",
"valid_until",
"game_id"
]
}
}
]
}
}
]
}
}
}
}
}
}
}
Result
game_id | owner_id | valid_from | sum(g.rp) |
---|---|---|---|
1 | 1 | ‘2022-10-25 23:02:13’ | 28222 |
1 | 1 | ‘2022-10-26 00:42:13’ | 28229 |
… | … | … | … |
The 2. query neither works nor is it fast enough at about 14 seconds. It doesn’t work because it returns the wrong total production for every timestamp. I hope that you can help me because I am out of ideas.
2
Answers
Deleting the two indexes, and adding this one seems to improve performance (When I was able to reproduce a correct set of test-data…. )
The suggested index is:
You can test it by running this DBFIDDLE, and look at the times produce by
SHOW PROFILES
.After running it, comment the
CREATE INDEX
that I am suggesting to use, and uncomment the currently commented indexes (the 2 index you have currently), and re-run the fiddle again, to see the changed times inSHOW PROFILES
.This suggested index is covering, because all used fields are contained in the index.
It would be nice to know if, and how much, this improves …
Start with moving these into the first ‘derived’ table:
Have this index (as Luuk suggests):
Then see my questions in the Comments above.
After that, I may have more advice.