The task is to exclude row by a group where the "word" field is NULL
Tables
CREATE TABLE `search_phrases` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`phrase` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`frequency` bigint NOT NULL,
PRIMARY KEY (`id`),
KEY `search_phrases_phrase_index` (`phrase`),
KEY `search_phrases_frequency_index` (`frequency`)
) ENGINE=InnoDB AUTO_INCREMENT=1000001 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `search_words` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`word` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `search_words_word_unique` (`word`)
) ENGINE=InnoDB AUTO_INCREMENT=128287 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `search_combinable` (
`search_phrase_id` int unsigned NOT NULL,
`search_word_id` int unsigned NOT NULL,
KEY `search_combinable_search_phrase_id_foreign` (`search_phrase_id`),
KEY `search_combinable_search_word_id_foreign` (`search_word_id`),
CONSTRAINT `search_combinable_search_phrase_id_foreign` FOREIGN KEY (`search_phrase_id`) REFERENCES `search_phrases` (`id`),
CONSTRAINT `search_combinable_search_word_id_foreign` FOREIGN KEY (`search_word_id`) REFERENCES `search_words` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `requested_words` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`word` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`ts` timestamp NULL DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `requested_words_word_index` (`word`)
) ENGINE=InnoDB AUTO_INCREMENT=436 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
/* some sample data */
INSERT INTO search_phrases VALUES (1, 'men''s shorts', 1031588), (2, 'red shorts', 456000), (3, 'green shorts', 436000);
INSERT INTO search_words VALUES (1, 'men''s'), (2, 'shorts'), (3, 'red'), (4, 'green');
INSERT INTO search_combinable VALUES (1, 1), (1, 2), (2, 3), (2, 2), (3, 4), (3, 2);
INSERT INTO requested_words VALUES (1, 'men''s', '2023-03-01 07:45:49'), (2, 'shorts', '2023-03-01 07:45:49'), (3, 'red', '2023-03-01 07:45:49');
As a result, there should be phrases that contain words from the requested words table
select sp.id, sp.phrase, sp.frequency from search_phrases as sp
inner join search_combinable as sc on sc.search_phrase_id = sp.id
inner join search_words as sw on sw.id = sc.search_word_id
inner join requested_words as rw on rw.word = sw.word
where sp.id not in (
select sp.id from search_phrases as temp_sp
inner join search_combinable as sc on sc.search_phrase_id = temp_sp.id
inner join search_words as sw on sw.id = sc.search_word_id
left join requested_words as rw on rw.word = sw.word
where rw.word is null and temp_sp.id = sp.id
)
group by sp.id
order by sp.frequency desc
The execution time is about 29 seconds. It grows in proportion to the increase in the "request_table"
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|----|-------------|---------|------------|--------|-------------------------------------------------------------------------------------|--------------------------------------------|---------|-----------------------------|------|----------|----------------------------------------------|
| 1 | SIMPLE | rw | | index | requested_words_word_index | requested_words_word_index | 1022 | | 435 | 100 | Using index; Using temporary; Using filesort |
| 1 | SIMPLE | sw | | eq_ref | PRIMARY,search_words_word_unique | search_words_word_unique | 1022 | laravel.rw.word | 1 | 100 | Using index |
| 1 | SIMPLE | sc | | ref | search_combinable_search_phrase_id_foreign,search_combinable_search_word_id_foreign | search_combinable_search_word_id_foreign | 4 | laravel.sw.id | 17 | 100 | |
| 1 | SIMPLE | sp | | eq_ref | PRIMARY,search_phrases_phrase_index,search_phrases_frequency_index | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 | |
| 1 | SIMPLE | temp_sp | | eq_ref | PRIMARY | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 | Using where; Not exists; Using index |
| 1 | SIMPLE | sc | | ref | search_combinable_search_phrase_id_foreign,search_combinable_search_word_id_foreign | search_combinable_search_phrase_id_foreign | 4 | laravel.sc.search_phrase_id | 2 | 100 | |
| 1 | SIMPLE | sw | | eq_ref | PRIMARY | PRIMARY | 4 | laravel.sc.search_word_id | 1 | 100 | |
| 1 | SIMPLE | rw | | ref | requested_words_word_index | requested_words_word_index | 1022 | laravel.sw.word | 4 | 100 | Using where; Using index |
Dataset
search_phrases
id | phrase | frequency |
---|---|---|
1 | men’s shorts | 1031588 |
2 | red shorts | 456000 |
3 | green shorts | 436000 |
search_combinable
search_phrase_id | search_word_id |
---|---|
1 | 1 |
1 | 2 |
2 | 3 |
2 | 2 |
3 | 4 |
3 | 2 |
search_words
id | word |
---|---|
1 | men’s |
2 | shorts |
3 | red |
4 | green |
requested_words
id | word | ts |
---|---|---|
1 | men’s | 2023-03-01 07:45:49 |
2 | shorts | 2023-03-01 07:45:49 |
3 | red | 2023-03-01 07:45:49 |
in the response, I expect to see the phrases: men’s shorts and red shorts
P.S
After adding the primary key to the search_combinable table
CREATE TABLE `search_combinable` (
`search_phrase_id` int unsigned NOT NULL,
`search_word_id` int unsigned NOT NULL,
PRIMARY KEY (`search_word_id`,`search_phrase_id`),
KEY `search_combinable_search_word_id_search_phrase_id_index` (`search_word_id`,`search_phrase_id`),
KEY `search_combinable_search_phrase_id_foreign` (`search_phrase_id`),
CONSTRAINT `search_combinable_search_phrase_id_foreign` FOREIGN KEY (`search_phrase_id`) REFERENCES `search_phrases` (`id`),
CONSTRAINT `search_combinable_search_word_id_foreign` FOREIGN KEY (`search_word_id`) REFERENCES `search_words` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
select sp.id, sp.phrase, sp.frequency
from search_phrases as sp
where not exists (
select 1
from search_phrases sp2
left join search_combinable as sc on sc.search_phrase_id = sp2.id
left join search_words as sw on sw.id = sc.search_word_id
left join requested_words as rw on rw.word = sw.word
where sp2.id = sp.id
and rw.word is null
)
order by sp.frequency desc;
id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | SIMPLE | sw | index | PRIMARY | search_words_word_unique | 1022 | 43359 | 100 | Using index; Using temporary; Using filesort | ||
1 | SIMPLE | rw | ref | requested_words_word_index | requested_words_word_index | 1022 | laravel.sw.word | 4 | 100 | Using index | |
1 | SIMPLE | sc | ref | PRIMARY,search_combinable_search_word_id_search_phrase_id_index,search_combinable_search_phrase_id_foreign | search_combinable_search_word_id_search_phrase_id_index | 4 | laravel.sw.id | 18 | 100 | Using index | |
1 | SIMPLE | sp | eq_ref | PRIMARY,search_phrases_phrase_index,search_phrases_frequency_index | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 |
2
Answers
Remove all the redundancy, all of those tables are involved in the query – you don’t also need another set of joins etc. in the where clause:
Your junction table
search_combinable
has no PK and is not indexed properly. It should be:It may be that the PK and reverse should be the other way around. You can experiment with your dataset and known queries.
The best I have come up with so far is:
With your test dataset it returns 47 rows in 0.93 – 0.97 seconds.
I managed to squeeze a tiny bit off by pre-joining
search_words
andrequested_words
but YMMV:0.68 – 0.70 seconds