Elasticsearch.js analyzer error using custom analyzer - SEO

ServerStorm
February 11, 2017
242 views
0 votes
2 Answers

I am using the latest version of elasticsearch.js and trying to create a custom path analyzer while creating the index and mapping for some posts.

The goal is to create keywords out of each segment of the path. As a start, however, I am simply trying to get the analyzer working.

Here is the elasticsearch.js script, create_mapped_index.js; you can see the custom analyzer near the top of the file:

var client = require('./connection.js');

// Creates the "wcm-posts" index in a single request: the "settings" section
// registers a custom analyzer (wcm_path_analyzer) built on a pattern tokenizer
// that splits on "/", and the "mappings" section declares the fields of the
// "post" type. The callback only logs whatever the client hands back.
client.indices.create({
index: "wcm-posts",
body: {
    "settings": {
        "analysis": {
            "analyzer": {
                // Custom analyzer that delegates tokenizing to wcm_path_tokenizer below.
                "wcm_path_analyzer": {
                    "tokenizer": "wcm_path_tokenizer",
                    "type": "custom"
                }
            },
            "tokenizer": {
                // Pattern tokenizer configured with "/" as its pattern.
                "wcm_path_tokenizer": {
                    "type": "pattern",
                    "pattern": "/"
                }
            }
        }
    },
    "mappings": {
        "post": {
            "properties": {
                "id": { "type": "string", "index": "not_analyzed" },
                "titles": {
                    "type": "object",
                    "properties": {
                        "main": { "type": "string" },
                        "subtitle": { "type": "string" },
                        "alternate": { "type": "string"  },
                        "concise": { "type": "string" },
                        "seo": { "type": "string" }
                    }
                },
                "tags": {
                    "properties": {
                        "id": { "type": "string", "index": "not_analyzed" },
                        "name": { "type": "string", "index": "not_analyzed" },
                        "slug": { "type": "string" }
                    },
                },
                "main_taxonomies": {
                    "properties": {
                        "id": { "type": "string", "index": "not_analyzed" },
                        "name": { "type": "string", "index": "not_analyzed" },
                        "slug": { "type": "string", "index": "not_analyzed" },
                        // NOTE: the analyzer name is passed as the value of "index" here. This is
                        // the value the server rejects with mapper_parsing_exception
                        // "wrong value for index [wcm_path_analyzer] for field [path]".
                        // The working mapping later in this thread uses
                        // "analyzer": "wcm_path_analyzer" instead.
                        "path": { "type": "string", "index": "wcm_path_analyzer" }
                    },
                },
                "categories": {
                    "properties": {
                        "id": { "type": "string", "index": "not_analyzed" },
                        "name": { "type": "string", "index": "not_analyzed" },
                        "slug": { "type": "string", "index": "not_analyzed" },
                        // NOTE: same problem as main_taxonomies.path — the analyzer name is
                        // given under "index" rather than "analyzer".
                        "path": { "type": "string", "index": "wcm_path_analyzer" }
                    },
                },
                "content_elements": {
                    "dynamic": "true",
                    "type": "nested",
                    "properties": {
                        "content": { "type": "string" }
                    }
                }
            }
        }
    }
  }
}, function (err, resp, respcode) {
    // Dump the error (if any), the response body and the status code to the console.
    console.log(err, resp, respcode);
});

If the "index" value on the path fields is set to “not_analyzed” instead of wcm_path_analyzer, or "index" is omitted altogether, then the index creation, the mapping and the insertion of posts all work.

As soon as I try to use the custom analyzer on the main_taxonomies and categories path fields, as shown in the JSON above, I get this error:

  response: '{"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"wrong value for index [wcm_path_analyzer] for field [path]"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [post]: wrong value for index [wcm_path_analyzer] for field [path]","caused_by":{"type":"mapper_parsing_exception","reason":"wrong value for index [wcm_path_analyzer] for field [path]"}},"status":400}',
toString: [Function],
toJSON: [Function] } { error:
{ root_cause: [ [Object] ],
 type: 'mapper_parsing_exception',
 reason: 'Failed to parse mapping [post]: wrong value for index [wcm_path_analyzer] for field [path]',
 caused_by:
  { type: 'mapper_parsing_exception',
    reason: 'wrong value for index [wcm_path_analyzer] for field [path]' } },
  status: 400 } 400

Here is an example of the two objects that need the custom analyzer on the path field. I pulled this example, after inserting 15 posts into the elasticsearch index when not using the custom analyzer:

 "main_taxonomies": [
        {
          "id": "123",
          "type": "category",
          "name": "News",
          "slug": "news",
          "path": "/News/"
        }
      ],
      "categories": [
        {
          "id": "157",
          "name": "Local News",
          "slug": "local-news",
          "path": "/News/Local News/",
          "main": true
        },

Up to this point, I had googled similar questions, and most answers said that people had either forgotten to put the analyzers in the settings or had not added the parameters to the body. I believe I have done both of these correctly.

I have also reviewed the elasticsearch.js documentation and tried to create a:

client.indices.putSettings({})

But for this to be used, the index needs to already exist with the mappings, or it throws the error ‘no indices found’.

I'm not sure where to go from here. Your suggestions are appreciated.

Answers

Chosen as BEST ANSWER
- ServerStorm
- February 12, 2017 at 12:30 am
- 0 votes
0
So I got it working ... I think that either the JSON objects were too complex, or it was the change of specifying the analyzer with the "analyzer" parameter in the field mappings (rather than "index") that did the trick.

First I flattened out the nested objects into flat fields, to:
```
"main_taxonomies_path": "/News/",
"categories_paths": [ "/News/Local/", "/Business/Local/" ],
"search_tags": [ "montreal-3","laval-4" ],
```
Then I updated the analyzer to:
```
"settings": {
        "analysis": {
            "analyzer": {
                "wcm_path_analyzer": {
                    "tokenizer": "wcm_path_tokenizer",
                    "type": "custom"
                }
            },
            "tokenizer": {
                "wcm_path_tokenizer": {
                    "type": "pattern",
                    "pattern": "/",
                    "replacement": ","
                }
            }
        }
    },
```
Notice that the analyzer 'type' is set to custom.

Then, when mapping these flattened fields:
```
"main_taxonomies_path": { "type": "string", "analyzer": "wcm_path_analyzer" },
"categories_paths": { "type": "string", "analyzer": "wcm_path_analyzer" },
"search_tags": { "type": "string" },
```
which, when searching, yields the following for these fields:
```
 "main_taxonomies_path": "/News/",
 "categories_paths": [ "/News/Local News/",  "/Business/Local Business/" ],
 "search_tags": [ "montreal-2", "laval-3" ],
```
So the custom analyzer does what it was set to do in this situation.

I'm not sure if I could apply type object to the main_taxonomies_path and categories_paths, so I will play around with this and see.

I will be refining the pattern searches to format the results differently but happy to have this working.

For completeness I will put my final custom pattern analyzer, mapping and results, once I've completed this.

Regards, Steve

(Edit)

So the final analyzer is:

var client = require('./connection.js');

client.indices.create({
  index: "wcm-posts",
  body: {
    "settings": {
        "analysis": {
            "analyzer": {
                "wcm_path_analyzer": {
                    "type" : "pattern",
                    "lowercase": true,
                    "pattern": "/" 
                }
            }
        }
    },
    "mappings": {
        "post": {
            "properties": {
                "id": { "type": "string", "index": "not_analyzed" },
                "client_id": { "type": "string", "index": "not_analyzed" },
                "license_id": { "type": "string", "index": "not_analyzed" },
                "origin_id": { "type": "string" },
 ...
 ...
                "origin_slug": { "type": "string" },
                "main_taxonomies_path": { "type": "string", "analyzer": "wcm_path_analyzer", "search_analyzer": "standard" },
                "categories_paths": { "type": "string", "analyzer": "wcm_path_analyzer", "search_analyzer": "standard" },
                "search_tags": { "type": "string" },
                // See the custom analyzer set here --------------------------^

I did determine that, at least for the path or pattern analyzers, complex nested fields or objects cannot be used. Flattened fields set to “type”: “string” were the only way I could get this to work.

I ended up not needing a custom tokenizer as the pattern analyzer is full featured and already includes a tokenizer.

I chose to use the pattern analyzer because it breaks on the pattern, leaving individual terms, whereas the path tokenizer segments the path in different ways but does not create individual terms (I hope I’m correct in saying this; I base it on the documentation).

Hope this helps someone else!

Steve

Please signup or login to give your own answer.

Click here to cancel reply.

Elasticsearch.js analyzer error using custom analyzer – SEO

Answers