Skip to content

Elasticsearch Mapping Configuration for Searching⚓︎

Summary⚓︎

The business requirement is very straightforward: I want a search for "video game", "videogame" or "video gaming" to return all documents with a name similar to "video gaming". Specifically:

  • A search for "Video Game" matches documents whose name is "video games"
  • A search for "Video Game" matches documents whose name is "Video gaming"
  • A search for "video game" matches documents whose name is "videogame"

Mapping⚓︎

## First we create the index template with the settings.
## The template applies to any index matching "my-index" and defines two custom
## analysers that differ only in their last token filter:
##   - ngram_analyser      ends with ngram_filter      (grams of 8 to 9 characters)
##   - edge_ngram_analyser ends with edge_ngram_filter (prefixes of 8 to 12 characters)
## Both first run the remove_space and remove_dash char filters, so the text is
## tokenised without its whitespace and dashes, then lowercase the tokens, drop
## English stop words and stem what is left.
## Stop words are removed before stemming, the same order the built-in english
## analyzer uses, and the stemmer is selected with the documented "language"
## parameter.
## The name field keeps its default analysis and gains one sub-field per analyser.
POST /_template/my-index-template
{
  "index_patterns": [
    "my-index"
  ],
  "settings": {
    "analysis": {
      "analyzer": {
        "ngram_analyser": {
          "type": "custom",
          "tokenizer": "standard",
          "char_filter": [
            "remove_space",
            "remove_dash"
          ],
          "filter": [
            "lowercase",
            "english_stopwords",
            "english_stemmer",
            "ngram_filter"
          ]
        },
        "edge_ngram_analyser": {
          "type": "custom",
          "tokenizer": "standard",
          "char_filter": [
            "remove_space",
            "remove_dash"
          ],
          "filter": [
            "lowercase",
            "english_stopwords",
            "english_stemmer",
            "edge_ngram_filter"
          ]
        }
      },
      "filter": {
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 8,
          "max_gram": 12
        },
        "ngram_filter": {
          "type": "ngram",
          "min_gram": 8,
          "max_gram": 9
        },
        "english_stopwords": {
          "type": "stop",
          "stopwords": "_english_"
        }
      },
      "char_filter": {
        "remove_space": {
          "type": "pattern_replace",
          "pattern": "\\s+",
          "replacement": ""
        },
        "remove_dash": {
          "type": "pattern_replace",
          "pattern": "\\-",
          "replacement": ""
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "fields": {
          "ngram_analyser": {
            "type": "text",
            "analyzer": "ngram_analyser"
          },
          "edge_ngram_analyser": {
            "type": "text",
            "analyzer": "edge_ngram_analyser"
          }
        }
      }
    }
  }
}

## Drop any index left over from an earlier test run
DELETE my-index

## Index the four spellings of the name that the search is expected to find
POST /my-index/_doc/1
{
  "name": "videogame"
}
POST /my-index/_doc/2
{
  "name": "video games"
}
POST /my-index/_doc/3
{
  "name": "video gaming"
}
POST /my-index/_doc/4
{
  "name": "video-game"
}

Now we set up a simple multi-field search across the three name fields (name, name.edge_ngram_analyser and name.ngram_analyser). We add a fuzziness of 1 to catch misspellings.

console
## Run one multi_match over the plain name field and both of its sub-fields,
## with a fuzziness of 1
GET /my-index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "multi_match": {
            "query": "video gaming",
            "fields": ["name", "name.edge_ngram_analyser", "name.ngram_analyser"],
            "type": "best_fields",
            "fuzziness": 1
          }
        }
      ]
    }
  }
}