Andrey Lutskevich Andrey Lutskevich - 3 months ago 37
PHP Question

Elasticsearch find input word and all synonyms

Using Elasticsearch, I am trying to find all items that match the word "skiing".

My mapping (PHP array):

"properties" => [
"title" => [
"type" => "string",
"boost" => 1.0,
"analyzer" => "autocomplete"
]
]


Settings:

"settings"=> [
"analysis" => [
"analyzer" => [
"autocomplete" => [
"type" => "custom",
"tokenizer" => "standard",
"filter" => ["lowercase", "trim", "synonym", "porter_stem"],
"char_filter" => ["html_strip"]
]
],
"filter" => [
"synonym" => [
"type" => "synonym",
"synonyms_path" => "analysis/synonyms.txt"
]
]
]
]


Search query:

[
"index" => "articles",
"body" => [
"query" => [
"filtered" => [
"query" => [
"bool" => [
"must" => [
"indices" => [
"indices" => ["articles"],
"query" => [
"bool" => [
"should" => [
"multi_match" => [
"query" => "skiing",
"fields" => ["title"]
]
]
]
]
]
]
]
]
]
],
"sort" => [
"_score" => [
"order" => "desc"
]
]
],
"size" => 10,
"from" => 0,
"search_type" => "dfs_query_then_fetch",
"explain" => true
];


The synonyms.txt file contains the rule skiing => xanthic.

I want to get all items containing "skiing" (because it is the input word), "ski" (via the porter_stem token filter), and "xanthic" (via the synonyms file). However, I only get results containing the word "xanthic".

Can you please tell me why? How do I need to configure the index?

Answer

Thanks, but this is the solution I found. I changed the mapping:

"properties" => [
    "title" => [
        "type" => "string",
        "boost" => 1.5,
        "analyzer" => "standard",
        "fields" => [
            "english" => [
                "type" => "string",
                "analyzer" => "standard",
                "search_analyzer" => "english",
                "boost" => 1.0
            ],
            "synonym" => [
                "type" => "string",
                "analyzer" => "standard",
                "search_analyzer" => "synonym",
                "boost" => 0.5
            ]
        ]
    ]
]

Settings:

"settings"=> [
    "analysis" => [
        "analyzer" => [
            "synonym" => [
                "type" => "custom",
                "tokenizer" => "standard",
                "filter" => ["lowercase", "trim", "synonym"],
                "char_filter" => ["html_strip"]
            ]
        ],
        "filter" => [
            "synonym" => [
                "type" => "synonym",
                "synonyms_path" => "analysis/synonyms.txt"
            ]
        ]
    ]
]
Comments