ElasticSearch的ngram中文拼音简繁体搜索.docx
《ElasticSearch的ngram中文拼音简繁体搜索.docx》由会员分享,可在线阅读,更多相关《ElasticSearch的ngram中文拼音简繁体搜索.docx(14页珍藏版)》请在冰豆网上搜索。
ElasticSearch的ngram中文拼音简繁体搜索
ElasticSearch的ngram、中文拼音、简繁体搜索
ElasticSearch版本:
elasticsearch-7.3.0
ElasticSearch相关插件(IK分词、pinyin、stconvert)的安装可以参考各插件的官方文档。
ElasticSearch服务上创建Mapping
# Create the book_v2 index (PUT /book_v2/) with its analysis settings and mapping.
#
# settings.analysis defines:
#   - analyzers:   pinyin_analyzer_1/2, tsconvert, autocomplete_analyzer,
#                  custom_analyzer_1/2/3
#   - tokenizers:  pinyin_tokenizer_1/2 (type pinyin), tsconvert_tokenizer (type stconvert)
#   - filters:     stconvert_filter (s2t), tsconvert_filter (t2s),
#                  autocomplete_filter (edge_ngram 1..10), cus_pinyin_filter_1 (pinyin)
#   - char_filter: tsconvert (t2s); not referenced by any analyzer in this request
#
# mappings: "dynamic" is "strict", "id" is excluded from _source, and "author"
# is a multi-field with one sub-field per analyzer so each can be queried on its own.
#
# NOTE(review): the date format is restored as "yyyy-MM-dd HH:mm:ss"; the space
# between date and time was lost in extraction along with every other space -- confirm.
curl -H "Content-Type: application/json" -XPUT 'http://192.168.0.1:9200/book_v2/' -d '
{
  "settings": {
    "analysis": {
      "analyzer": {
        "pinyin_analyzer_1": {
          "tokenizer": "pinyin_tokenizer_1"
        },
        "pinyin_analyzer_2": {
          "tokenizer": "pinyin_tokenizer_2"
        },
        "tsconvert": {
          "tokenizer": "tsconvert_tokenizer",
          "filter": [
            "tsconvert_filter"
          ]
        },
        "autocomplete_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "autocomplete_filter"
          ]
        },
        "custom_analyzer_1": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
            "cus_pinyin_filter_1"
          ]
        },
        "custom_analyzer_2": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "stconvert_filter"
          ]
        },
        "custom_analyzer_3": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "tsconvert_filter"
          ]
        }
      },
      "tokenizer": {
        "pinyin_tokenizer_1": {
          "type": "pinyin",
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        },
        "pinyin_tokenizer_2": {
          "type": "pinyin",
          "keep_separate_first_letter": true,
          "keep_full_pinyin": false
        },
        "tsconvert_tokenizer": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        }
      },
      "filter": {
        "stconvert_filter": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "s2t"
        },
        "tsconvert_filter": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        },
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 10
        },
        "cus_pinyin_filter_1": {
          "type": "pinyin",
          "keep_joined_full_pinyin": true,
          "keep_separate_first_letter": true,
          "none_chinese_pinyin_tokenize": true
        }
      },
      "char_filter": {
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      }
    }
  },
  "mappings": {
    "dynamic": "strict",
    "_source": {
      "excludes": [
        "id"
      ]
    },
    "properties": {
      "id": {
        "type": "keyword"
      },
      "author": {
        "analyzer": "ik_max_word",
        "type": "text",
        "fields": {
          "raw": {
            "type": "keyword"
          },
          "pinyin0": {
            "analyzer": "pinyin",
            "type": "text"
          },
          "pinyin1": {
            "analyzer": "pinyin_analyzer_1",
            "type": "text"
          },
          "pinyin2": {
            "analyzer": "pinyin_analyzer_2",
            "type": "text"
          },
          "s2t": {
            "analyzer": "stconvert",
            "type": "text"
          },
          "t2s": {
            "analyzer": "tsconvert",
            "type": "text"
          },
          "ac": {
            "analyzer": "autocomplete_analyzer",
            "type": "text"
          },
          "cus1": {
            "analyzer": "custom_analyzer_1",
            "type": "text"
          },
          "cus2": {
            "analyzer": "custom_analyzer_2",
            "type": "text"
          },
          "cus3": {
            "analyzer": "custom_analyzer_3",
            "type": "text"
          }
        }
      },
      "title": {
        "analyzer": "ik_max_word",
        "type": "text"
      },
      "describe": {
        "analyzer": "ik_smart",
        "type": "text"
      },
      "publish_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
'
ElasticSearch中新增数据
# Index document 1 into book_v2 (POST /book_v2/_doc/1).
# The author and title carry a trailing "123" (digits after the Chinese text).
# NOTE(review): the space inside publish_time is restored; extraction had
# removed it and split the value across lines -- confirm.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/book_v2/_doc/1' -d '
{
  "id": "1",
  "author": "张三丰123",
  "title": "太极拳三十天入门到精通123",
  "describe": "手把手、一对一教学、三十天入门到精通,包教包会",
  "publish_time": "2019-08-22 17:48:16"
}
'
# Index document 2 into book_v2 (POST /book_v2/_doc/2).
# The author and title carry a trailing "abc" (Latin letters after the Chinese text).
# NOTE(review): the space inside publish_time is restored; extraction had
# removed it and split the value across lines -- confirm.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/book_v2/_doc/2' -d '
{
  "id": "2",
  "author": "张三丰abc",
  "title": "太极拳三十天入门到精通abc",
  "describe": "手把手、一对一教学、三十天入门到精通,包教包会",
  "publish_time": "2019-08-22 17:48:16"
}
'
# Index document 3 into book_v2 (POST /book_v2/_doc/3).
# The author is written in traditional characters, and the describe text ends
# with a traditional-character phrase.
# NOTE(review): the space inside publish_time is restored; extraction had
# removed it and split the value across lines -- confirm.
curl -H "Content-Type: application/json" -XPOST 'http://192.168.0.1:9200/book_v2/_doc/3' -d '
{
  "id": "3",
  "author": "張三豐",
  "title": "太极拳三十天入门到精通",
  "describe": "手把手、一对一教学、三十天入门到精通,包教包会,龍馬精神",
  "publish_time": "2019-08-22 17:48:16"
}
'
验证相关分词器
# Run the "pinyin" analyzer on a simplified-Chinese name through the
# book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "pinyin",
  "text": "张三丰"
}
'
"zhang","zsf","san","feng"
# Run the index-defined "pinyin_analyzer_1" analyzer on a simplified-Chinese
# name through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "pinyin_analyzer_1",
  "text": "张三丰"
}
'
"张三丰","zhang","zsf","san","feng"
# Run the index-defined "pinyin_analyzer_2" analyzer on a simplified-Chinese
# name through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "pinyin_analyzer_2",
  "text": "张三丰"
}
'
"z","zsf","s","f"
# Run the "stconvert" analyzer on simplified-Chinese input through the
# book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "stconvert",
  "text": "张三丰"
}
'
"張三豐"
# Run the index-defined "tsconvert" analyzer on traditional-Chinese input
# through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "tsconvert",
  "text": "張三豐"
}
'
"张三丰"
# Run the index-defined "autocomplete_analyzer" (ik_smart + edge_ngram filter)
# on a simplified-Chinese name through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "autocomplete_analyzer",
  "text": "张三丰"
}
'
"张","张三","张三丰"
# Run the index-defined "custom_analyzer_1" (ik_smart + pinyin filter) on a
# simplified-Chinese name through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "custom_analyzer_1",
  "text": "张三丰"
}
'
"z","zhang","zhangsanfeng","zsf","s","san","f","feng"
# Run the index-defined "custom_analyzer_2" (ik_max_word + s2t filter) on
# simplified-Chinese input through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "custom_analyzer_2",
  "text": "张三丰"
}
'
"張三豐","張三","三","豐"
# Run the index-defined "custom_analyzer_3" (ik_max_word + t2s filter) on
# traditional-Chinese input through the book_v2 _analyze endpoint.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_analyze?pretty=true' -d '
{
  "analyzer": "custom_analyzer_3",
  "text": "張三豐"
}
'
"张","三","丰"
验证查询
# Match query on the top-level "author" field, returning the first 10 hits
# (from 0, size 10) with highlighting enabled on "author".
#
# NOTE(review): the highlight tags were stripped during extraction; only the
# fragment red\"> survived in pre_tags and post_tags was left empty. The
# <font color="red"> / </font> pair below is a reconstruction -- confirm
# against the original article.
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match": {
      "author": "张三丰"
    }
  },
  "from": 0,
  "size": 10,
  "highlight": {
    "pre_tags": ["<font color=\"red\">"],
    "post_tags": ["</font>"],
    "fields": {
      "author": {}
    }
  }
}
'
# Phrase-match a simplified-Chinese name against the "author.t2s" sub-field
# (analyzed with the "tsconvert" analyzer).
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.t2s": "张三丰"
    }
  }
}
'
# Phrase-match the pinyin syllable "zhang" against the "author.pinyin0"
# sub-field (analyzed with the "pinyin" analyzer).
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.pinyin0": "zhang"
    }
  }
}
'
# Phrase-match the pinyin syllable "zhang" against the "author.pinyin1"
# sub-field (analyzed with "pinyin_analyzer_1").
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.pinyin1": "zhang"
    }
  }
}
'
# Phrase-match the Chinese name itself against the "author.pinyin1" sub-field
# (analyzed with "pinyin_analyzer_1", whose tokenizer sets keep_original).
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.pinyin1": "张三丰"
    }
  }
}
'
# Phrase-match the initials "zsf" against the "author.pinyin2" sub-field
# (analyzed with "pinyin_analyzer_2").
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.pinyin2": "zsf"
    }
  }
}
'
# Phrase-match the initials "zsf" against the "author.cus1" sub-field
# (analyzed with "custom_analyzer_1").
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.cus1": "zsf"
    }
  }
}
'
# Phrase-match a single simplified character against the "author.cus2"
# sub-field (analyzed with "custom_analyzer_2", which applies the s2t filter).
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.cus2": "丰"
    }
  }
}
'
# Phrase-match a single traditional character against the "author.cus3"
# sub-field (analyzed with "custom_analyzer_3", which applies the t2s filter).
curl -H "Content-Type: application/json" -XGET 'http://192.168.0.1:9200/book_v2/_search?pretty=true' -d '
{
  "query": {
    "match_phrase": {
      "author.cus3": "豐"
    }
  }
}
'