原文链接:https://xiets.blog.csdn.net/article/details/132348920

版权声明:原创文章禁止转载

专栏目录:Elasticsearch 专栏(总目录)

ES 搜索 API 官网文档:Search APIs

先创建一个索引,并写入一些文档用于搜索示例:

PUT /hotel // 酒店索引

{

"mappings": {

"properties": {

"name": { // 名称

"type": "text",

"fields": {

"keyword_name": {

"type": "keyword"

}

}

},

"price": { // 价格

"type": "double"

},

"decoration_date": { // 装修日期

"type": "date",

"format": "yyyy-MM-dd"

},

"international": { // 是否国际酒店 (国际酒店可以接待外宾)

"type": "boolean"

},

"location": { // 地理位置坐标

"type": "geo_point"

},

"tag": { // 标签

"type": "keyword"

}

}

}

}

写入一些文档示例:

PUT /hotel/_doc/001

{

"name": "龙门国际大酒店",

"price": 300.00,

"decoration_date": "2024-06-06",

"international": true,

"location": {

"lat": 22.5377084,

"lon": 113.9308322

},

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

PUT /hotel/_doc/002

{

"name": "龙门精选假日酒店",

"price": 200.00,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

PUT /hotel/_doc/003

{

"name": "龙门客栈古风酒店",

"price": 350.00,

"decoration_date": "2021-10-08",

"international": false,

"location": {

"lat": 22.53396,

"lon": 114.0554156

}

}

PUT /hotel/_doc/004

{

"name": "悦来时尚宾馆",

"price": 99.00,

"decoration_date": "2023-08-08",

"international": false,

"location": {

"lat": 22.5325899,

"lon": 113.922899

},

"tag": ["冰箱", "微波炉", "洗衣机"]

}

PUT /hotel/_doc/005

{

"name": "悦来文雅大酒店",

"price": 550.00,

"decoration_date": "2020-11-11",

"international": true,

"location": {

"lat": 22.4829366,

"lon": 114.0913511

},

"tag": ["WIFI", "停车场"]

}

PUT /hotel/_doc/006

{

"name": "烟雨楼文雅假日酒店",

"price": 600.00,

"decoration_date": "2024-12-12",

"international": false,

"location": {

"lat": 22.9266059,

"lon": 113.8363914

},

"tag": ["WIFI", "微波炉", "洗衣机"]

}

1. 搜索辅助

1.1 返回部分字段

搜索结果中的文档数据封装在响应的 _source 字段中,搜索时可以只返回指定字段,请求格式:

POST /<index>/_search

{

"_source": ["field1", "field2"], // 指定返回的字段

"query": {

// ... 查询条件

}

}

返回部分字段搜索示例:

POST /hotel/_search

{

"_source": ["name", "price"], // 只返回 name 和 price 这两个字段

"query": {

"term": { // 搜索国际酒店

"international": {

"value": true

}

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 3,

"relation": "eq"

},

"max_score": 0.6931471,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 0.6931471,

"_source": {

"name": "龙门国际大酒店",

"price": 300

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 0.6931471,

"_source": {

"name": "龙门精选假日酒店",

"price": 200

}

},

{

"_index": "hotel",

"_id": "005",

"_score": 0.6931471,

"_source": {

"name": "悦来文雅大酒店",

"price": 550

}

}

]

}

}

1.2 结果计数与分页

如果搜索数据匹配的结果非常多,需要使用分页返回结果,需要告诉客户端结果文档的总数。ES 的 _count API 可以查询搜索匹配的文档总数量。

_count API 请求格式:

POST /<index>/_count

{

"query": {

// ... 查询条件

}

}

// 返回格式

{

"count": 123, // 匹配的文档数量

"_shards": { // 匹配文档的分片信息

// ...

}

}

使用分页搜索,需要告诉 ES 从匹配的文档列表中的第几个文档开始返回,以及最多要返回多少个文档,搜索参数 from 和 size 就是分别表示开始返回的文档位置和数量,from 默认值为 0,size 默认值为 10。

分页搜索 请求格式:

POST /<index>/_search

{

"from": 0, // 返回结果的起始文档位置

"size": 20, // 最多需要返回的文档数量

"query": {

// ... 查询条件

}

}

结果计数与分页请求示例:

POST /hotel/_count

{

"query": {

"term": {

"tag": { // 搜索有 WIFI 的酒店的数量

"value": "WIFI"

}

}

}

}

// 返回

{

"count": 4,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

}

}

POST /hotel/_search

{

"from": 2, // 从索引为 2 的位置开始返回

"size": 1, // 最多返回 1 个文档

"query": {

"term": {

"tag": {

"value": "停车场" // 搜索有停车场的酒店

}

}

}

}

// 返回

{

"took": 3,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 3,

"relation": "eq"

},

"max_score": 0.75783724,

"hits": [

{

"_index": "hotel",

"_id": "005",

"_score": 0.75783724,

"_source": {

"name": "悦来文雅大酒店",

"price": 550,

"decoration_date": "2020-11-11",

"international": true,

"location": {

"lat": 22.4829366,

"lon": 114.0913511

},

"tag": ["WIFI", "停车场"]

}

}

]

}

}

1.3 性能分析

使用 ES 执行一个搜索请求,如果查询条件非常复杂,搜索请求可能响应比较慢。ES 提供了 profile 功能,可以详细列出执行一个搜索请求的细节,以及每一个步骤的耗时,可以帮助用户对查询条件进行性能分析,以便优化查询条件。

启用 profile 功能,只需要在搜索请求中增加 "profile": true 的参数:

POST /<index>/_search

{

"profile": true,

"query": {

// ... 查询条件

}

}

性能分析搜索请求示例:

POST /hotel/_search

{

"profile": true, // 启用性能分析

"query": {

"match": {

"name": "龙门" // match 搜索不能用 {"value": "keyword"} 的形式提供值, 而是直接提供

}

}

}

执行上面的搜索请求后,将返回一段冗长的信息。除了在 hits 节点返回匹配的文档外,主要还返回了一个 profile 节点,该节点详细记录了搜索的过程。从下面的返回信息中可以看出,全文搜索 “龙门”,拆分成了两个子搜索 “name:龙 name:门”,并给出了子搜索结果的详细信息。

完整的搜索响应:

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 3,

"relation": "eq"

},

"max_score": 1.4251624,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1.4251624,

"_source": {

"name": "龙门国际大酒店",

"price": 300,

"decoration_date": "2024-06-06",

"international": true,

"location": {

"lat": 22.5377084,

"lon": 113.9308322

},

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1.3494902,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "003",

"_score": 1.3494902,

"_source": {

"name": "龙门客栈古风酒店",

"price": 350,

"decoration_date": "2021-10-08",

"international": false,

"location": {

"lat": 22.53396,

"lon": 114.0554156

}

}

}

]

},

"profile": {

"shards": [

{

"id": "[hNLgGgpaQoq_u57-x6tz1A][hotel][0]",

"searches": [

{

"query": [

{

"type": "BooleanQuery",

"description": "name:龙 name:门",

"time_in_nanos": 459669,

"breakdown": {

"set_min_competitive_score_count": 0,

"match_count": 3,

"shallow_advance_count": 0,

"set_min_competitive_score": 0,

"next_doc": 4916,

"match": 418,

"next_doc_count": 3,

"score_count": 3,

"compute_max_score_count": 0,

"compute_max_score": 0,

"advance": 31626,

"advance_count": 2,

"count_weight_count": 0,

"score": 5501,

"build_scorer_count": 4,

"create_weight": 226208,

"shallow_advance": 0,

"count_weight": 0,

"create_weight_count": 1,

"build_scorer": 191000

},

"children": [

{

"type": "TermQuery",

"description": "name:龙",

"time_in_nanos": 150875,

"breakdown": {

"set_min_competitive_score_count": 0,

"match_count": 0,

"shallow_advance_count": 6,

"set_min_competitive_score": 0,

"next_doc": 0,

"match": 0,

"next_doc_count": 0,

"score_count": 3,

"compute_max_score_count": 6,

"compute_max_score": 13708,

"advance": 1417,

"advance_count": 5,

"count_weight_count": 0,

"score": 3417,

"build_scorer_count": 6,

"create_weight": 60917,

"shallow_advance": 5707,

"count_weight": 0,

"create_weight_count": 1,

"build_scorer": 65709

}

},

{

"type": "TermQuery",

"description": "name:门",

"time_in_nanos": 85873,

"breakdown": {

"set_min_competitive_score_count": 0,

"match_count": 0,

"shallow_advance_count": 6,

"set_min_competitive_score": 0,

"next_doc": 0,

"match": 0,

"next_doc_count": 0,

"score_count": 3,

"compute_max_score_count": 6,

"compute_max_score": 2790,

"advance": 2708,

"advance_count": 5,

"count_weight_count": 0,

"score": 791,

"build_scorer_count": 6,

"create_weight": 36333,

"shallow_advance": 1459,

"count_weight": 0,

"create_weight_count": 1,

"build_scorer": 41792

}

}

]

}

],

"rewrite_time": 11708,

"collector": [

{

"name": "SimpleTopScoreDocCollector",

"reason": "search_top_hits",

"time_in_nanos": 22249

}

]

}

],

"aggregations": [],

"fetch": {

"type": "fetch",

"description": "",

"time_in_nanos": 87833,

"breakdown": {

"load_stored_fields": 18042,

"load_source": 751,

"load_stored_fields_count": 3,

"next_reader_count": 2,

"load_source_count": 3,

"next_reader": 9916

},

"debug": {

"stored_fields": [

"_id",

"_routing",

"_source"

]

},

"children": [

{

"type": "FetchSourcePhase",

"description": "",

"time_in_nanos": 1459,

"breakdown": {

"process_count": 3,

"process": 1167,

"next_reader": 292,

"next_reader_count": 2

},

"debug": {

"fast_path": 3

}

},

{

"type": "StoredFieldsPhase",

"description": "",

"time_in_nanos": 1792,

"breakdown": {

"process_count": 3,

"process": 1292,

"next_reader": 500,

"next_reader_count": 2

}

}

]

}

}

]

}

}

1.4 评分分析

当执行搜索请求搜索文档时,搜索引擎会对匹配的文档打分,如果没有指定排序规则,ES 将使用默认的打分算法对文档排序。有时我们需要知道指定搜索条件对某个文档具体的打分详情(包括不匹配时的详情),以便对搜索条件进行优化。ES 提供了 _explain API 可以查看搜索时的匹配详情。

_explain API 请求格式,查询「指定搜索条件」对「某个具体文档」的匹配详情:

POST /<index>/_explain/<id>

{

"query": {

// ... 搜索条件

}

}

_explain API 请求示例:

POST /hotel/_explain/001

{

"query": {

"match": {

"name": "哈哈"

}

}

}

// 返回

{

"_index": "hotel",

"_id": "001",

"matched": false, // 不匹配

"explanation": {

"value": 0,

"description": "no matching term",

"details": []

}

}

POST /hotel/_explain/001

{

"query": {

"match": {

"name": "龙门"

}

}

}

// 返回

{

"_index": "hotel",

"_id": "001",

"matched": true,

"explanation": {

"value": 1.4251624, // 匹配总分 (所有子查询匹配得分之和)

"description": "sum of:",

"details": [

{

"value": 0.7125812, // 子查询匹配得分

"description": "weight(name:龙 in 0) [PerFieldSimilarity], result of:",

"details": [

{

"value": 0.7125812,

"description": "score(freq=1.0), computed as boost * idf * tf from:",

"details": [

{

"value": 2.2,

"description": "boost",

"details": []

},

{

"value": 0.6931472,

"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",

"details": [

{

"value": 3,

"description": "n, number of documents containing term",

"details": []

},

{

"value": 6,

"description": "N, total number of documents with field",

"details": []

}

]

},

{

"value": 0.46728975,

"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",

"details": [

{

"value": 1,

"description": "freq, occurrences of term within document",

"details": []

},

{

"value": 1.2,

"description": "k1, term saturation parameter",

"details": []

},

{

"value": 0.75,

"description": "b, length normalization parameter",

"details": []

},

{

"value": 7,

"description": "dl, length of field",

"details": []

},

{

"value": 7.5,

"description": "avgdl, average length of field",

"details": []

}

]

}

]

}

]

},

{

"value": 0.7125812,

"description": "weight(name:门 in 0) [PerFieldSimilarity], result of:",

"details": [

{

"value": 0.7125812,

"description": "score(freq=1.0), computed as boost * idf * tf from:",

"details": [

{

"value": 2.2,

"description": "boost",

"details": []

},

{

"value": 0.6931472,

"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",

"details": [

{

"value": 3,

"description": "n, number of documents containing term",

"details": []

},

{

"value": 6,

"description": "N, total number of documents with field",

"details": []

}

]

},

{

"value": 0.46728975,

"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",

"details": [

{

"value": 1,

"description": "freq, occurrences of term within document",

"details": []

},

{

"value": 1.2,

"description": "k1, term saturation parameter",

"details": []

},

{

"value": 0.75,

"description": "b, length normalization parameter",

"details": []

},

{

"value": 7,

"description": "dl, length of field",

"details": []

},

{

"value": 7.5,

"description": "avgdl, average length of field",

"details": []

}

]

}

]

}

]

}

]

}

}

2. 搜索匹配

ES 针对不同的数据类型提供了丰富的搜索匹配功能,如进行完全匹配的 term 查询,用于分词匹配的 match 查询,用于大小/范围匹配的 range 查询等。

2.1 查询所有文档: match_all

match_all 查询用于查询所有文档。ES 使用 match_all 查询时,不对文档打分(默认所有文档都满分匹配)。查询所有文档通常需要和分页查询一起使用。

match_all 查询请求格式:

POST /<index>/_search

{

"from": 0, // 返回结果的起始文档位置

"size": 20, // 最多需要返回的文档数量

"query": {

"match_all": { // 查询所有文档

"boost": 1.0 // 满分设置为 1.0 分 (所有文档的匹配分数都设置为该分数)

}

}

}

match_all 查询请求示例:

POST /hotel/_search

{

"from": 1,

"size": 3,

"query": {

"match_all": {

"boost": 1.0

}

}

}

// 返回

{

"took": 0,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 6,

"relation": "eq"

},

"max_score": 1,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "003",

"_score": 1,

"_source": {

"name": "龙门客栈古风酒店",

"price": 350,

"decoration_date": "2021-10-08",

"international": false,

"location": {

"lat": 22.53396,

"lon": 114.0554156

}

}

},

{

"_index": "hotel",

"_id": "004",

"_score": 1,

"_source": {

"name": "悦来时尚宾馆",

"price": 99,

"decoration_date": "2023-08-08",

"international": false,

"location": {

"lat": 22.5325899,

"lon": 113.922899

},

"tag": ["冰箱", "微波炉", "洗衣机"]

}

}

]

}

}

2.2 全匹配: term、terms

term 查询用于全匹配查询,term 匹配的字段类型支持 数值型、布尔型、日期时间、关键字类型 (包括这些类型的数组类型)。terms 查询则是 term 的扩展形式,用于同时查询一个或多个值(多个值之间是逻辑或的关系)。

term 和 terms 查询的请求格式:

POST /<index>/_search

{

"query": {

"term": { // term 查询

"<field>": { // 要搜索的字段

"value": "<value>" // 搜索值, 也可以直接 "<field>": "<value>" 的形式提供搜索值

}

}

}

}

POST /<index>/_search

{

"query": {

"terms": { // terms 查询

"<field>": ["<value1>", "<value2>"] // 搜索字段和多个搜索值, 多个值之间是逻辑或的关系 (即只要匹配其中一个即可)

}

}

}

term 查询请求示例:

POST /hotel/_search

{

"query": {

"term": { // term 查询

"name.keyword_name": { // 搜索 name 字段的 keyword_name 子字段,

"value": "龙门精选假日酒店" // name 字段是 text 类型不能使用 term 查询,

} // 而 name.keyword_name 子字段是 keyword 类型, 可以使用 term 查询。

}

}

}

// 返回

{

"took": 0,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 1,

"relation": "eq"

},

"max_score": 1.540445,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": 1.540445,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

}

]

}

}

terms 查询请求示例:

POST /hotel/_search

{

"query": {

"terms": { // terms 查询

"tag": ["冰箱", "微波炉"] // 搜索有 冰箱 或 微波炉 的酒店

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 4,

"relation": "eq"

},

"max_score": 1,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1,

"_source": {

"name": "龙门国际大酒店",

"price": 300,

"decoration_date": "2024-06-06",

"international": true,

"location": {

"lat": 22.5377084,

"lon": 113.9308322

},

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "004",

"_score": 1,

"_source": {

"name": "悦来时尚宾馆",

"price": 99,

"decoration_date": "2023-08-08",

"international": false,

"location": {

"lat": 22.5325899,

"lon": 113.922899

},

"tag": ["冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "006",

"_score": 1,

"_source": {

"name": "烟雨楼文雅假日酒店",

"price": 600,

"decoration_date": "2024-12-12",

"international": false,

"location": {

"lat": 22.9266059,

"lon": 113.8363914

},

"tag": ["WIFI", "微波炉", "洗衣机"]

}

}

]

}

}

2.3 范围匹配: range

range 查询用于范围匹配,一般只用于数值类型和日期类型等可比较大小的字段类型的查询。

range 查询需要指定查询的字段值边界,有以下请求参数用于指定范围边界:

gt:大于;gte:大于等于;lt:小于;lte:小于等于。

range 查询请求格式:

POST /<index>/_search

{

"query": {

"range": { // range 查询

"<field>": { // 要查询的字段

"gt": "value1", // 范围匹配条件

"lte": "value2"

}

}

}

}

range 查询请求示例:

POST /hotel/_search

{

"_source": ["name", "price"], // 返回部分字段

"query": {

"range": { // range 查询

"price": { // 查询价格在 [200, 300] 之间的酒店

"gte": 200,

"lte": 300

}

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 1,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1,

"_source": {

"name": "龙门国际大酒店",

"price": 300

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店",

"price": 200

}

}

]

}

}

2.4 字段值是否存在: exists

ES 写入文档时,可以只写入部分字段,或者某个字段写入 null 值。而 exists 查询就是用于查询某个字段是否存在。

字段存在(满足exists匹配)的条件有:

值存在且不为null;值是数组,但不是空数组,并且数组元素不全为null。

exists 查询请求格式:

POST /<index>/_search

{

"query": {

"exists": { // exists 查询

"field": "<field>" // 要查询的字段

}

}

}

exists 查询请求示例:

POST /hotel/_search

{

"_source": ["name", "price", "tag"],

"query": {

"exists": { // 查询有 tag 字段值文档

"field": "tag"

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 5,

"relation": "eq"

},

"max_score": 1,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1,

"_source": {

"name": "龙门国际大酒店",

"price": 300,

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "004",

"_score": 1,

"_source": {

"name": "悦来时尚宾馆",

"price": 99,

"tag": ["冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "005",

"_score": 1,

"_source": {

"name": "悦来文雅大酒店",

"price": 550,

"tag": ["WIFI", "停车场"]

}

},

{

"_index": "hotel",

"_id": "006",

"_score": 1,

"_source": {

"name": "烟雨楼文雅假日酒店",

"price": 600,

"tag": ["WIFI", "微波炉", "洗衣机"]

}

}

]

}

}

2.5 布尔查询: bool

布尔查询是一种复合查询,可以根据多个普通的子查询使用逻辑与或非组合出满足各种复杂条件的查询。一个布尔查询包含了多个子查询,每一个子查询的结果都是一个布尔值,而多个子查询的结果再根据逻辑与或非的关系又组合成最终的一个布尔值用于最终确定是否匹配文档。布尔查询对文档的打分是按各子查询的匹配程度对文档进行综合打分。

布尔查询中多个子查询结果支持的逻辑组合方式:

must:逻辑与,必须匹配所有查询条件;should:逻辑或,至少匹配其中一个查询条件;must_not:逻辑非,所有查询条件都必须不匹配(只要匹配了其中任意一个条件,文档就会被排除);filter:必须匹配所有查询条件,但该条件的匹配程度不参与打分计算(相当于不参与打分的must)。

bool 查询请求格式:

POST /<index>/_search

{

"query": {

"bool": { // 布尔查询, 布尔查询是多个子查询结果的逻辑组合,

"must|should|must_not|filter": [ // 所以这里是多个子查询组成的数组, 节点名称表示的是子查询结果的逻辑组合方式。

{

"term|match|range|exists|geo_distance|bool": {

// 单个子查询 (支持各种查询类型, 包括 分词匹配的match查询、地理距离查询 和 bool查询)

}

},

{

"term|match|range|exists|geo_distance|bool": {

// 子查询

}

}

]

}

}

}

2.5.1 逻辑与: must 查询

bool 查询的 must 查询示例:

POST /hotel/_search

{

"_source": ["name", "international"],

"query": {

"bool": { // bool 查询

"must": [ // 查询名称包含“龙门”的国际酒店

{

"match": {

"name": "龙门"

}

},

{

"term": {

"international": {

"value": true

}

}

}

]

}

}

}

// 返回

{

"took": 2,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 2.1183095,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 2.1183095,

"_source": {

"name": "龙门国际大酒店",

"international": true

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 2.0426373,

"_source": {

"name": "龙门精选假日酒店",

"international": true

}

}

]

}

}

2.5.2 逻辑或: should 查询

bool 查询的 should 查询示例:

POST /hotel/_search

{

"_source": ["name", "price"],

"query": {

"bool": { // bool 查询

"should": [ // 查询名称包含“假日”或“精选”的酒店

{

"match": {

"name": "假日"

}

},

{

"match": {

"name": "精选"

}

}

]

}

}

}

// 返回

{

"took": 6,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 5.003666,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": 5.003666,

"_source": {

"name": "龙门精选假日酒店",

"price": 200

}

},

{

"_index": "hotel",

"_id": "006",

"_score": 1.9034984,

"_source": {

"name": "烟雨楼文雅假日酒店",

"price": 600

}

}

]

}

}

2.5.3 逻辑非: must_not 查询

bool 查询的 must_not 查询示例:

POST /hotel/_search

{

"_source": ["name", "price"],

"query": {

"bool": { // bool 查询

"must_not": [ // 搜索名称不包含“国际”, 也不包含“酒店”的酒店

{

"match": {

"name": "国际"

}

},

{

"match": {

"name": "酒店"

}

}

]

}

}

}

// 返回

{

"took": 6,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 1,

"relation": "eq"

},

"max_score": 0,

"hits": [

{

"_index": "hotel",

"_id": "004",

"_score": 0,

"_source": {

"name": "悦来时尚宾馆",

"price": 99

}

}

]

}

}

2.5.4 过滤条件: filter 查询

bool 查询的 filter 查询示例:

POST /hotel/_search

{

"_source": ["name", "tag"],

"query": {

"bool": { // bool 查询

"must": [ // 名称必须包含“龙门”

{

"match": {

"name": "龙门"

}

}

],

"filter": [ // 并且必须有停车场 (该条件只过滤, 不参与打分)

{

"term": {

"tag": {

"value": "停车场"

}

}

}

]

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 1.4251624,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1.4251624,

"_source": {

"name": "龙门国际大酒店",

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1.3494902,

"_source": {

"name": "龙门精选假日酒店",

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

}

]

}

}

3. 全文搜索/分词匹配

全文搜索一般指对文本(text)类型数据的搜索。与 term、range 等全匹配的结构化搜索不同,全文搜索可以部分匹配。全文搜索首先对搜索词进行分析(分词),根据分析结果再构建出多个子查询。

全文搜索包含 match、multi_match 和 match_phrase 等查询方式。

3.1 match 查询

match 查询是全文搜索的主要方式。

match 查询请求格式:

POST /<index>/_search

{

"query": {

"match": { // match 搜索

"<field>": "<text>" // 查询的字段和搜索词

}

}

}

POST /<index>/_search

{

"query": {

"match": { // match 搜索

"<field>": { // 查询的字段

"query": "<text>" // 搜索词也可以写在单独对象中, 并且可以添加 match 查询参数

}

}

}

}

match 查询请求示例:

POST /hotel/_search

{

"_source": ["name"],

"query": {

"match": { // match 查询

"name": "精选假日"

}

}

}

// 返回

{

"took": 3,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 5.003666,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": 5.003666,

"_source": {

"name": "龙门精选假日酒店"

}

},

{

"_index": "hotel",

"_id": "006",

"_score": 1.9034984,

"_source": {

"name": "烟雨楼文雅假日酒店"

}

}

]

}

}

3.2 multi_match 查询

multi_match 查询可以使用同一个查询关键词同时查询多个 text 类型的字段(只要其中一个字段有匹配的即可),效果上类似于使用 bool 查询的 should 查询封装了多个 match 子查询(每个字段对应一个)。不同之处在于打分方式:multi_match 默认的 best_fields 类型取各字段中得分最高的那个作为文档得分,而不是把各字段的得分相加。

multi_match 查询请求格式:

POST /<index>/_search

{

"query": {

"multi_match": { // multi_match 搜索

"query": "<text>", // 查询的搜索词

"fields": ["<field1>", "<field2>"] // 查询的字段 (所有字段都必须是 text 类型, 只要有其中一个字段匹配了搜索词即可)

}

}

}

搜索示例中只有一个字段是 text 类型,这里不做示例。

3.3 match_phrase 查询

match_phrase 查询用于匹配确切的短语或临近的词语。搜索词仍然会被分词,但要求分词后的各个词在文档中按相同的顺序相邻出现(效果上相当于不分词的整体匹配),或者词与词之间最多只能间隔指定字数的距离(通过 slop 参数指定)。

match_phrase 查询请求格式:

POST /<index>/_search

{

"query": {

"match_phrase": { // match_phrase 搜索

"<field>": "<text>" // 查询的字段和搜索词

}

}

}

POST /<index>/_search

{

"query": {

"match_phrase": { // match_phrase 搜索

"<field>": { // 查询的字段

"query": "<text>", // 查询的搜索词

"slop": 2 // 临近匹配, 分词之间允许的最大字数距离

}

}

}

}

match_phrase 查询请求示例:

POST /hotel/_search

{

"_source": ["name"],

"query": {

"match_phrase": { // match_phrase 查询

"name": { // 查询字段

"query": "精选假日" // 查询短语 (不部分匹配)

}

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 1,

"relation": "eq"

},

"max_score": 5.0036654,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": 5.0036654,

"_source": {

"name": "龙门精选假日酒店"

}

}

]

}

}

match_phrase 查询请求示例(临近的词语):

POST /hotel/_search

{

"_source": ["name"],

"query": {

"match_phrase": { // match_phrase 查询

"name": { // 查询字段

"query": "文雅酒店", // 查询短语

"slop": 1 // 分词后中间最多可以有1个字的距离

}

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 1,

"relation": "eq"

},

"max_score": 1.7047069,

"hits": [

{

"_index": "hotel",

"_id": "005",

"_score": 1.7047069,

"_source": {

"name": "悦来文雅大酒店" // "文雅"和"酒店"之间有1个字的距离, 匹配搜索。没有匹配到 "烟雨楼文雅假日酒店", 因为"文雅"和"酒店"之间有2个字的距离。

}

}

]

}

}

4. 基于地理位置的查询

ES 支持地理位置坐标和形状区域数据类型的存储和搜索。其中地理坐标存储了经纬度,可以根据位置距离搜索数据。

geo_point 类型表示地理坐标类型,地理类型官网相关链接:

Geopoint field type:地理坐标类型。Geoshape field type:地理形状类型。

对于 geo_point 类型的字段,有 3 种查询方式,分别为 geo_distance查询、geo_bounding_box查询 和 geo_polygon查询。

其中 geo_distance 查询是根据指定中心坐标点,查询指定距离范围内的文档。

geo_distance 查询请求格式:

POST /<index>/_search

{

"query": {

"geo_distance": {

"distance": "5km", // 距离范围

"<field>": { // 需要查询的 geo_point 类型的字段, 提供一个中心点坐标值

"lat": 22.5298891, // 纬度, 正数表示北纬, 负数表示南纬

"lon": 113.9449817 // 经度, 正数表示东经, 负数表示西经

}

}

}

}

geo_distance 查询请求示例,搜索 2km 内的酒店:

POST /hotel/_search

{

"query": {

"geo_distance": { // geo_distance 查询

"distance": "2km", // 距离范围

"location": { // "location" 字段的中心点坐标

"lat": 22.5298891, // 纬度, 正数表示北纬, 负数表示南纬

"lon": 113.9449817 // 经度, 正数表示东经, 负数表示西经

}

}

}

}

// 返回

{

"took": 2,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 2,

"relation": "eq"

},

"max_score": 1,

"hits": [

{

"_index": "hotel",

"_id": "001",

"_score": 1,

"_source": {

"name": "龙门国际大酒店",

"price": 300,

"decoration_date": "2024-06-06",

"international": true,

"location": {

"lat": 22.5377084,

"lon": 113.9308322

},

"tag": ["WIFI", "停车场", "冰箱", "微波炉", "洗衣机"]

}

},

{

"_index": "hotel",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04",

"international": true,

"location": {

"lat": 22.531667,

"lon": 113.9497277

},

"tag": ["WIFI", "停车场", "微波炉", "洗衣机"]

}

}

]

}

}

5. 搜索建议

搜索建议指的是用户在输入搜索词的过程中,系统根据已输入部分给出建议的搜索关键词,也就是自动补全,用户只需要点击其中一个搜索关键词直接进行搜索。要实现搜索建议,需要在搜索过程中,用户每输入一个字,就把已输入部分发到服务端查询匹配的搜索建议词列表,因此该场景需要服务端能快速响应请求。通过给出搜索建议,可以避免用户输入错误的关键词,或者引导用户使用更合适的关键词搜索,可以大大提升用户搜索体验和搜索结果的准确度。

ES 可以使用 Completion Suggester 实现搜索建议功能。如果要一个字段支持搜索建议,可以把字段定义为 completion 类型,搜索时使用 suggest 搜索。具体参考官网:Suggesters。

为了方便演示,重新创建一个索引,写入一些文档:

// 创建索引

PUT /hotel_suggest

{

"mappings": {

"properties": { // 映射字段

"name": { // 字段名称

"type": "completion" // 字段类型, 用于搜索建议

}

}

}

}

// 写入文档

PUT /hotel_suggest/_doc/001

{"name": "龙门国际大酒店"}

PUT /hotel_suggest/_doc/002

{"name": "龙门精选假日酒店"}

PUT /hotel_suggest/_doc/003

{"name": "龙门客栈古风酒店"}

PUT /hotel_suggest/_doc/004

{"name": "悦来时尚宾馆"}

PUT /hotel_suggest/_doc/005

{"name": "悦来文雅大酒店"}

PUT /hotel_suggest/_doc/006

{"name": "烟雨楼文雅假日酒店"}

suggest 搜索请求格式:

POST /<index>/_search

{

"suggest": { // suggest 搜索

"<suggest_name>": { // 自定义的搜索建议的名称, 可同时搜索多个

"prefix": "<text>", // 匹配前缀的关键词 (还有其他匹配方式)

"completion": {

"field": "<field>" // 用于匹配前缀的 completion 类型的字段

}

}

}

}

suggest 搜索可以和 query 搜索一起使用,前者搜索结果封装在响应的 suggest 节点中,后者搜索结果封装在响应的 hits 节点中。

suggest 搜索请求示例:

POST /hotel_suggest/_search

{

"suggest": {

"name_suggest_1": {

"prefix": "龙门",

"completion": {

"field": "name"

}

}

}

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 0,

"relation": "eq"

},

"max_score": null,

"hits": []

},

"suggest": {

"name_suggest_1": [ // 搜索请求中自定义的搜索建议名称

{

"text": "龙门",

"offset": 0,

"length": 2,

"options": [

{

"text": "龙门国际大酒店", // 搜索建议词

"_index": "hotel_suggest",

"_id": "001",

"_score": 1,

"_source": { // 原文档内容

"name": "龙门国际大酒店"

}

},

{

"text": "龙门客栈古风酒店",

"_index": "hotel_suggest",

"_id": "003",

"_score": 1,

"_source": {

"name": "龙门客栈古风酒店"

}

},

{

"text": "龙门精选假日酒店",

"_index": "hotel_suggest",

"_id": "002",

"_score": 1,

"_source": {

"name": "龙门精选假日酒店"

}

}

]

}

]

}

}

6. 按字段值排序

ES 搜索结果默认根据匹配程度降序排序,也可以在搜索请求时指定按照某些字段的值升序或降序排序。query 查询请求中,提供了 sort 子句用于根据指定的字段值排序。

搜索排序请求格式:

POST /<index>/_search

{

"query": {

// ... 搜索条件

},

"sort": [ // 排序, 可以按多个字段排序, 优先按数组靠前的元素字段排序

{

"<field>": { // 按某个字段的值排序

"order": "asc|desc" // 升序或降序

}

},

{

"_geo_distance": { // 按地理距离排序

"<field>": { // 排序字段的中心点坐标 (数据类型为 geo_point 的字段)

"lat": 22.5298891, // 纬度, 正数表示北纬, 负数表示南纬

"lon": 113.9449817 // 经度, 正数表示东经, 负数表示西经

},

"order": "asc|desc", // 升序或降序,

"unit": "km", // 距离单位, 用于计算响应中 sort 字段的数值

"distance_type": "plane" // 距离计算算法

}

}

// ...

]

}

6.1 按字段值排序

按字段值排序请求示例:

POST /hotel/_search

{

"_source": ["name", "price", "decoration_date"],

"query": { // 查询条件

"match": {

"name": "龙门"

}

},

"sort": [ // 排序条件

{

"price": { // 优先按价格升序排序

"order": "asc"

}

},

{

"decoration_date": { // 价格相同的, 再按装修日期降序排序

"order": "desc"

}

}

]

}

// 返回

{

"took": 1,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 3,

"relation": "eq"

},

"max_score": null,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": null,

"_source": {

"name": "龙门精选假日酒店",

"price": 200,

"decoration_date": "2023-05-04"

},

"sort": [ 200, 1683158400000 ] // 排序字段值, 相当于排序分数

},

{

"_index": "hotel",

"_id": "001",

"_score": null,

"_source": {

"name": "龙门国际大酒店",

"price": 300,

"decoration_date": "2024-06-06"

},

"sort": [ 300, 1717632000000 ]

},

{

"_index": "hotel",

"_id": "003",

"_score": null,

"_source": {

"name": "龙门客栈古风酒店",

"price": 350,

"decoration_date": "2021-10-08"

},

"sort": [ 350, 1633651200000 ]

}

]

}

}

6.2 按地理距离排序

按地理距离排序请求示例:

POST /hotel/_search

{

"_source": ["name", "location"],

"query": { // 查询条件

"match": {

"name": "龙门"

}

},

"sort": [ // 排序条件

{

"_geo_distance": { // 按地理距离排序

"location": { // "location" 字段的中心点坐标

"lat": 22.5298891, // 纬度, 正数表示北纬, 负数表示南纬

"lon": 113.9449817 // 经度, 正数表示东经, 负数表示西经

},

"order": "asc", // 升序排序

"unit": "km", // 排序距离单位为 km

"distance_type": "plane" // 平面算法

}

}

]

}

// 返回

{

"took": 5,

"timed_out": false,

"_shards": {

"total": 1,

"successful": 1,

"skipped": 0,

"failed": 0

},

"hits": {

"total": {

"value": 3,

"relation": "eq"

},

"max_score": null,

"hits": [

{

"_index": "hotel",

"_id": "002",

"_score": null,

"_source": {

"name": "龙门精选假日酒店",

"location": {

"lat": 22.531667,

"lon": 113.9497277

}

},

"sort": [ 0.5260141384282283 ] // 排序距离, 距离中心 0.526km

},

{

"_index": "hotel",

"_id": "001",

"_score": null,

"_source": {

"name": "龙门国际大酒店",

"location": {

"lat": 22.5377084,

"lon": 113.9308322

}

},

"sort": [ 1.693476752894645 ]

},

{

"_index": "hotel",

"_id": "003",

"_score": null,

"_source": {

"name": "龙门客栈古风酒店",

"location": {

"lat": 22.53396,

"lon": 114.0554156

}

},

"sort": [ 11.351370339175176 ]

}

]

}

}

推荐阅读

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。