友情链接:
近日,社区版家族正式发布V2024.5版本,其中,社区开发版系列重磅发布Scope开发版以及StellarDB开发版。
为了让大家更进一步了解产品,本系列文章从背景概念开始,深入浅出地为读者介绍Scope的优势以及能力。上一篇文章介绍了基础知识、Scope的技术优势以及能力,本篇文章将继续介绍如何安装部署以及使用。
安装教程
友情提示:安装前请仔细查看安装手册注意事项章节,下方内容仅供参考
步骤一 将从官网下载下来的产品包解压后上传至安装环境
步骤二 执行下述命令进行解压,解压后将出现一个镜像tar包
tar -zxf TDH-Scope-Standalone-Community-Transwarp-2024.5-X86_64-final.tar.gz
步骤三 执行下述命令加载镜像
docker load -i scope-2024.5.tar
步骤四 执行下方指令启动容器并运行镜像,运行格式为:
docker run -d --network host -v <本地目录路径>:/opt/transwarp --privileged <镜像名>
-v参数配置了TDH挂载的本地磁盘路径。该路径下会保存产品运行过程中产生的配置conf、数据data、日志log。再次提醒:请不要随意改动该路径下的文件,并做好备份,同时确保该路径下没有历史版本的数据文件。
操作示例图
步骤五 容器启动后需等待30s至2分钟
步骤六 浏览器访问管理节点8180端口
打开客户端浏览器(推荐使用Google Chrome浏览器),访问http://host:8180,比如http://172.16.3.108:8180/。访问这个地址,您会看到下面的登录页面。
初次登录以admin的身份登录,密码也是admin。
步骤七 按照向导提示进行集群部署与配置即可
安装完成后,自助申请许可证即可使用,教程请参考:手册
# Create index my_index with number_of_shards=5, number_of_replicas=3 and an
# explicit mapping for type default_type_ (id, title, body, date, views, tags).
curl -X PUT "localhost:9200/my_index?pretty" -H 'Content-Type: application/json' -d'{
"settings": {
"number_of_shards": 5,
"number_of_replicas": 3
},
"mappings": {
"default_type_": {
"properties": {
"id": { "type": "integer" },
"title": { "type": "text" },
"body": { "type": "text" },
"date": { "type": "date" },
"views": { "type": "integer" },
"tags": { "type": "keyword" }
}
}
}
}
';
# Delete index my_index.
# NOTE(review): the insert examples that follow target my_index again; presumably
# the index has to exist (or be auto-created) at that point - confirm the intended order.
curl -X DELETE "localhost:9200/my_index?pretty";
单条插入
# Index one document under the explicit document ID 1 (PUT my_index/default_type_/1).
curl -X PUT "localhost:9200/my_index/default_type_/1?pretty" -H 'Content-Type: application/json' -d'
{
"id": 1,
"title": "Scope for Beginners",
"body": "Learn how to use Scope to search and analyze your data",
"date": "2022-05-09",
"views": 1000,
"tags": ["Scope", "search"]
}
';
# Index one document with POST and no document ID in the URL
# (presumably the server generates the ID - confirm).
curl -X POST "localhost:9200/my_index/default_type_/?pretty" -H 'Content-Type: application/json' -d'
{
"id": 2,
"title": "Advanced Scope",
"body": "Take your Scope skills to the next level",
"date": "2022-05-10",
"views": 500,
"tags": ["Scope", "advanced"]
}
';
# Bulk insert: three documents (IDs bulk_1 .. bulk_3) in a single _bulk request.
# The body is NDJSON: each "index" action line is followed by its document line.
curl -X POST "localhost:9200/my_index/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 3, "title": "Scope Performance Tuning", "body": "Optimize your Scope cluster for better performance", "date": "2022-05-11","views": 750, "tags": ["Scope", "performance"]}
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 4, "title": "Scope Security", "body": "Learn how to secure your Scope cluster", "date": "2022-05-12", "views": 250, "tags": ["Scope", "security"]}
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_3" } }
{"id": 5, "title": "Scope Monitoring", "body": "Monitor your Scope cluster with the Elastic Stack", "date": "2022-05-13", "views": 100, "tags": ["Scope", "monitoring"]}
';
# Search my_index: match query for "Scope" on field body, sorted by date descending.
curl -X GET "localhost:9200/my_index/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"match": {
"body": "Scope"
}
},
"sort": {
"date": { "order": "desc" }
}
}
';
# Search my_index: range query date >= 2022-05-11, sorted by views ascending.
curl -X GET "localhost:9200/my_index/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"range": {
"date": {
"gte": "2022-05-11"
}
}
},
"sort": {
"views": { "order": "asc" }
}
}
';
# Query the cluster health endpoint.
curl -X GET "localhost:9200/_cluster/health?pretty";
# Query the _cat API for index my_index.
curl -X GET "localhost:9200/_cat/indices/my_index?v";
查看节点状态
# Query the _cat API for the cluster nodes.
curl -X GET "localhost:9200/_cat/nodes?v";
# Query the _cat API for the shards of index my_index.
curl -X GET "localhost:9200/_cat/shards/my_index?v";
-- Start from a clean database: drop it together with its contents, recreate it and switch to it.
drop database if exists DDL_Scope_DB cascade;
create database if not exists DDL_Scope_DB;
use DDL_Scope_DB;

-- Recreate the demo table as a Scope-stored table with 5 shards and replication 3.
-- title and description declare an analyzer, and id is configured as scope.key.column.
drop table if exists sql_demo;
create table sql_demo (
    id          string,
    title       string has analyzer 'standard',
    author      string,
    price       double,
    description string has analyzer 'mmseg'
)
stored as scope
with shard number 5 replication 3
tblproperties ('scope.key.column' = 'id');

-- Insert one row through a select against system.dual.
insert into sql_demo
select
    '1',
    '百年孤独',
    '加西亚·马尔克斯',
    39.80,
    '一部代表魔幻现实主义文学巅峰的经典小说。'
from system.dual;

-- Insert one row through a values clause.
insert into sql_demo
values(
    '2',
    '围城',
    '钱钟书',
    29.80,
    '一部中国现代文学经典,讽刺了旧中国知识分子的冷嘲热讽和无可奈何。'
);

-- Insert two rows in one batchinsert statement.
batchinsert into sql_demo batchvalues(
    values('3', '骆驼祥子', '老舍', 22.80, '一部反映旧中国社会底层生活的文学作品,展现了社会底层人民的生活和奋斗。'),
    values('4', '茶花女', '小仲马', 18.80, '一部法国浪漫主义文学代表作,描绘了一个上层社会女性的生活和爱情。')
);

-- Read back every row.
select *
from sql_demo;

-- Read back every row ordered by price.
select *
from sql_demo
order by price;

-- Full-text filter on the analyzed description column.
select *
from sql_demo
where contains(description, '中国');
# Inspect, over REST, the index ddl_scope_db.sql_demo that backs the SQL table,
# authenticating as shiva:shiva. All three commands use localhost:9200; the last
# two previously pointed at the host name "local", which does not match the first
# command or the other examples.
# Show the index mapping.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_mapping?pretty";
# Show the index settings, filtered down to number_of_shards and number_of_replicas.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_settings/?pretty&filter_path=**.number_of_shards,**.number_of_replicas";
# Search the index without a request body.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_search?pretty";
// Create index "create_index_demo" with one shard, one replica and a mapping
// for type default_type_ (c_text: text, c_string_mf: keyword).
Settings.Builder indexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 1);

// Mapping source as JSON text; the string content is what gets sent to the server.
String mappingJson =
        " {\n" +
        " \"default_type_\": {\n" +
        " \"properties\": {\n" +
        " \"c_text\": {\n" +
        " \"type\": \"text\"\n" +
        " },\n" +
        " \"c_string_mf\": {\n" +
        " \"type\": \"keyword\"\n" +
        " }\n" +
        " }\n" +
        " }\n" +
        " }";

CreateIndexRequest request = new CreateIndexRequest("create_index_demo");
request.settings(indexSettings);
CreateIndexRequest indexRequest = request.mapping(mappingJson, XContentType.JSON);

// Send the request through the indices client and keep the response.
CreateIndexResponse createIndexResponse =
        highLevelClient.indices().create(indexRequest, RequestOptions.DEFAULT);
// Delete index "create_index_demo" through the indices client and keep the acknowledgement.
DeleteIndexRequest request =
        new DeleteIndexRequest("create_index_demo");
AcknowledgedResponse deleteIndexResponse =
        highLevelClient.indices().delete(request, RequestOptions.DEFAULT);
// Index a single JSON document into my_index / default_type_ under document ID "6".
String jsonString =
        "{\n" +
        " \"id\": 6,\n" +
        " \"title\": \"Scope for Beginners\",\n" +
        " \"body\": \"Learn how to use Scope to search and analyze your data\",\n" +
        " \"date\": \"2022-05-09\",\n" +
        " \"views\": 1000,\n" +
        " \"tags\": [\"Scope\", \"search\"]\n" +
        "}";

IndexRequest request = new IndexRequest("my_index", "default_type_", "6");
// source(...) attaches the JSON body; its return value is what gets sent.
IndexRequest source = request.source(jsonString, XContentType.JSON);
IndexResponse index = highLevelClient.index(source, RequestOptions.DEFAULT);
// Bulk-index three JSON documents into my_index / default_type_ under IDs "3", "4" and "5".
String jsonString1 = "{\"id\": 3, \"title\": \"Scope Performance Tuning\", \"body\": \"Optimize your Scope cluster for better performance\", \"date\": \"2022-05-11\",\"views\": 750, \"tags\": [\"Scope\", \"performance\"]}";
String jsonString2 = "{\"id\": 4, \"title\": \"Scope Security\", \"body\": \"Learn how to secure your Scope cluster\", \"date\": \"2022-05-12\", \"views\": 250, \"tags\": [\"Scope\", \"security\"]}";
String jsonString3 = "{\"id\": 5, \"title\": \"Scope Monitoring\", \"body\": \"Monitor your Scope cluster with the Elastic Stack\", \"date\": \"2022-05-13\", \"views\": 100, \"tags\": [\"Scope\", \"monitoring\"]}";

// One IndexRequest per document, added to the bulk request in ID order.
IndexRequest firstDoc = new IndexRequest("my_index", "default_type_", "3")
        .source(jsonString1, XContentType.JSON);
IndexRequest secondDoc = new IndexRequest("my_index", "default_type_", "4")
        .source(jsonString2, XContentType.JSON);
IndexRequest thirdDoc = new IndexRequest("my_index", "default_type_", "5")
        .source(jsonString3, XContentType.JSON);

BulkRequest request = new BulkRequest();
request.add(firstDoc);
request.add(secondDoc);
request.add(thirdDoc);

// Send all three index operations in one round trip.
BulkResponse bulkResponses = highLevelClient.bulk(request, RequestOptions.DEFAULT);
// Term search: look up the exact term "how" in field body of my_index / default_type_.
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.termQuery("body", "how"));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// Match search: full-text query "Monitor Optimize" on field body of my_index / default_type_.
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchQuery("body", "Monitor Optimize"));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// Bool search on my_index / default_type_: a document must match BOTH term clauses
// (body contains the term "cluster" AND date equals 2022-05-13).
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// SearchSourceBuilder.query(...) replaces the current query on each call, so calling
// it once per clause would keep only the last term query. The clauses are therefore
// attached to the bool query itself through must(...).
searchSourceBuilder.query(
        QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery("body", "cluster"))
                .must(QueryBuilders.termQuery("date", "2022-05-13")));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
在使用搜索引擎的过程中,通常会涉及诸多术语,如segment/doc/term/token/shard/index等等,其中,segment/doc/term/token都是lucene中的概念。理解这些术语有助于更深入地了解和使用搜索引擎。
lucene内部使用的是倒排索引的数据结构,将词项(term)映射到文档(document)。例如下图的3个document,进行分词后,搜索引擎可以很快速地返回下方问题的答案。
那么是如何实现这一能力的?
将文档切分成一系列有意义的单词(term/token)的过程称之为分词,其中,分词器则负责这一过程,以建立索引进行高效的搜索和分析。
选择一个合适的分词器可以很大程度上提高检索效率,当前比较常见的分词器有以下几种:
不同的分词器会产生不同的分词结果,产生不同的索引,所以相同的查询条件会产生不同的结果。
举例说明生活中全文检索的应用实例:字典。字典的音节表和部首检字表就相当于字典的索引,对每一个字的解释是非结构化的,如果字典没有音节表和部首检字表,在茫茫辞海中找一个字只能顺序扫描。然而字的某些信息可以提取出来进行结构化处理,比如读音,就比较结构化,分声母和韵母,分别只有几种可以一一列举,于是将读音拿出来按一定的顺序排列,每一项读音都指向此字的详细解释的页数。我们搜索时按结构化的拼音搜到读音,然后按其指向的页数,便可找到我们的非结构化数据——也即对字的解释。
全文检索就是把文本中的内容拆分成若干个关键词,然后根据关键词创建索引。查询时,根据关键词查询索引,最终找到包含关键词的文章。整个过程类似于查字典的过程。
# Analyzer demo 1: blog_index1 analyzes both blog_name and blog_name_english
# with the standard analyzer.
# All commands use localhost:9200, the endpoint used by the other examples
# (this block previously pointed at port 19200).
# Drop any previous copy of the index.
curl -X DELETE "localhost:9200/blog_index1?pretty";
# Create the index with the mapping for type default_type_.
curl -X PUT "localhost:9200/blog_index1?pretty" -H 'Content-Type: application/json' -d'{
"mappings": {
"default_type_": {
"properties": {
"id": {
"type": "integer"
},
"blog_name": {
"type": "text",
"analyzer": "standard"
},
"blog_name_english": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
';
# Bulk-load five documents. Every action line carries its own _id (bulk_1 .. bulk_5);
# reusing an _id would make the later document overwrite the earlier one.
curl -X POST "localhost:9200/blog_index1/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 1, "blog_name": "Scope 介绍", "blog_name_english": "Introduction to Scope"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 2, "blog_name": "Scope 高级搜索", "blog_name_english": "Advanced Searching in Scope"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_3" } }
{ "id": 3, "blog_name": "全文检索技术比较", "blog_name_english": "Comparison of Full-Text Search Technologies"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_4" } }
{ "id": 4, "blog_name": "Scope 数据聚合", "blog_name_english": "Scope Data Aggregation"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_5" } }
{ "id": 5, "blog_name": "分布式数据库架构", "blog_name_english": "Introduction to NoSQL Databases"}
';
# Term query for "search" on the English title field.
curl -XGET "localhost:9200/blog_index1/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name_english": "search"
}
}
}';
# Term query for "搜索" on the Chinese title field (standard analyzer in this index).
curl -XGET "localhost:9200/blog_index1/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name": "搜索"
}
}
}';
# Analyzer demo 2: blog_index2 analyzes blog_name and blog_summary with ik_max_word
# and blog_name_english with the standard analyzer.
# Drop any previous copy of the index.
curl -X DELETE "localhost:9200/blog_index2?pretty";
# Create the index with the mapping for type default_type_.
curl -X PUT "localhost:9200/blog_index2?pretty" -H 'Content-Type: application/json' -d'{
"mappings": {
"default_type_": {
"properties": {
"id": {
"type": "integer"
},
"blog_name": {
"type": "text",
"analyzer": "ik_max_word"
},
"blog_name_english": {
"type": "text",
"analyzer": "standard"
},
"blog_summary": {
"type": "text",
"analyzer": "ik_max_word"
}
}
}
}
}
';
# Bulk-load five documents. Every action line carries its own _id (bulk_1 .. bulk_5);
# reusing an _id would make the later document overwrite the earlier one.
curl -X POST "localhost:9200/blog_index2/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 1, "blog_name": "Scope 介绍", "blog_name_english": "Introduction to Scope", "blog_summary": "Scope 是一个分布式搜索引擎,用于快速和可扩展地搜索和分析大量数据。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 2, "blog_name": "Scope 高级搜索", "blog_name_english": "Advanced Searching in Scope", "blog_summary": "学习如何使用 Scope 进行高级搜索和优化查询性能的技巧。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_3" } }
{ "id": 3, "blog_name": "全文检索技术比较", "blog_name_english": "Comparison of Full-Text Search Technologies", "blog_summary": "比较不同全文检索技术之间的性能和功能,了解它们在搜索算法和评估方面的优缺点。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_4" } }
{ "id": 4, "blog_name": "Scope 数据聚合", "blog_name_english": "Scope Data Aggregation", "blog_summary": "使用 Scope 进行数据聚合和分析,了解如何从大量数据中提取有价值的信息。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_5" } }
{ "id": 5, "blog_name": "分布式数据库架构", "blog_name_english": "Introduction to NoSQL Databases", "blog_summary": "探索分布式数据库架构的原理和设计,了解如何在分布式环境中实现数据一致性和高可用性。"}
';
# Term query for "search" on the English title field.
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name_english": "search"
}
}
}';
# Term query for "搜索" on the Chinese title field (ik_max_word analyzer in this index).
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name": "搜索"
}
}
}';
# Term query for "搜索" on the summary field (ik_max_word analyzer).
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_summary": "搜索"
}
}
}';
更多使用教程可以参考 Scope使用手册 ,欢迎体验开发版Scope。
友情链接:
近日,社区版家族正式发布V2024.5版本,其中,社区开发版系列重磅发布Scope开发版以及StellarDB开发版。
为了让大家更进一步了解产品,本系列文章从背景概念开始,深入浅出地为读者介绍Scope的优势以及能力。上一篇文章介绍了基础知识、Scope的技术优势以及能力,本篇文章将继续介绍如何安装部署以及使用。
安装教程
友情提示:安装前请仔细查看安装手册注意事项章节,下方内容仅供参考
步骤一 将从官网下载下来的产品包解压后上传至安装环境
步骤二 执行下述命令进行解压,解压后将出现一个镜像tar包
tar -zxf TDH-Scope-Standalone-Community-Transwarp-2024.5-X86_64-final.tar.gz
步骤三 执行下述命令加载镜像
docker load -i scope-2024.5.tar
步骤四 执行下方指令启动容器并运行镜像,运行格式为:
docker run -d --network host -v <本地目录路径>:/opt/transwarp --privileged <镜像名>
-v参数配置了TDH挂载的本地磁盘路径。该路径下会保存产品运行过程中产生的配置conf、数据data、日志log。再次提醒:请不要随意改动该路径下的文件,并做好备份,同时确保该路径下没有历史版本的数据文件。
操作示例图
步骤五 容器启动后需等待30s至2分钟
步骤六 浏览器访问管理节点8180端口
打开客户端浏览器(推荐使用Google Chrome浏览器),访问http://host:8180,比如http://172.16.3.108:8180/。访问这个地址,您会看到下面的登录页面。
初次登录以admin的身份登录,密码也是admin。
步骤七 按照向导提示进行集群部署与配置即可
安装完成后,自助申请许可证即可使用,教程请参考:手册
# Create index my_index with number_of_shards=5, number_of_replicas=3 and an
# explicit mapping for type default_type_ (id, title, body, date, views, tags).
curl -X PUT "localhost:9200/my_index?pretty" -H 'Content-Type: application/json' -d'{
"settings": {
"number_of_shards": 5,
"number_of_replicas": 3
},
"mappings": {
"default_type_": {
"properties": {
"id": { "type": "integer" },
"title": { "type": "text" },
"body": { "type": "text" },
"date": { "type": "date" },
"views": { "type": "integer" },
"tags": { "type": "keyword" }
}
}
}
}
';
# Delete index my_index.
# NOTE(review): the insert examples that follow target my_index again; presumably
# the index has to exist (or be auto-created) at that point - confirm the intended order.
curl -X DELETE "localhost:9200/my_index?pretty";
单条插入
# Index one document under the explicit document ID 1 (PUT my_index/default_type_/1).
curl -X PUT "localhost:9200/my_index/default_type_/1?pretty" -H 'Content-Type: application/json' -d'
{
"id": 1,
"title": "Scope for Beginners",
"body": "Learn how to use Scope to search and analyze your data",
"date": "2022-05-09",
"views": 1000,
"tags": ["Scope", "search"]
}
';
# Index one document with POST and no document ID in the URL
# (presumably the server generates the ID - confirm).
curl -X POST "localhost:9200/my_index/default_type_/?pretty" -H 'Content-Type: application/json' -d'
{
"id": 2,
"title": "Advanced Scope",
"body": "Take your Scope skills to the next level",
"date": "2022-05-10",
"views": 500,
"tags": ["Scope", "advanced"]
}
';
# Bulk insert: three documents (IDs bulk_1 .. bulk_3) in a single _bulk request.
# The body is NDJSON: each "index" action line is followed by its document line.
curl -X POST "localhost:9200/my_index/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 3, "title": "Scope Performance Tuning", "body": "Optimize your Scope cluster for better performance", "date": "2022-05-11","views": 750, "tags": ["Scope", "performance"]}
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 4, "title": "Scope Security", "body": "Learn how to secure your Scope cluster", "date": "2022-05-12", "views": 250, "tags": ["Scope", "security"]}
{ "index" : {"_index" : "my_index","_type" : "default_type_", "_id" : "bulk_3" } }
{"id": 5, "title": "Scope Monitoring", "body": "Monitor your Scope cluster with the Elastic Stack", "date": "2022-05-13", "views": 100, "tags": ["Scope", "monitoring"]}
';
# Search my_index: match query for "Scope" on field body, sorted by date descending.
curl -X GET "localhost:9200/my_index/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"match": {
"body": "Scope"
}
},
"sort": {
"date": { "order": "desc" }
}
}
';
# Search my_index: range query date >= 2022-05-11, sorted by views ascending.
curl -X GET "localhost:9200/my_index/_search?pretty" -H 'Content-Type: application/json' -d'
{
"query": {
"range": {
"date": {
"gte": "2022-05-11"
}
}
},
"sort": {
"views": { "order": "asc" }
}
}
';
# Query the cluster health endpoint.
curl -X GET "localhost:9200/_cluster/health?pretty";
# Query the _cat API for index my_index.
curl -X GET "localhost:9200/_cat/indices/my_index?v";
查看节点状态
# Query the _cat API for the cluster nodes.
curl -X GET "localhost:9200/_cat/nodes?v";
# Query the _cat API for the shards of index my_index.
curl -X GET "localhost:9200/_cat/shards/my_index?v";
-- Start from a clean database: drop it together with its contents, recreate it and switch to it.
drop database if exists DDL_Scope_DB cascade;
create database if not exists DDL_Scope_DB;
use DDL_Scope_DB;

-- Recreate the demo table as a Scope-stored table with 5 shards and replication 3.
-- title and description declare an analyzer, and id is configured as scope.key.column.
drop table if exists sql_demo;
create table sql_demo (
    id          string,
    title       string has analyzer 'standard',
    author      string,
    price       double,
    description string has analyzer 'mmseg'
)
stored as scope
with shard number 5 replication 3
tblproperties ('scope.key.column' = 'id');

-- Insert one row through a select against system.dual.
insert into sql_demo
select
    '1',
    '百年孤独',
    '加西亚·马尔克斯',
    39.80,
    '一部代表魔幻现实主义文学巅峰的经典小说。'
from system.dual;

-- Insert one row through a values clause.
insert into sql_demo
values(
    '2',
    '围城',
    '钱钟书',
    29.80,
    '一部中国现代文学经典,讽刺了旧中国知识分子的冷嘲热讽和无可奈何。'
);

-- Insert two rows in one batchinsert statement.
batchinsert into sql_demo batchvalues(
    values('3', '骆驼祥子', '老舍', 22.80, '一部反映旧中国社会底层生活的文学作品,展现了社会底层人民的生活和奋斗。'),
    values('4', '茶花女', '小仲马', 18.80, '一部法国浪漫主义文学代表作,描绘了一个上层社会女性的生活和爱情。')
);

-- Read back every row.
select *
from sql_demo;

-- Read back every row ordered by price.
select *
from sql_demo
order by price;

-- Full-text filter on the analyzed description column.
select *
from sql_demo
where contains(description, '中国');
# Inspect, over REST, the index ddl_scope_db.sql_demo that backs the SQL table,
# authenticating as shiva:shiva. All three commands use localhost:9200; the last
# two previously pointed at the host name "local", which does not match the first
# command or the other examples.
# Show the index mapping.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_mapping?pretty";
# Show the index settings, filtered down to number_of_shards and number_of_replicas.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_settings/?pretty&filter_path=**.number_of_shards,**.number_of_replicas";
# Search the index without a request body.
curl -ushiva:shiva -X GET "localhost:9200/ddl_scope_db.sql_demo/_search?pretty";
// Create index "create_index_demo" with one shard, one replica and a mapping
// for type default_type_ (c_text: text, c_string_mf: keyword).
Settings.Builder indexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 1);

// Mapping source as JSON text; the string content is what gets sent to the server.
String mappingJson =
        " {\n" +
        " \"default_type_\": {\n" +
        " \"properties\": {\n" +
        " \"c_text\": {\n" +
        " \"type\": \"text\"\n" +
        " },\n" +
        " \"c_string_mf\": {\n" +
        " \"type\": \"keyword\"\n" +
        " }\n" +
        " }\n" +
        " }\n" +
        " }";

CreateIndexRequest request = new CreateIndexRequest("create_index_demo");
request.settings(indexSettings);
CreateIndexRequest indexRequest = request.mapping(mappingJson, XContentType.JSON);

// Send the request through the indices client and keep the response.
CreateIndexResponse createIndexResponse =
        highLevelClient.indices().create(indexRequest, RequestOptions.DEFAULT);
// Delete index "create_index_demo" through the indices client and keep the acknowledgement.
DeleteIndexRequest request =
        new DeleteIndexRequest("create_index_demo");
AcknowledgedResponse deleteIndexResponse =
        highLevelClient.indices().delete(request, RequestOptions.DEFAULT);
// Index a single JSON document into my_index / default_type_ under document ID "6".
String jsonString =
        "{\n" +
        " \"id\": 6,\n" +
        " \"title\": \"Scope for Beginners\",\n" +
        " \"body\": \"Learn how to use Scope to search and analyze your data\",\n" +
        " \"date\": \"2022-05-09\",\n" +
        " \"views\": 1000,\n" +
        " \"tags\": [\"Scope\", \"search\"]\n" +
        "}";

IndexRequest request = new IndexRequest("my_index", "default_type_", "6");
// source(...) attaches the JSON body; its return value is what gets sent.
IndexRequest source = request.source(jsonString, XContentType.JSON);
IndexResponse index = highLevelClient.index(source, RequestOptions.DEFAULT);
// Bulk-index three JSON documents into my_index / default_type_ under IDs "3", "4" and "5".
String jsonString1 = "{\"id\": 3, \"title\": \"Scope Performance Tuning\", \"body\": \"Optimize your Scope cluster for better performance\", \"date\": \"2022-05-11\",\"views\": 750, \"tags\": [\"Scope\", \"performance\"]}";
String jsonString2 = "{\"id\": 4, \"title\": \"Scope Security\", \"body\": \"Learn how to secure your Scope cluster\", \"date\": \"2022-05-12\", \"views\": 250, \"tags\": [\"Scope\", \"security\"]}";
String jsonString3 = "{\"id\": 5, \"title\": \"Scope Monitoring\", \"body\": \"Monitor your Scope cluster with the Elastic Stack\", \"date\": \"2022-05-13\", \"views\": 100, \"tags\": [\"Scope\", \"monitoring\"]}";

// One IndexRequest per document, added to the bulk request in ID order.
IndexRequest firstDoc = new IndexRequest("my_index", "default_type_", "3")
        .source(jsonString1, XContentType.JSON);
IndexRequest secondDoc = new IndexRequest("my_index", "default_type_", "4")
        .source(jsonString2, XContentType.JSON);
IndexRequest thirdDoc = new IndexRequest("my_index", "default_type_", "5")
        .source(jsonString3, XContentType.JSON);

BulkRequest request = new BulkRequest();
request.add(firstDoc);
request.add(secondDoc);
request.add(thirdDoc);

// Send all three index operations in one round trip.
BulkResponse bulkResponses = highLevelClient.bulk(request, RequestOptions.DEFAULT);
// Term search: look up the exact term "how" in field body of my_index / default_type_.
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.termQuery("body", "how"));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// Match search: full-text query "Monitor Optimize" on field body of my_index / default_type_.
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchQuery("body", "Monitor Optimize"));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
// Bool search on my_index / default_type_: a document must match BOTH term clauses
// (body contains the term "cluster" AND date equals 2022-05-13).
SearchRequest searchRequest = new SearchRequest().indices("my_index").types("default_type_");
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// SearchSourceBuilder.query(...) replaces the current query on each call, so calling
// it once per clause would keep only the last term query. The clauses are therefore
// attached to the bool query itself through must(...).
searchSourceBuilder.query(
        QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery("body", "cluster"))
                .must(QueryBuilders.termQuery("date", "2022-05-13")));
searchRequest.source(searchSourceBuilder);
// Uses highLevelClient, the client name used by the index/bulk snippets
// (assumed to be the same RestHighLevelClient instance - confirm).
SearchResponse searchResponse = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
在使用搜索引擎的过程中,通常会涉及诸多术语,如segment/doc/term/token/shard/index等等,其中,segment/doc/term/token都是lucene中的概念。理解这些术语有助于更深入地了解和使用搜索引擎。
lucene内部使用的是倒排索引的数据结构,将词项(term)映射到文档(document)。例如下图的3个document,进行分词后,搜索引擎可以很快速地返回下方问题的答案。
那么是如何实现这一能力的?
将文档切分成一系列有意义的单词(term/token)的过程称之为分词,其中,分词器则负责这一过程,以建立索引进行高效的搜索和分析。
选择一个合适的分词器可以很大程度上提高检索效率,当前比较常见的分词器有以下几种:
不同的分词器会产生不同的分词结果,产生不同的索引,所以相同的查询条件会产生不同的结果。
举例说明生活中全文检索的应用实例:字典。字典的音节表和部首检字表就相当于字典的索引,对每一个字的解释是非结构化的,如果字典没有音节表和部首检字表,在茫茫辞海中找一个字只能顺序扫描。然而字的某些信息可以提取出来进行结构化处理,比如读音,就比较结构化,分声母和韵母,分别只有几种可以一一列举,于是将读音拿出来按一定的顺序排列,每一项读音都指向此字的详细解释的页数。我们搜索时按结构化的拼音搜到读音,然后按其指向的页数,便可找到我们的非结构化数据——也即对字的解释。
全文检索就是把文本中的内容拆分成若干个关键词,然后根据关键词创建索引。查询时,根据关键词查询索引,最终找到包含关键词的文章。整个过程类似于查字典的过程。
# Analyzer demo 1: blog_index1 analyzes both blog_name and blog_name_english
# with the standard analyzer.
# All commands use localhost:9200, the endpoint used by the other examples
# (this block previously pointed at port 19200).
# Drop any previous copy of the index.
curl -X DELETE "localhost:9200/blog_index1?pretty";
# Create the index with the mapping for type default_type_.
curl -X PUT "localhost:9200/blog_index1?pretty" -H 'Content-Type: application/json' -d'{
"mappings": {
"default_type_": {
"properties": {
"id": {
"type": "integer"
},
"blog_name": {
"type": "text",
"analyzer": "standard"
},
"blog_name_english": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
';
# Bulk-load five documents. Every action line carries its own _id (bulk_1 .. bulk_5);
# reusing an _id would make the later document overwrite the earlier one.
curl -X POST "localhost:9200/blog_index1/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 1, "blog_name": "Scope 介绍", "blog_name_english": "Introduction to Scope"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 2, "blog_name": "Scope 高级搜索", "blog_name_english": "Advanced Searching in Scope"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_3" } }
{ "id": 3, "blog_name": "全文检索技术比较", "blog_name_english": "Comparison of Full-Text Search Technologies"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_4" } }
{ "id": 4, "blog_name": "Scope 数据聚合", "blog_name_english": "Scope Data Aggregation"}
{ "index" : {"_index" : "blog_index1","_type" : "default_type_", "_id" : "bulk_5" } }
{ "id": 5, "blog_name": "分布式数据库架构", "blog_name_english": "Introduction to NoSQL Databases"}
';
# Term query for "search" on the English title field.
curl -XGET "localhost:9200/blog_index1/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name_english": "search"
}
}
}';
# Term query for "搜索" on the Chinese title field (standard analyzer in this index).
curl -XGET "localhost:9200/blog_index1/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name": "搜索"
}
}
}';
# Analyzer demo 2: blog_index2 analyzes blog_name and blog_summary with ik_max_word
# and blog_name_english with the standard analyzer.
# Drop any previous copy of the index.
curl -X DELETE "localhost:9200/blog_index2?pretty";
# Create the index with the mapping for type default_type_.
curl -X PUT "localhost:9200/blog_index2?pretty" -H 'Content-Type: application/json' -d'{
"mappings": {
"default_type_": {
"properties": {
"id": {
"type": "integer"
},
"blog_name": {
"type": "text",
"analyzer": "ik_max_word"
},
"blog_name_english": {
"type": "text",
"analyzer": "standard"
},
"blog_summary": {
"type": "text",
"analyzer": "ik_max_word"
}
}
}
}
}
';
# Bulk-load five documents. Every action line carries its own _id (bulk_1 .. bulk_5);
# reusing an _id would make the later document overwrite the earlier one.
curl -X POST "localhost:9200/blog_index2/_bulk?pretty" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_1" } }
{ "id": 1, "blog_name": "Scope 介绍", "blog_name_english": "Introduction to Scope", "blog_summary": "Scope 是一个分布式搜索引擎,用于快速和可扩展地搜索和分析大量数据。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_2" } }
{ "id": 2, "blog_name": "Scope 高级搜索", "blog_name_english": "Advanced Searching in Scope", "blog_summary": "学习如何使用 Scope 进行高级搜索和优化查询性能的技巧。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_3" } }
{ "id": 3, "blog_name": "全文检索技术比较", "blog_name_english": "Comparison of Full-Text Search Technologies", "blog_summary": "比较不同全文检索技术之间的性能和功能,了解它们在搜索算法和评估方面的优缺点。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_4" } }
{ "id": 4, "blog_name": "Scope 数据聚合", "blog_name_english": "Scope Data Aggregation", "blog_summary": "使用 Scope 进行数据聚合和分析,了解如何从大量数据中提取有价值的信息。"}
{ "index" : {"_index" : "blog_index2","_type" : "default_type_", "_id" : "bulk_5" } }
{ "id": 5, "blog_name": "分布式数据库架构", "blog_name_english": "Introduction to NoSQL Databases", "blog_summary": "探索分布式数据库架构的原理和设计,了解如何在分布式环境中实现数据一致性和高可用性。"}
';
# Term query for "search" on the English title field.
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name_english": "search"
}
}
}';
# Term query for "搜索" on the Chinese title field (ik_max_word analyzer in this index).
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_name": "搜索"
}
}
}';
# Term query for "搜索" on the summary field (ik_max_word analyzer).
curl -XGET "localhost:9200/blog_index2/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"term": {
"blog_summary": "搜索"
}
}
}';
更多使用教程可以参考 Scope使用手册 ,欢迎体验开发版Scope。