最近在調查(Survey)公司將來可能採用的全文檢索方案,在網路上查過資料後,決定使用 Elasticsearch。
撰寫本記錄時 Elasticsearch 已發行至 7.x 版,但因為轉換後的資料都放在 6.x 上,因此以下仍以 6.x 的操作方式進行記錄。
本次已先使用程式將原本存放於 lucenedb 中的資料轉入 ES,而轉換所使用的套件為第三方的「Nest」,程式如下:
using Elasticsearch.Net;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Nest;
using NLog;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LuceneToElasticsearch
{
/// <summary>
/// One-off console tool that copies the stored documents of a Lucene.Net index
/// into the Elasticsearch index "nfs" through the NEST client.
/// </summary>
/// <remarks>
/// Reads two appSettings keys: "IndexPath" (folder of the Lucene index) and
/// "StartRunIndex" (first Lucene document number to migrate, which allows an
/// aborted run to be resumed from the number written to the error log).
/// </remarks>
class Program
{
    static Logger _Logger = LogManager.GetCurrentClassLogger();
    static string indexPath = "";

    // Target Elasticsearch node and index used by the migration.
    const string ElasticsearchUri = "http://10.1.190.187:9201";
    const string TargetIndex = "nfs";

    static void Main(string[] args)
    {
        doWork();
    }

    /// <summary>
    /// Runs the migration: opens the Lucene index read-only, makes sure the target
    /// Elasticsearch index exists, then indexes every live document one by one.
    /// </summary>
    /// <remarks>
    /// An exception inside the loop stops the run; the document number that was
    /// being processed is logged so the run can be restarted via "StartRunIndex".
    /// A document rejected by Elasticsearch is logged and skipped without stopping.
    /// </remarks>
    private static void doWork()
    {
        //luceneIndex export
        indexPath = ConfigurationManager.AppSettings["IndexPath"];
        var startindex = Convert.ToInt32(ConfigurationManager.AppSettings["StartRunIndex"]);

        // using-blocks guarantee the directory and reader are released even if
        // the migration aborts part-way.
        using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(indexPath)))
        {
            //elasticsearch connection
            var client = CreateClient();
            _Logger.Info("ES連線設定完成" + Environment.NewLine);

            using (var ir = IndexReader.Open(dir, true))
            {
                _Logger.Info("Lucene連線設定完成" + Environment.NewLine);

                var totalnum = ir.MaxDoc;
                var currentDocumentNumber = 0;
                var successtoes = 0;
                try
                {
                    _Logger.Info("Lucene匯入ES進行中...." + Environment.NewLine);
                    _Logger.Info("Lucene總筆數:" + totalnum.ToString() + "" + Environment.NewLine);

                    // Create the index (with its mapping) once, not once per document.
                    EnsureIndex(client);

                    // MaxDoc is one greater than the largest document number, so the
                    // last valid number is MaxDoc - 1 (hence "<", not "<=").
                    for (var i = startindex; i < totalnum; i++)
                    {
                        currentDocumentNumber = i;

                        // Deleted slots still occupy a document number but cannot be
                        // read; skip them instead of letting them abort the run.
                        if (ir.IsDeleted(i))
                        {
                            continue;
                        }

                        var luceneIndex = ToLuceneIndex(ir.Document(i));

                        //elasticsearch import
                        var indexResponse = client.IndexDocument(luceneIndex);
                        if (indexResponse.IsValid)
                        {
                            successtoes += 1;
                        }
                        else
                        {
                            // NEST reports failures through the response rather than
                            // by throwing, so only count documents ES accepted.
                            _Logger.Error("ES寫入失敗(" + i.ToString() + "):" + indexResponse.DebugInformation + Environment.NewLine);
                        }

                        Console.WriteLine(totalnum.ToString() + "/" + i.ToString());
                    }
                }
                catch (Exception ex)
                {
                    // Log the full exception (type + stack trace), not just the message.
                    _Logger.Error("全文檢索資料庫移轉錯誤(" + currentDocumentNumber.ToString() + "):" + ex.ToString() + Environment.NewLine);
                }

                _Logger.Info("匯入完成總筆數" + totalnum.ToString() + ": 確定加至es的筆數" + successtoes.ToString() + Environment.NewLine);
            }
        }
    }

    /// <summary>Builds the NEST client pointing at the target node and default index.</summary>
    private static ElasticClient CreateClient()
    {
        var uris = new[] { new Uri(ElasticsearchUri) };
        var connectionPool = new SniffingConnectionPool(uris);
        var settings = new ConnectionSettings(connectionPool).DefaultIndex(TargetIndex);
        return new ElasticClient(settings);
    }

    /// <summary>
    /// Creates the target index with a "standard"-analyzed text mapping for
    /// <c>Content</c> when the index does not exist yet; logs if creation fails.
    /// </summary>
    private static void EnsureIndex(ElasticClient client)
    {
        if (client.IndexExists(TargetIndex).Exists)
        {
            return;
        }

        var createIndexresponse = client.CreateIndex(TargetIndex, c => c.Mappings(m => m.Map<LuceneIndex>(mm => mm.Properties(p => p.Text(t => t.Name(n => n.Content).Analyzer("standard"))))));
        if (!createIndexresponse.IsValid)
        {
            _Logger.Error("ES索引建立失敗:" + createIndexresponse.DebugInformation + Environment.NewLine);
        }
    }

    /// <summary>Copies the stored fields of one Lucene document into a <c>LuceneIndex</c>.</summary>
    private static LuceneIndex ToLuceneIndex(Lucene.Net.Documents.Document doc)
    {
        return new LuceneIndex
        {
            FId = GetFieldValue(doc, "FId"),
            Folder = GetFieldValue(doc, "Folder"),
            FTitle = GetFieldValue(doc, "FTitle"),
            FExten = GetFieldValue(doc, "FExten"),
            Client = GetFieldValue(doc, "Client"),
            Matter = GetFieldValue(doc, "Matter"),
            Edocid = GetFieldValue(doc, "Edocid"),
            CreateUser = GetFieldValue(doc, "CreateUser"),
            CreateDate = GetFieldValue(doc, "CreateDate"),
            IsAttachment = GetFieldValue(doc, "IsAttachment"),
            From = GetFieldValue(doc, "From"),
            Tou = GetFieldValue(doc, "Tou"),
            CC = GetFieldValue(doc, "CC"),
            BCC = GetFieldValue(doc, "BCC"),
            Content = GetFieldValue(doc, "Content")
        };
    }

    /// <summary>
    /// Returns the string value of the named field, or an empty string when the
    /// document has no such field. Using one name for both the lookup and the
    /// read prevents the check and the read from drifting apart.
    /// </summary>
    private static string GetFieldValue(Lucene.Net.Documents.Document doc, string name)
    {
        var field = doc.GetField(name);
        return field == null ? "" : field.StringValue;
    }
}
}
透過以上程式轉完後進行底下基本的操作
-------------------------查詢-------------------------
GET nfs/_search?q=edocid:099999999
-------------------------新增-------------------------
POST nfs/luceneindex/
{
"fId": "_A7AIH__.__9",
"folder": "收文",
"fTitle": "",
"fExten": "msg",
"client": "L00640",
"matter": "IT406B",
"edocid": "099999999",
"createUser": "BPE",
"createDate": "",
"isAttachment": "N",
"from": "KKO",
"tou": "BruseZhang bpe@leeandli.com [Bruse]",
"cC": "",
"bCC": "",
"content": "test123",
"PropId": 123
}
-------------------------修改-------------------------
POST nfs/luceneindex/YAlqeGYB2Ejteqc8ztNd/_update
{
"doc" : {
"createUser": "BPE"
}
}
說明:其中「YAlqeGYB2Ejteqc8ztNd」為該文檔的 ID;新增時若未指定 ID,ES 會自動產生一串亂數作為 ID。
此動作只會將該 ID 所對應文檔中的 createUser 欄位更新為 BPE。
-------------------------刪除-------------------------
DELETE nfs/luceneindex/KKkvqm0BQjPxKwc5AHdu
成功後則回傳結果如下
{
"_index": "nfs",
"_type": "luceneindex",
"_id": "KKkvqm0BQjPxKwc5AHdu",
"_version": 2,
"result": "deleted",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 1410654,
"_primary_term": 5
}