为 Lucene.net 3.0.3 创建一个简单的高亮格式化程序
Create a simple highlight formatter for Lucene.Net 3.0.3
我正在使用 Lucene.Net 3.0.3,我有可用的代码,但我不知道如何添加一个简单的格式化程序。
''' <summary>
''' Searches the Lucene index stored under "~\IndexedFiles_V33\" for <paramref name="term"/>
''' and serialises at most the first 50 hits as a JSON array.
''' </summary>
''' <param name="term">
''' Search text. If it contains a comma it is split on commas and the pieces are combined with
''' Occur.SHOULD; otherwise it is split on spaces and every piece is added with Occur.MUST.
''' Pieces that are empty or whitespace-only are skipped.
''' </param>
''' <returns>
''' A JSON array whose objects carry the "id", "Grant_ID", "text" and "Score" properties,
''' or the exception message when the search as a whole fails.
''' </returns>
Public Function Lucene_Index_Search_Complete(term As String) As String
    Dim sb As New StringBuilder()
    Dim sw As New StringWriter(sb)
    Dim writer As JsonWriter = New JsonTextWriter(sw)
    Try
        ' The directory, reader, searcher and analyzer are released deterministically
        ' so index file handles do not accumulate across requests.
        Using d As Lucene.Net.Store.Directory = FSDirectory.Open(New DirectoryInfo(Server.MapPath("~") + "\IndexedFiles_V33\"))
            Using reader As IndexReader = IndexReader.Open(d, True), indexSearch As Searcher = New IndexSearcher(reader)
                Using a As Analyzer = New StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)
                    Dim FieldNames As String() = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR).ToArray
                    Dim parser As MultiFieldQueryParser = New MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, FieldNames, a)
                    Dim q As BooleanQuery = New BooleanQuery()

                    term = term.Trim
                    ' Comma-separated input means "any of these"; space-separated means "all of these".
                    Dim matchAny As Boolean = term.Contains(",")
                    Dim separator As String = If(matchAny, ",", " ")
                    Dim occurrence As Occur = If(matchAny, Occur.SHOULD, Occur.MUST)
                    For Each phase As String In Split(term, separator)
                        ' Doubled separators ("a  b", "a,,b") produce blank pieces; do not hand those to the parser.
                        If phase.Trim().Length = 0 Then
                            Continue For
                        End If
                        q.Add(parser.Parse(phase), occurrence)
                    Next

                    Dim collector As TopScoreDocCollector = TopScoreDocCollector.Create(1000, True)
                    indexSearch.Search(q, collector)
                    Dim hits As ScoreDoc() = collector.TopDocs().ScoreDocs

                    ' Loop through the matching hits (first 50 at most), retrieving each document.
                    writer.WriteStartArray()
                    For i As Integer = 0 To System.Math.Min(hits.Length, 50) - 1
                        ' Reset per hit so a failure is never logged against the previous hit's link.
                        Dim link As String = ""
                        Try
                            Dim doc As Document = indexSearch.Doc(hits(i).Doc)
                            Dim Score As String = hits(i).Score.ToString
                            Dim GrantID As String = doc.Get("GrantID")
                            link = doc.Get("link")
                            Dim jsonData As JObject = JObject.Parse(doc.Get("JSON_Data"))
                            Dim Title As String = jsonData("Funding Opportunity Title").ToString
                            ' Read but not emitted; kept so a document without this key is still logged and skipped.
                            Dim Posted_Date As String = jsonData("Posted Date").ToString
                            Dim purpose As String = jsonData("Funding Opportunity Purpose").ToString

                            ' All lookups are done before the object is opened, so a bad document
                            ' cannot leave a half-written JSON object behind.
                            writer.WriteStartObject()
                            writer.WritePropertyName("id")
                            writer.WriteValue((1 + i).ToString)
                            writer.WritePropertyName("Grant_ID")
                            writer.WriteValue(GrantID)
                            writer.WritePropertyName("text")
                            writer.WriteValue("<br/>" + Title + "<br/>" + purpose + "<br/><b>Score:</b> " + Score + "<br/>")
                            writer.WritePropertyName("Score")
                            writer.WriteValue(Score)
                            writer.WriteEndObject()
                        Catch ex As Exception
                            ' Best effort: one unreadable hit must not abort the whole result set.
                            WriteErrorLog(ex.Message, link)
                        End Try
                    Next
                End Using
            End Using
        End Using
        ' Close() is relied on to finish the still-open JSON array before the buffer is read.
        writer.Close()
        Return sb.ToString
    Catch ex As Exception
        Return ex.Message
    End Try
End Function
我只想用黄色背景包裹搜索词。感谢您提供的任何帮助——我已经搜索过答案,但我想我一定遗漏了什么。我需要高亮显示 JSON 字段 Funding Opportunity Purpose 和 Funding Opportunity Title 中的搜索词。
搞定了
''' <summary>
''' Builds an HTML preview of <paramref name="text"/> in which the terms of
''' <paramref name="q"/> are wrapped in a yellow-background span.
''' </summary>
''' <param name="q">Query whose terms are scored and highlighted.</param>
''' <param name="text">Plain text to analyse and highlight.</param>
''' <param name="fieldName">Field name handed to the analyzer when tokenising <paramref name="text"/>.</param>
''' <returns>
''' Up to 4 best-scoring fragments (fragment size 50) joined with "&lt;br/&gt;";
''' <paramref name="text"/> itself when it is Nothing or empty; Nothing when highlighting fails.
''' </returns>
Public Function GeneratePreviewSimpleText(q As Query, text As String, fieldName As String) As String
    ' Nothing to tokenise: hand the input back instead of driving the highlighter into an exception.
    If String.IsNullOrEmpty(text) Then
        Return text
    End If

    Try
        Dim scorer As New QueryScorer(q)
        Dim formatter As IFormatter = New SimpleHTMLFormatter("<span style='background:yellow;'>", "</span>")
        Dim highlighter As New Highlighter(formatter, scorer)
        highlighter.TextFragmenter = New SimpleFragmenter(50)

        ' The analyzer is only needed while the fragments are being produced; release it afterwards.
        Using analyzer As New StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)
            Dim stream As TokenStream = analyzer.TokenStream(fieldName, New StringReader(text))
            Return highlighter.GetBestFragments(stream, text, 4, "<br/>")
        End Using
    Catch ex As Exception
        ' Highlighting is best effort: callers still get Nothing on failure, but the cause is no longer lost.
        System.Diagnostics.Trace.TraceWarning("GeneratePreviewSimpleText failed for field '" & fieldName & "': " & ex.Message)
        Return Nothing
    End Try
End Function
我正在使用 Lucene.Net 3.0.3,我有可用的代码,但我不知道如何添加一个简单的格式化程序。
''' <summary>
''' Searches the Lucene index stored under "~\IndexedFiles_V33\" for <paramref name="term"/>
''' and serialises at most the first 50 hits as a JSON array.
''' </summary>
''' <param name="term">
''' Search text. If it contains a comma it is split on commas and the pieces are combined with
''' Occur.SHOULD; otherwise it is split on spaces and every piece is added with Occur.MUST.
''' Pieces that are empty or whitespace-only are skipped.
''' </param>
''' <returns>
''' A JSON array whose objects carry the "id", "Grant_ID", "text" and "Score" properties,
''' or the exception message when the search as a whole fails.
''' </returns>
Public Function Lucene_Index_Search_Complete(term As String) As String
    Dim sb As New StringBuilder()
    Dim sw As New StringWriter(sb)
    Dim writer As JsonWriter = New JsonTextWriter(sw)
    Try
        ' The directory, reader, searcher and analyzer are released deterministically
        ' so index file handles do not accumulate across requests.
        Using d As Lucene.Net.Store.Directory = FSDirectory.Open(New DirectoryInfo(Server.MapPath("~") + "\IndexedFiles_V33\"))
            Using reader As IndexReader = IndexReader.Open(d, True), indexSearch As Searcher = New IndexSearcher(reader)
                Using a As Analyzer = New StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)
                    Dim FieldNames As String() = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR).ToArray
                    Dim parser As MultiFieldQueryParser = New MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, FieldNames, a)
                    Dim q As BooleanQuery = New BooleanQuery()

                    term = term.Trim
                    ' Comma-separated input means "any of these"; space-separated means "all of these".
                    Dim matchAny As Boolean = term.Contains(",")
                    Dim separator As String = If(matchAny, ",", " ")
                    Dim occurrence As Occur = If(matchAny, Occur.SHOULD, Occur.MUST)
                    For Each phase As String In Split(term, separator)
                        ' Doubled separators ("a  b", "a,,b") produce blank pieces; do not hand those to the parser.
                        If phase.Trim().Length = 0 Then
                            Continue For
                        End If
                        q.Add(parser.Parse(phase), occurrence)
                    Next

                    Dim collector As TopScoreDocCollector = TopScoreDocCollector.Create(1000, True)
                    indexSearch.Search(q, collector)
                    Dim hits As ScoreDoc() = collector.TopDocs().ScoreDocs

                    ' Loop through the matching hits (first 50 at most), retrieving each document.
                    writer.WriteStartArray()
                    For i As Integer = 0 To System.Math.Min(hits.Length, 50) - 1
                        ' Reset per hit so a failure is never logged against the previous hit's link.
                        Dim link As String = ""
                        Try
                            Dim doc As Document = indexSearch.Doc(hits(i).Doc)
                            Dim Score As String = hits(i).Score.ToString
                            Dim GrantID As String = doc.Get("GrantID")
                            link = doc.Get("link")
                            Dim jsonData As JObject = JObject.Parse(doc.Get("JSON_Data"))
                            Dim Title As String = jsonData("Funding Opportunity Title").ToString
                            ' Read but not emitted; kept so a document without this key is still logged and skipped.
                            Dim Posted_Date As String = jsonData("Posted Date").ToString
                            Dim purpose As String = jsonData("Funding Opportunity Purpose").ToString

                            ' All lookups are done before the object is opened, so a bad document
                            ' cannot leave a half-written JSON object behind.
                            writer.WriteStartObject()
                            writer.WritePropertyName("id")
                            writer.WriteValue((1 + i).ToString)
                            writer.WritePropertyName("Grant_ID")
                            writer.WriteValue(GrantID)
                            writer.WritePropertyName("text")
                            writer.WriteValue("<br/>" + Title + "<br/>" + purpose + "<br/><b>Score:</b> " + Score + "<br/>")
                            writer.WritePropertyName("Score")
                            writer.WriteValue(Score)
                            writer.WriteEndObject()
                        Catch ex As Exception
                            ' Best effort: one unreadable hit must not abort the whole result set.
                            WriteErrorLog(ex.Message, link)
                        End Try
                    Next
                End Using
            End Using
        End Using
        ' Close() is relied on to finish the still-open JSON array before the buffer is read.
        writer.Close()
        Return sb.ToString
    Catch ex As Exception
        Return ex.Message
    End Try
End Function
我只想用黄色背景包裹搜索词。感谢您提供的任何帮助——我已经搜索过答案,但我想我一定遗漏了什么。我需要高亮显示 JSON 字段 Funding Opportunity Purpose 和 Funding Opportunity Title 中的搜索词。
搞定了:
''' <summary>
''' Builds an HTML preview of <paramref name="text"/> in which the terms of
''' <paramref name="q"/> are wrapped in a yellow-background span.
''' </summary>
''' <param name="q">Query whose terms are scored and highlighted.</param>
''' <param name="text">Plain text to analyse and highlight.</param>
''' <param name="fieldName">Field name handed to the analyzer when tokenising <paramref name="text"/>.</param>
''' <returns>
''' Up to 4 best-scoring fragments (fragment size 50) joined with "&lt;br/&gt;";
''' <paramref name="text"/> itself when it is Nothing or empty; Nothing when highlighting fails.
''' </returns>
Public Function GeneratePreviewSimpleText(q As Query, text As String, fieldName As String) As String
    ' Nothing to tokenise: hand the input back instead of driving the highlighter into an exception.
    If String.IsNullOrEmpty(text) Then
        Return text
    End If

    Try
        Dim scorer As New QueryScorer(q)
        Dim formatter As IFormatter = New SimpleHTMLFormatter("<span style='background:yellow;'>", "</span>")
        Dim highlighter As New Highlighter(formatter, scorer)
        highlighter.TextFragmenter = New SimpleFragmenter(50)

        ' The analyzer is only needed while the fragments are being produced; release it afterwards.
        Using analyzer As New StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30)
            Dim stream As TokenStream = analyzer.TokenStream(fieldName, New StringReader(text))
            Return highlighter.GetBestFragments(stream, text, 4, "<br/>")
        End Using
    Catch ex As Exception
        ' Highlighting is best effort: callers still get Nothing on failure, but the cause is no longer lost.
        System.Diagnostics.Trace.TraceWarning("GeneratePreviewSimpleText failed for field '" & fieldName & "': " & ex.Message)
        Return Nothing
    End Try
End Function