lucene-go
lucene-go copied to clipboard
[QUESTION] - How to make search work?
I have a JSON file that I'm trying to index. Indexing seems to work, but the search doesn't return any data.
The code I've tried:
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"slices"
"time"
"github.com/geange/lucene-go/codecs/simpletext"
"github.com/geange/lucene-go/core/document"
"github.com/geange/lucene-go/core/index"
"github.com/geange/lucene-go/core/search"
"github.com/geange/lucene-go/core/store"
)
var path = "icd"
// main rebuilds the index under path from cpt_codes.json and then runs a
// sample query against it via searchQuery.
//
// Any failure while preparing the directory, creating the writer, adding a
// document or committing panics, matching the error handling used in the
// rest of this program.
func main() {
	// Start from an empty index directory on every run.
	if err := os.RemoveAll(path); err != nil {
		panic(err)
	}

	data := readFileAsMap("cpt_codes.json")

	// Open the directory through the shared path variable so the writer and
	// searchQuery can never drift apart.
	dir, err := store.NewNIOFSDirectory(path)
	if err != nil {
		panic(err)
	}

	codec := simpletext.NewCodec()
	similarity := search.NewCastBM25Similarity()
	config := index.NewWriterConfig(codec, similarity)

	ctx := context.Background()
	writer, err := index.NewWriter(ctx, dir, config)
	if err != nil {
		panic(err)
	}

	// JSON keys copied into each document; built once instead of once per key.
	fields := []string{"charge_type", "client_internal_code", "client_proc_desc", "cpt_hcpcs_code", "work_item_id"}

	start := time.Now()
	for _, d := range data {
		doc := document.NewDocument()
		for k, v := range d {
			if slices.Contains(fields, k) {
				// NOTE(review): STORED_ONLY presumably stores the value without
				// indexing it, in which case a TermQuery on these fields cannot
				// match; confirm against the library and use an indexed field
				// type for searchable fields.
				doc.Add(document.NewField(k, v, document.STORED_ONLY))
			}
		}
		if _, err := writer.AddDocument(ctx, doc); err != nil {
			panic(err)
		}
	}
	fmt.Println("Indexing took", time.Since(start))

	// Commit before searching. A deferred commit would only run after
	// searchQuery has returned, so the search would see none of the
	// documents added above.
	if err := writer.Commit(ctx); err != nil {
		panic(err)
	}

	searchQuery()
}
// searchQuery opens the index directory named by path, runs a term query for
// "CANDIDA" on the client_proc_desc field asking for at most five results,
// and prints the elapsed search time, the number of hits and each hit's
// document number.
//
// Every error returned along the way results in a panic.
func searchQuery() {
	// Single place for this function's panic-on-error policy.
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	directory, err := store.NewNIOFSDirectory(path)
	must(err)

	// The reader below is obtained from a writer, so build one with the same
	// codec and similarity settings used for indexing.
	cfg := index.NewWriterConfig(simpletext.NewCodec(), search.NewCastBM25Similarity())
	w, err := index.NewWriter(context.Background(), directory, cfg)
	must(err)
	if w == nil {
		panic("Writer is nil")
	}

	r, err := index.DirectoryReaderOpen(w)
	must(err)

	s, err := search.NewIndexSearcher(r)
	must(err)

	term := index.NewTerm("client_proc_desc", []byte("CANDIDA"))
	q := search.NewTermQuery(term)

	// Time only the search call itself.
	began := time.Now()
	top, err := s.SearchTopN(q, 5)
	must(err)
	fmt.Println("Searching took", time.Since(began), len(top.GetScoreDocs()))

	for rank, hit := range top.GetScoreDocs() {
		fmt.Printf("result%d: Doc%d\n", rank, hit.GetDoc())
	}
}
// readFileAsMap reads the JSON file at the given path and decodes it as an
// array of objects, returning one map per object.
//
// It panics if the file cannot be read. If the content cannot be decoded as
// an array of objects, it prints the error and returns nil; json.Unmarshal
// can leave a partially filled slice behind on a type error, and that
// partial result is deliberately discarded so callers never process
// half-decoded records.
func readFileAsMap(file string) (icds []map[string]any) {
	jsonData, err := os.ReadFile(file)
	if err != nil {
		panic("failed to read json file, error: " + err.Error())
	}

	var records []map[string]any
	if err := json.Unmarshal(jsonData, &records); err != nil {
		fmt.Printf("failed to unmarshal json file, error: %v\n", err)
		return nil
	}
	return records
}
The field and the data both exist in the JSON file.
Thank you very much for your interest in this incomplete project. I will try to analyze the code. IndexWriter
does not currently support real-time (in-memory) queries, so I think you should call Commit directly before
running the query, rather than in a defer.
@geange Is there any update on this?
This branch will fix most query bugs, and the code is still under testing. There are still some compatibility issues between this branch code and the Java version code, mainly focused on the implementation of index write operations.