lucene-go icon indicating copy to clipboard operation
lucene-go copied to clipboard

[QUESTION] - How to make search work?

Open sujit-baniya opened this issue 10 months ago • 1 comments

I have a JSON file and am trying to index it. Indexing seems to work, but the search doesn't return any results.

The code I've tried

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"slices"
	"time"
	
	"github.com/geange/lucene-go/codecs/simpletext"
	"github.com/geange/lucene-go/core/document"
	"github.com/geange/lucene-go/core/index"
	"github.com/geange/lucene-go/core/search"
	"github.com/geange/lucene-go/core/store"
)

// path is the index directory: main deletes it with os.RemoveAll before
// indexing, and searchQuery opens it to run the query.
var path = "icd"

// main rebuilds the index under path from cpt_codes.json and then runs a
// sample query against it via searchQuery.
//
// Any failure while preparing the directory, creating the writer, adding a
// document, or committing aborts the program with a panic.
func main() {
	if err := os.RemoveAll(path); err != nil {
		panic(err)
	}
	data := readFileAsMap("cpt_codes.json")

	// Open the same directory that was just removed and that searchQuery
	// reads, instead of repeating the literal name.
	dir, err := store.NewNIOFSDirectory(path)
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	config := index.NewWriterConfig(simpletext.NewCodec(), search.NewCastBM25Similarity())
	writer, err := index.NewWriter(ctx, dir, config)
	if err != nil {
		panic(err)
	}

	// Only these keys of each JSON record are copied into the document.
	// Built once here rather than on every inner-loop iteration.
	fields := []string{"charge_type", "client_internal_code", "client_proc_desc", "cpt_hcpcs_code", "work_item_id"}

	start := time.Now()
	for _, d := range data {
		doc := document.NewDocument()
		for k, v := range d {
			if slices.Contains(fields, k) {
				// NOTE(review): STORED_ONLY presumably stores the value without
				// indexing it, in which case the TermQuery in searchQuery can
				// never match - confirm against the lucene-go field types.
				doc.Add(document.NewField(k, v, document.STORED_ONLY))
			}
		}
		if _, err := writer.AddDocument(ctx, doc); err != nil {
			panic(err)
		}
	}
	fmt.Println("Indexing took", time.Since(start))

	// Commit before searching. A deferred commit would only run after
	// searchQuery has returned, so the search would see none of the
	// documents added above.
	if err := writer.Commit(ctx); err != nil {
		panic(err)
	}
	searchQuery()
}

// searchQuery opens the index stored under path, looks up the term "CANDIDA"
// in the client_proc_desc field, and prints how long the search took, how many
// hits came back (at most 5 are requested), and the document id of each hit.
//
// Every error returned while opening the index or searching causes a panic.
func searchQuery() {
	directory, err := store.NewNIOFSDirectory(path)
	if err != nil {
		panic(err)
	}

	// The reader below is obtained through a writer, so a writer is created
	// on the directory first with the same codec/similarity pair.
	cfg := index.NewWriterConfig(simpletext.NewCodec(), search.NewCastBM25Similarity())
	w, err := index.NewWriter(context.Background(), directory, cfg)
	switch {
	case err != nil:
		panic(err)
	case w == nil:
		panic("Writer is nil")
	}

	r, err := index.DirectoryReaderOpen(w)
	if err != nil {
		panic(err)
	}
	s, err := search.NewIndexSearcher(r)
	if err != nil {
		panic(err)
	}

	term := index.NewTerm("client_proc_desc", []byte("CANDIDA"))
	q := search.NewTermQuery(term)

	begin := time.Now()
	top, err := s.SearchTopN(q, 5)
	if err != nil {
		panic(err)
	}

	fmt.Println("Searching took", time.Since(begin), len(top.GetScoreDocs()))
	for rank, hit := range top.GetScoreDocs() {
		fmt.Printf("result%d: Doc%d\n", rank, hit.GetDoc())
	}
}

// readFileAsMap reads the JSON file named by file and decodes it into a slice
// of generic objects, one map per top-level array element.
//
// It panics if the file cannot be read or if its contents cannot be decoded
// into []map[string]any. Decode failures panic as well (rather than being
// printed and ignored) so the caller never proceeds with missing or partially
// decoded data.
func readFileAsMap(file string) (icds []map[string]any) {
	jsonData, err := os.ReadFile(file)
	if err != nil {
		panic("failed to read json file, error: " + err.Error())
	}

	if err := json.Unmarshal(jsonData, &icds); err != nil {
		panic("failed to unmarshal json file, error: " + err.Error())
	}
	return icds
}

The field and the data both exist in the JSON file.

sujit-baniya avatar Apr 11 '24 15:04 sujit-baniya

Thank you very much for your interest in this incomplete project. I will try to analyze the code. IndexWriter does not currently support real-time in-memory queries, so I think we should commit directly before running the query rather than committing in a defer.

geange avatar Apr 12 '24 00:04 geange

@geange Is there any update on this?

sujit-baniya avatar Dec 02 '24 07:12 sujit-baniya

This branch will fix most query bugs, and the code is still under testing. There are still some compatibility issues between this branch code and the Java version code, mainly focused on the implementation of index write operations.

https://github.com/geange/lucene-go/tree/feature/beginning

geange avatar Dec 18 '24 02:12 geange