go-driver icon indicating copy to clipboard operation
go-driver copied to clipboard

Several calls to createDocuments() crash the Arango server

Open naulacambra opened this issue 5 years ago • 1 comments

I'm trying to upload a bunch of json data to an arango server.

Here is my code

package main

import (
	"context"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"regexp"
	"strconv"
	"time"

	driver "github.com/arangodb/go-driver"
	"github.com/arangodb/go-driver/http"
)

// ChannelInfoJson is the document shape stored in ArangoDB for one
// Wi-Fi channel measurement window. It mirrors the structure of the
// input JSON files (keys "Channel", "Values", "From", "To").
type ChannelInfoJson struct {
	Channel int           `json:"Channel"` // Wi-Fi channel number
	Values  []interface{} `json:"Values"`  // raw measurement samples; element type not fixed by the input files
	From    time.Time     `json:"From"`    // start of the measurement window
	To      time.Time     `json:"To"`      // end of the measurement window
}

// getClient builds an ArangoDB client for the configured endpoint.
// The original version discarded both errors, so a bad endpoint or
// client configuration produced a nil client and a panic far from the
// real cause; now any setup failure is reported and the process exits.
func getClient() driver.Client {
	conn, err := http.NewConnection(http.ConnectionConfig{
		Endpoints: []string{"http://[url]:8529/"},
	})
	if err != nil {
		fmt.Println("creating HTTP connection:", err)
		os.Exit(1)
	}

	client, err := driver.NewClient(driver.ClientConfig{
		Connection:     conn,
		Authentication: driver.BasicAuthentication("root", "root"),
	})
	if err != nil {
		fmt.Println("creating ArangoDB client:", err)
		os.Exit(1)
	}

	return client
}

// getDb opens the named database, exiting on failure. The original
// discarded the error, so a missing or unreachable database returned a
// nil handle that panicked on first use.
func getDb(ctx context.Context, dbName string) driver.Database {
	client := getClient()
	db, err := client.Database(ctx, dbName)
	if err != nil {
		fmt.Println("opening database", dbName+":", err)
		os.Exit(1)
	}
	return db
}

// main loads every JSON file from jsonDir, converts each entry into a
// ChannelInfoJson document, and uploads them to ArangoDB in batches of
// batchSize files.
//
// Fixes relative to the original:
//   - colName was never declared (compile error); it is now defined.
//   - the per-entry map was named `fields` but read as `field` (compile
//     error); all reads now use `fields`.
//   - the directory listed (D:\Documents\json) and the directory files
//     were read from (D:\Documentos\TFG\json) did not match; a single
//     jsonDir constant is now used for both, joined with filepath.Join.
//   - the final partial batch was silently dropped when the number of
//     files was not a multiple of batchSize; it is now flushed after
//     the loop.
//   - errors are no longer ignored.
func main() {
	ctx := context.Background()
	db := getDb(ctx, "dbName")

	// Target collection name; previously an undeclared identifier.
	colName := "channelInfo"

	var col driver.Collection
	exists, err := db.CollectionExists(ctx, colName)
	if err != nil {
		fmt.Println("checking collection existence:", err)
		os.Exit(1)
	}

	if exists {
		col, err = db.Collection(ctx, colName)
	} else {
		col, err = db.CreateCollection(ctx, colName, &driver.CreateCollectionOptions{
			WaitForSync: true,
		})
	}
	if err != nil {
		fmt.Println("opening or creating collection:", err)
		os.Exit(1)
	}

	// Single source of truth for the input directory (the original read
	// the listing from one path and the files from another).
	const jsonDir = `D:\Documents\json`

	files, err := ioutil.ReadDir(jsonDir)
	if err != nil {
		fmt.Println("listing", jsonDir+":", err)
		os.Exit(1)
	}

	batchSize := 10
	docs := make([]ChannelInfoJson, 0, batchSize)

	for fileIndex, f := range files {
		raw, err := ioutil.ReadFile(filepath.Join(jsonDir, f.Name()))
		if err != nil {
			fmt.Println("reading", f.Name()+":", err)
			os.Exit(1)
		}

		var channelInfoArr []interface{}
		if err := json.Unmarshal(raw, &channelInfoArr); err != nil {
			fmt.Println("unmarshalling", f.Name()+":", err)
			os.Exit(1)
		}

		for _, channelInfo := range channelInfoArr {
			fields, ok := channelInfo.(map[string]interface{})
			if !ok {
				// Skip entries that are not JSON objects rather than
				// reading from a nil map.
				continue
			}

			// NOTE(review): these parses assume every entry carries
			// well-formed From/To strings; malformed values yield the
			// zero time, as in the original.
			from, _ := time.Parse("02-Jan-2006 15:04:05", fields["From"].(string))
			to, _ := time.Parse("02-Jan-2006 15:04:05", fields["To"].(string))
			docs = append(docs, ChannelInfoJson{
				Channel: int(fields["Channel"].(float64)), // was `field` (undeclared)
				Values:  fields["Values"].([]interface{}), // was `field` (undeclared)
				From:    from,
				To:      to,
			})
		}

		if (fileIndex+1)%batchSize == 0 && len(docs) > 0 {
			if _, _, err := col.CreateDocuments(ctx, docs); err != nil {
				fmt.Println("Error in batch creation", err)
				os.Exit(-1)
			}
			// Reuse the backing array instead of reallocating per batch.
			docs = docs[:0]
		}
	}

	// Flush the trailing partial batch; the original dropped it whenever
	// len(files) was not a multiple of batchSize.
	if len(docs) > 0 {
		if _, _, err := col.CreateDocuments(ctx, docs); err != nil {
			fmt.Println("Error in batch creation", err)
			os.Exit(-1)
		}
	}
}

I'm trying to upload 2000 json files.

Each one has info of N channels (wifi channels).

This code works against a local server, but when I run it against a Docker instance it loads the first batch, and the second batch makes the Docker container crash. I'm unable to get more info from the Docker logs

docker logs [containerId]

Neither from the Arango logs.

Anyone could know why this is happening?

Thank you

naulacambra avatar May 19 '20 21:05 naulacambra

My guess is that you're running out of memory in the container. I'd try increasing the memory allotted to the container, and check out the memory and environment docs for ArangoDB configuration. You may need to set ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY lower than what the container "should" have if there's a lot going on in a container.

Also, you can skip the docs = nil, and maybe do docs := make([]ChannelInfoJson, 0, batchSize) once and then docs = docs[:0].

irridia avatar Oct 01 '20 05:10 irridia