arangodb-docker icon indicating copy to clipboard operation
arangodb-docker copied to clipboard

Several calls to createDocuments() crash the ArangoDB docker instance

Open naulacambra opened this issue 5 years ago • 1 comments
trafficstars

I'm trying to upload a bunch of json data to an arango server using Arango GoDriver

Here is my code

package main

import (
	"context"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"time"

	driver "github.com/arangodb/go-driver"
	"github.com/arangodb/go-driver/http"
)

// ChannelInfoJson is one measurement record parsed from the input JSON
// files and uploaded as a document to ArangoDB.
type ChannelInfoJson struct {
	// Channel is the (wifi) channel number; decoded from a JSON number.
	Channel int           `json:"Channel"`
	// Values holds the raw per-channel measurements; the element type is
	// whatever the source JSON contains (assumed opaque here — TODO confirm).
	Values  []interface{} `json:"Values"`
	// From/To bound the measurement interval; parsed in main from strings
	// in "02-Jan-2006 15:04:05" layout.
	From    time.Time     `json:"From"`
	To      time.Time     `json:"To"`
}

// getClient builds an ArangoDB client over a plain HTTP connection.
//
// Any setup failure is fatal: the original code discarded both errors,
// which deferred the failure to a nil-pointer panic at first use. Since
// the program cannot proceed without a client, fail fast with a message.
func getClient() driver.Client {
	conn, err := http.NewConnection(http.ConnectionConfig{
		Endpoints: []string{"http://[url]:8529/"},
	})
	if err != nil {
		log.Fatalf("creating connection: %v", err)
	}

	client, err := driver.NewClient(driver.ClientConfig{
		Connection:     conn,
		Authentication: driver.BasicAuthentication("root", "root"),
	})
	if err != nil {
		log.Fatalf("creating client: %v", err)
	}

	return client
}

// getDb opens the named database on a freshly created client.
//
// As with getClient, an error here (e.g. the database does not exist or
// authentication failed) previously went unnoticed and produced a nil
// Database; treat it as fatal instead.
func getDb(ctx context.Context, dbName string) driver.Database {
	client := getClient()

	db, err := client.Database(ctx, dbName)
	if err != nil {
		log.Fatalf("opening database %q: %v", dbName, err)
	}

	return db
}

func main() {
	ctx := context.Background()
	db := getDb(ctx, "dbName")

	// BUG FIX: colName was referenced below but never declared, so the
	// original program did not compile. Adjust the value as needed.
	colName := "channelInfo"

	// Open the target collection, creating it on first run.
	var col driver.Collection
	exists, err := db.CollectionExists(ctx, colName)
	if err != nil {
		log.Fatalf("checking collection %q: %v", colName, err)
	}

	if !exists {
		// WaitForSync: every write is synced to disk before the server
		// acknowledges it (slower, but durable).
		options := &driver.CreateCollectionOptions{
			WaitForSync: true,
		}
		col, err = db.CreateCollection(ctx, colName, options)
		if err != nil {
			log.Fatalf("creating collection %q: %v", colName, err)
		}
	} else {
		col, err = db.Collection(ctx, colName)
		if err != nil {
			log.Fatalf("opening collection %q: %v", colName, err)
		}
	}

	// BUG FIX: the original listed "D:\Documents\json" but then read files
	// from "D:\Documentos\TFG\json". Use a single directory for both, and
	// build paths with filepath.Join instead of string formatting.
	const jsonDir = `D:\Documents\json`

	files, err := ioutil.ReadDir(jsonDir)
	if err != nil {
		log.Fatalf("listing %q: %v", jsonDir, err)
	}

	docs := []ChannelInfoJson{}
	batchSize := 10 // upload after every batchSize files

	// flush uploads the accumulated docs and resets the buffer, keeping
	// its backing array for reuse.
	flush := func() {
		if len(docs) == 0 {
			return
		}
		if _, _, err := col.CreateDocuments(ctx, docs); err != nil {
			fmt.Println("Error in batch creation", err)
			os.Exit(-1)
		}
		docs = docs[:0]
	}

	for fileIndex, f := range files {
		jsonFile, err := ioutil.ReadFile(filepath.Join(jsonDir, f.Name()))
		if err != nil {
			log.Fatalf("reading %q: %v", f.Name(), err)
		}

		// Each file holds a JSON array of channel-info objects.
		var channelInfoArr []interface{}
		if err := json.Unmarshal(jsonFile, &channelInfoArr); err != nil {
			log.Fatalf("parsing %q: %v", f.Name(), err)
		}

		for _, channelInfo := range channelInfoArr {
			fields, ok := channelInfo.(map[string]interface{})
			if !ok {
				continue // skip array entries that are not JSON objects
			}

			// Parse errors are ignored (as in the original): a bad
			// timestamp yields the zero time.Time rather than aborting.
			from, _ := time.Parse("02-Jan-2006 15:04:05", fields["From"].(string))
			to, _ := time.Parse("02-Jan-2006 15:04:05", fields["To"].(string))
			doc := ChannelInfoJson{
				// BUG FIX: the original referenced the undeclared
				// identifier "field" here instead of "fields".
				Channel: int(fields["Channel"].(float64)),
				Values:  fields["Values"].([]interface{}),
				From:    from,
				To:      to,
			}
			docs = append(docs, doc)
		}

		if (fileIndex+1)%batchSize == 0 {
			flush()
		}
	}

	// BUG FIX: upload the final partial batch. The original silently
	// dropped the remainder whenever len(files) was not a multiple of
	// batchSize.
	flush()
}

I'm trying to upload 2000 json files.

Each one has info of N channels (wifi channels).

This code works on localhost, but when I try to upload it to a docker instance, it loads the first batch, but the second makes the docker crash. I'm unable to get more info from the docker logs

docker logs [containerId]

Neither from the Arango logs.

Does anyone know why this is happening?

Thank you

naulacambra avatar May 19 '20 21:05 naulacambra

Hi, there are several possible reasons and ways to proceed. Docker may restrict resources without these restrictions being visible in /proc. Since ArangoDB 3.6.3 you may specify ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY with a number for this, to work around it.

A second way to get more information about what's going on could be to try the devel / nightly container; it has a crash handler which will print the position it's in on SEGFAULTs.

dothebart avatar May 20 '20 14:05 dothebart