google-cloud-go icon indicating copy to clipboard operation
google-cloud-go copied to clipboard

spanner: sessions are not being closed when client is closed

Open egonelbre opened this issue 1 year ago • 2 comments

I was debugging https://github.com/GoogleCloudPlatform/cloud-spanner-emulator/issues/159 problem and found out that using spanner.Client leaves about 100 sessions open per client. This in test environments can lead to a situation where creating 1000 clients ands up creating 80k sessions. This slows down trivial queries with new clients to 1s+.

It seems that client.Close does not close the batch created sessions.

Code

Reproducer
package main

import (
	"context"
	"errors"
	"fmt"
	"os"
	"os/signal"
	"time"

	"cloud.google.com/go/spanner"
	database "cloud.google.com/go/spanner/admin/database/apiv1"
	"cloud.google.com/go/spanner/admin/database/apiv1/databasepb"
	instance "cloud.google.com/go/spanner/admin/instance/apiv1"
	"cloud.google.com/go/spanner/admin/instance/apiv1/instancepb"
	spannerdirect "cloud.google.com/go/spanner/apiv1"
	"cloud.google.com/go/spanner/apiv1/spannerpb"
	"golang.org/x/sync/errgroup"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
	"google.golang.org/api/option/internaloption"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
)

func main() {
	err := run()
	if err != nil {
		fmt.Fprint(os.Stderr, err)
		os.Exit(1)
	}
}

func run() error {
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	defer cancel()

	const (
		ProjectID, InstanceID, DatabaseID = "proj", "inst", "db"

		Database = "projects/" + ProjectID + "/instances/" + InstanceID + "/databases/" + DatabaseID
	)

	{ // create a new instance
		admin, err := instance.NewInstanceAdminClient(ctx)
		if err != nil {
			return fmt.Errorf("failed to create instance admin: %w", err)
		}
		op, err := admin.CreateInstance(ctx, &instancepb.CreateInstanceRequest{
			Parent:     "projects/" + ProjectID,
			InstanceId: InstanceID,
		})
		if err != nil {
			if spanner.ErrCode(err) == codes.AlreadyExists {
				goto instanceReady
			}
			return fmt.Errorf("create instance failed: %w", err)
		}
		if _, err := op.Wait(ctx); err != nil {
			return fmt.Errorf("failed to wait instance creation: %w", err)
		}

	instanceReady:
		if err := admin.Close(); err != nil {
			return fmt.Errorf("failed to close instance admin: %w", err)
		}
	}

	{ // create a database
		admin, err := database.NewDatabaseAdminClient(ctx)
		if err != nil {
			return fmt.Errorf("failed to create database admin: %w", err)
		}

		ddl, err := admin.CreateDatabase(ctx, &databasepb.CreateDatabaseRequest{
			Parent:          "projects/" + ProjectID + "/instances/" + InstanceID,
			DatabaseDialect: databasepb.DatabaseDialect_GOOGLE_STANDARD_SQL,
			CreateStatement: "CREATE DATABASE " + DatabaseID,
			ExtraStatements: []string{},
		})
		if err != nil {
			if spanner.ErrCode(err) == codes.AlreadyExists {
				goto databaseReady
			}
			return fmt.Errorf("failed to create database: %w", err)
		}

		if _, err := ddl.Wait(ctx); err != nil {
			return fmt.Errorf("failed to wait database creation: %w", err)
		}

	databaseReady:
		if err := admin.Close(); err != nil {
			return fmt.Errorf("failed to close database admin: %w", err)
		}
	}

	const N = 1000

	var group errgroup.Group
	group.SetLimit(128)

	for k := 0; k < N; k++ {
		if ctx.Err() != nil {
			break
		}

		k := k
		group.Go(func() error {
			client, err := spanner.NewClient(ctx, Database)
			if err != nil {
				return fmt.Errorf("failed to create client: %w", err)
			}

			start := time.Now()
			err = client.Single().Query(ctx, spanner.Statement{SQL: `SELECT 1`}).Do(
				func(row *spanner.Row) error {
					var v int64
					return row.Columns(&v)
				})
			if err != nil {
				return fmt.Errorf("failed to query: %w", err)
			}
			finish := time.Now()

			client.Close()

			if k%100 == 0 { // just a few random samples
				fmt.Printf("%v%%  last:%v\n", k*100/N, finish.Sub(start))
			}

			return nil
		})
	}

	if err := group.Wait(); err != nil {
		return err
	}

	count, err := fetchSessionCount(ctx, Database)
	if err != nil {
		return fmt.Errorf("failed to fetch count: %w", err)
	}

	fmt.Printf("SESSIONS %v\n", count)

	return nil
}

func fetchSessionCount(ctx context.Context, database string) (int64, error) {
	client, err := spannerdirect.NewClient(ctx,
		option.WithEndpoint(os.Getenv("SPANNER_EMULATOR_HOST")),
		option.WithGRPCDialOption(grpc.WithInsecure()),
		option.WithoutAuthentication(),
		option.WithGRPCDialOption(grpc.WithBlock()),
		internaloption.SkipDialSettingsValidation(),
	)
	if err != nil {
		return 0, fmt.Errorf("failed to create monitoring client: %w", err)
	}
	defer func() {
		err = errors.Join(err, client.Close())
	}()

	count, err := countSessions(ctx, client, database)
	if err != nil {
		return 0, fmt.Errorf("count sessions failed: %w", err)
	}

	return count, nil
}

func countSessions(ctx context.Context, client *spannerdirect.Client, database string) (int64, error) {
	var count int64
	req := &spannerpb.ListSessionsRequest{
		Database:  database,
		PageSize:  1e6,
		PageToken: "", // TODO: handle pagination
	}

	it := client.ListSessions(ctx, req)
	for {
		resp, err := it.Next()
		if err == iterator.Done {
			return count, nil
		}
		if err != nil {
			return count, fmt.Errorf("failed to iterate: %w", err)
		}

		if false {
			fmt.Println(resp.Name)
		}

		count++
	}
}

Expected behavior

After client.Close returns, there are no sessions left open. This assumes normal operations, of course.

Actual behavior

100 new sessions open after client.Close().

Screenshots

Plot of time taken for SELECT 1 over time, where each query creates a new client.

image

egonelbre avatar Jun 13 '24 10:06 egonelbre

@egonelbre It was decided before that client libraries won't do the session cleanup or(DeleteSession RPC calls), so if a session is idle for an hour in backend it should be done automatically.

https://github.com/googleapis/google-cloud-go/pull/6679 cc: @olavloite

rahul2393 avatar Jul 01 '24 05:07 rahul2393

With multiplexed sessions this wouldn't help anymore, because they won't be deleted in spanner emulator https://github.com/GoogleCloudPlatform/cloud-spanner-emulator/blob/master/frontend/collections/session_manager.cc#L117

egonelbre avatar Nov 25 '25 20:11 egonelbre