Vm instance
PGSync version: 2.21
Postgres version: 13.1
Elasticsearch version: 7.1 Python version: 3.3
Problem Description: We have small instance, e2-micro, 1 shared core 1 GB memory. Initial steps are ok, but when we start with daemon instance is killed. Which is optimal instance performance for 2-3k items ? Thanks.
Schema: [ { "database": "db", "index": "index", "plugins": [ "Test" ], "setting": { "analysis": { "filter": { "edge_ngram_filter": { "type": "edge_ngram", "min_gram": 1, "max_gram": 5 } }, "tokenizer": { "2gram_tokenizer": { "type": "ngram", "min_gram": 2, "max_gram": 2, "token_chars": [ "letter", "digit" ] }, "3gram_tokenizer": { "type": "ngram", "min_gram": 3, "max_gram": 3, "token_chars": [ "letter", "digit" ] } }, "normalizer": { "case_insensitive": { "filter": "lowercase" } }, "analyzer": { "default": { "tokenizer": "classic", "filter": [ "classic", "lowercase", "decimal_digit" ], "char_filter": "html_strip" }, "2gram_analyzer": { "tokenizer": "2gram_tokenizer", "filter": [ "classic", "lowercase", "decimal_digit" ], "char_filter": "html_strip" }, "3gram_analyzer": { "tokenizer": "3gram_tokenizer", "filter": [ "classic", "lowercase", "decimal_digit" ], "char_filter": "html_strip" }, "edge_ngram_analyzer": { "tokenizer": "classic", "filter": [ "classic", "lowercase", "decimal_digit", "edge_ngram_filter" ], "char_filter": "html_strip" } } } }, "mapping": { "visit": { "properties": { "id": { "type": "integer" }, "createdAt": { "type": "date" }, "status": { "type": "keyword" }, "planDate": { "type": "date" }, "planned": { "type": "keyword" }, "typeOfResource": { "type": "keyword" } } }, "mission": { "properties": { "id": { "type": "integer" }, "startDate": { "type": "date" }, "endDate": { "type": "date" }, "duration": { "type": "integer" }, "type": { "type": "keyword" } } }, "client": { "properties": { "id": { "type": "integer" }, "name": { "type": "keyword" } } }, "resourcePerson": { "properties": { "id": { "type": "integer" }, "fullName": { "type": "keyword" }, "gender": { "type": "keyword" }, "age": { "type": "keyword" } } }, "service": { "properties": { "id": { "type": "integer" }, "isActive": { "type": "keyword" }, "isExternal": { "type": "boolean" }, "isOnDemand": { "type": "boolean" }, "paymentCurrency": { "type": "keyword" }, "name": { "type": "keyword" } } }, "address": { "properties": { "id": { "type": "integer" }, "city": { "type": "keyword" }, "state": { "type": "keyword" }, "country": { "type": "keyword" }, "streetName": { "type": "keyword" }, "zipcode": { "type": "keyword" } } }, "poa": { "properties": { "id": { "type": "integer" }, "type": { "type": "keyword" }, "name": { "type": "keyword" } } }, "resourceType": { "properties": { "name": { "type": "keyword" }
}
},
"poaLabels": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"label": {
"type": "keyword"
}
}
}
},
"nodes": {
"table": "visit",
"schema": "public",
"columns": [
"id",
"createdAt",
"planDate",
"resourceId",
"status",
"deletedAt"
],
"children": [
{
"table": "mission",
"columns": [
"id",
"startDate",
"endDate",
"type",
"serviceId",
"deletedAt"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "service",
"columns": [
"id",
"createdAt",
"name"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "client",
"columns": [
"id",
"createdAt",
"name"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
}
},
{
"table": "process",
"columns": [
"id",
"deletedAt",
"name"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
}
}
]
},
{
"table": "poa",
"columns": [
"id",
"createdAt",
"type"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "store",
"columns": [
"id",
"createdAt",
"name"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "store_address",
"columns": [
"city",
"state",
"country",
"zipcode",
"streetName"
],
"relationship": {
"variant": "object",
"type": "one_to_many"
}
}
]
},
{
"table": "poa_poa_labels_poa_label",
"columns": [
"poaLabelId"
],
"relationship": {
"variant": "object",
"type": "one_to_many"
},
"children": [
{
"table": "poa_label",
"columns": [
"id",
"label"
],
"relationship": {
"variant":"object",
"type": "one_to_one"
}
}
]
}
]
}
]
},
{
"table": "resource",
"columns": [
"id"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "user",
"columns": [
"id"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
},
"children": [
{
"table": "user_person",
"columns": [
"name",
"surname",
"gender"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
}
}
]
},
{
"table": "resource_type",
"columns": [
"name"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
}
}
]
},
{
"table": "feedback",
"columns": [
"id",
"deletedAt"
],
"relationship": {
"variant": "object",
"type": "one_to_one"
}
}
]
}
}
]
Env:
PG_USER=XXXX
PG_PORT=XXXX
PG_HOST=XXXX
PG_PASSWORD=XXXX
ELASTICSEARCH_PORT=XXXX
ELASTICSEARCH_USER=elastic
ELASTICSEARCH_PASSWORD=XXXX
ELASTICSEARCH_SCHEME=https
ELASTICSEARCH_HOST=XXXX
REDIS_HOST=XXXX
REDIS_PORT=XXXX
REDIS_DB=2
ELASTICSEARCH_CHUNK_SIZE=50
ELASTICSEARCH_MAX_CHUNK_BYTES=1024000
ELASTICSEARCH_MAX_RETRIES=14
ELASTICSEARCH_QUEUE_SIZE=1
ELASTICSEARCH_THREAD_COUNT=1
ELASTICSEARCH_TIMEOUT=320
ELASTICSEARCH_STREAMING_BULK=True
ELASTICSEARCH_RAISE_ON_ERROR=False
ELASTICSEARCH_RAISE_ON_EXCEPTION=False
QUERY_CHUNK_SIZE=50
REDIS_READ_CHUNK_SIZE=50
REDIS_WRITE_CHUNK_SIZE=50
CONSOLE_LOGGING_HANDLER_MIN_LEVEL=DEBUG
I believe you are running into the OOM killer. The resource requirements totally depends on your data and structure. Do you have all services running under the VM i.e Postgres, Elasticsearch. Nevertheless, I think 1GB is a bit low. I would suggest doubling it at least
I understand, but we have different problem our cpu is over 100 %. Also i checked our PostgreSQL db , CPU is on 100 %. DB performance: vCPUs 1 Memory 3.75 GB SSD storage 15 GB. Also we updated our memory.
I can't copy whole query "SELECT count(*) AS count_1 FROM public.visit AS visit_1 LEFT OUTER JOIN LATERAL (SELECT CAST(JSON_BUILD_OBJECT('mission', CAST(JSON_BUILD_OBJECT('id', JSON_BUILD_ARRAY(mission_1.id)) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_2._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_3._keys) AS JSONB) AS _keys, CAST(JSON_BUILD_OBJECT('id', mission_1.id, 'startDate', mission_1."startDate", 'endDate', mission_1."endDate", 'type', mission_1.type, 'serviceId', mission_1."serviceId", 'deletedAt', mission_1."deletedAt", 'service', anon_2.service, 'poa', anon_3.poa) AS JSONB) AS mission, mission_1.id AS id FROM public.mission AS mission_1 LEFT OUTER JOIN LATERAL (SELECT CAST(JSON_BUILD_OBJECT('service', CAST(JSON_BUILD_OBJECT('id', service_1.id) AS JSONB)) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_4._keys) AS JSONB) || CAST(JSON_BUILD_ARRAY(anon_5._keys) AS JSONB) AS keys, CAST(JSON_BUILD_OBJECT('id', service_1.id, 'createdAt', service_1."createdAt", 'name', service_1.name, 'client', anon_4.client, 'process', anon"

The initial sync is always going to require resources proportional to your data size and structure.
This is a one off.
I would suggest allocating as much resources needed to get the initial data synced to ES and then you can downsize afterwards.
Since PGSync is always going to be event driven and performing a delta sync.