mongodb-erlang icon indicating copy to clipboard operation
mongodb-erlang copied to clipboard

Performance issue while reading big collections

Open AlfUA opened this issue 5 years ago • 2 comments

Hello! It looks like I've run into a performance issue while trying to read a large amount of data from a collection. Please look through the code below, which reproduces the issue. The same query with the same indexes, performed from C# (using the proper driver), returns the dataset in less than 30 seconds. The binary data inserted into each document is just a big string converted to a binary with the list_to_binary function. The average document size is 54 KB. `make_test() -> {ok, _ } = application:ensure_all_started(mongodb), Database = <<"test_db">>, Collection = <<"test_collection">>, {ok, Connection} = mc_worker_api:connect ([{database, Database}]), {InsertTime, {ok, FirstTimeStamp, LastTimeStamp}} = timer:tc(?MODULE, create_and_insert, [Collection, Connection]), io:format("InsertTime: ~p~n", [InsertTime/1000000]), {FindTime, _} = timer:tc(?MODULE, find_data, [Collection, Connection, FirstTimeStamp, LastTimeStamp]), io:format("FindTime: ~p~n", [FindTime/1000000]).

%% @doc Prepare `Collection' and load it with the test data set.
%%
%% First asserts that the compound index on `projectId' and `timestamp'
%% (both with value 1) is ensured, then delegates to create_and_insert/7
%% to insert 15000 documents that all carry the payload from binary_data/0.
%%
%% Returns whatever create_and_insert/7 returns, i.e.
%% `{ok, FirstTimeStamp, LastTimeStamp}'.
create_and_insert(Collection, Connection) ->
    IndexKey = #{<<"projectId">> => 1, <<"timestamp">> => 1},
    IndexSpec = #{<<"key">> => IndexKey},
    %% Assertive match: anything other than `ok' crashes the test run.
    ok = mc_worker_api:ensure_index(Connection, Collection, IndexSpec),
    Payload = binary_data(),
    create_and_insert(Collection, Connection, Payload, 15000, 0, 0, first).

%% @doc Query every test document written between the two timestamps.
%%
%% Builds an `$and' selector that matches `projectId' 123 with a
%% `timestamp' in the inclusive range [FirstTimeStamp, LastTimeStamp],
%% opens a cursor for it, prints the cursor term, calls mc_cursor:rest/1
%% (presumably draining the cursor - the result is discarded, only the
%% elapsed time matters to the caller) and finally closes the cursor.
%%
%% Returns the result of mc_cursor:close/1.
find_data(Collection, Connection, FirstTimeStamp, LastTimeStamp) ->
    ByProject = {<<"projectId">>, 123},
    NotBefore = {<<"timestamp">>, {'$gte', FirstTimeStamp}},
    NotAfter = {<<"timestamp">>, {'$lte', LastTimeStamp}},
    Selector = {'$and', [ByProject, NotBefore, NotAfter]},
    %% Assertive match: crashes unless the driver hands back a cursor.
    {ok, Cursor} = mc_worker_api:find(Connection, Collection, Selector),
    io:format("Cursor: ~p~n", [Cursor]),
    _Documents = mc_cursor:rest(Cursor),
    mc_cursor:close(Cursor).

%% @doc Insert `Count' test documents and report the timestamp range used.
%%
%% Arguments:
%%   Collection, Connection - passed straight to mc_worker_api:insert/3.
%%   Data           - binary payload stored under `data' in every document.
%%   Count          - number of documents still to insert.
%%   FirstTimeStamp - timestamp of the first inserted document (0 until known).
%%   LastTimeStamp  - timestamp of the most recently inserted document.
%%   State          - `first' before anything was inserted, `other' afterwards.
%%
%% Returns `{ok, FirstTimeStamp, LastTimeStamp}'. When nothing is inserted
%% (Count is 0 in the `first' state) both timestamps are 0.
create_and_insert(_Collection, _Connection, _Data, 0, FirstTimeStamp, LastTimeStamp, other) ->
    {ok, FirstTimeStamp, LastTimeStamp};
create_and_insert(_Collection, _Connection, _Data, 0, 0, 0, first) ->
    %% Nothing was requested. Without this clause the `first' clause would
    %% insert anyway and Count would go negative, never reaching 0 again.
    {ok, 0, 0};
create_and_insert(Collection, Connection, Data, Count, 0, 0, first)
  when is_integer(Count), Count > 0 ->
    FirstTimeStamp = insert_test_document(Collection, Connection, Data),
    %% Seed the last timestamp with the first one: if this was the only
    %% document, the reported range must still contain it (previously 0 was
    %% passed here, producing an empty `$gte First, $lte 0' query range).
    create_and_insert(Collection, Connection, Data, Count - 1,
                      FirstTimeStamp, FirstTimeStamp, other);
create_and_insert(Collection, Connection, Data, Count, FirstTimeStamp, _LastTimeStamp, other)
  when is_integer(Count), Count > 0 ->
    NewLastTimeStamp = insert_test_document(Collection, Connection, Data),
    create_and_insert(Collection, Connection, Data, Count - 1,
                      FirstTimeStamp, NewLastTimeStamp, other).

%% Insert one test document stamped with the current system time in
%% nanoseconds and return that timestamp. The driver's reply is not
%% inspected, exactly as in the original reproduction script.
insert_test_document(Collection, Connection, Data) ->
    TimeStamp = erlang:system_time(nanosecond),
    mc_worker_api:insert(Connection, Collection, [
        #{<<"projectId">> => 123,
          <<"timestamp">> => TimeStamp,
          <<"data">> => {bin, bin, Data}}
    ]),
    TimeStamp.

%% @doc Build the binary payload stored in every test document.
%%
%% Takes the term returned by mongo_test_data:get_fake_data/0 and converts
%% it with list_to_binary/1.
binary_data() ->
    FakeData = mongo_test_data:get_fake_data(),
    list_to_binary(FakeData).`

The result is the following:

1> mongotest:make_test(). InsertTime: 6.418741 FindTime: 2460.389425

Please let me know if you need more details.

AlfUA avatar Sep 02 '18 14:09 AlfUA

Hi, can you please check the Erlang read code against data written by C#, and vice versa?

comtihon avatar Sep 09 '18 09:09 comtihon

Hello, I don't have an opportunity to check it right now. But earlier we already tried writing data from Erlang and reading it from C#, so Mongo worked as a mediator between the two components, and it worked well.

AlfUA avatar Sep 09 '18 09:09 AlfUA