couchdb icon indicating copy to clipboard operation
couchdb copied to clipboard

Flaky eunit tests on Windows CI worker

Open big-r81 opened this issue 2 years ago • 8 comments

Flaky test and fixtures setups on Windows CI:

[2023-01-20T09:12:17.190Z] Errors
[2023-01-20T09:12:17.190Z] ======
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] fixture setup [27,1,1,3]
[2023-01-20T09:12:17.190Z] ------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_dbs_info_test:create_db/1 (test/eunit/chttpd_dbs_info_test.erl, line 367)
[2023-01-20T09:12:17.190Z] in call from chttpd_dbs_info_test:setup/0 (test/eunit/chttpd_dbs_info_test.erl, line 43)
[2023-01-20T09:12:17.190Z] **error:{badmatch,{error,connection_closed}}
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] chttpd_dbs_info_test:117 skip_limit_test_ (t_dbs_info_when_shards_db_design_doc_exist)
[2023-01-20T09:12:17.190Z] --------------------------------------------------------------------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_dbs_info_test:t_dbs_info_when_shards_db_design_doc_exist/1 (test/eunit/chttpd_dbs_info_test.erl, line 316)
[2023-01-20T09:12:17.190Z] in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:run_test/1 (eunit_proc.erl, line 531)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 356)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 514)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 456)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 346)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:run_group/2 (eunit_proc.erl, line 570)
[2023-01-20T09:12:17.190Z] **error:{assertEqual,[{module,chttpd_dbs_info_test},
[2023-01-20T09:12:17.190Z]               {line,316},
[2023-01-20T09:12:17.190Z]               {expression,"FilteredDbs"},
[2023-01-20T09:12:17.190Z]               {expected,["db2-696c6977bd9fda5dfba921221d06c725"]},
[2023-01-20T09:12:17.190Z]               {value,[]}]}
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] fixture setup [27,2,1,2]
[2023-01-20T09:12:17.190Z] ------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_dbs_info_test:create_db/1 (test/eunit/chttpd_dbs_info_test.erl, line 367)
[2023-01-20T09:12:17.190Z] in call from chttpd_dbs_info_test:setup/0 (test/eunit/chttpd_dbs_info_test.erl, line 43)
[2023-01-20T09:12:17.190Z] in call from chttpd_dbs_info_test:setup_with_shards_db_ddoc/0 (test/eunit/chttpd_dbs_info_test.erl, line 54)
[2023-01-20T09:12:17.190Z] **error:{badmatch,{error,connection_closed}}
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] fixture setup [26,1,1,21]
[2023-01-20T09:12:17.190Z] -------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_db_test:create_db/1 (test/eunit/chttpd_db_test.erl, line 44)
[2023-01-20T09:12:17.190Z] in call from chttpd_db_test:setup/0 (test/eunit/chttpd_db_test.erl, line 36)
[2023-01-20T09:12:17.190Z] **error:{badmatch,{error,connection_closed}}
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] fixture setup [38,1,1,9]
[2023-01-20T09:12:17.190Z] ------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_security_tests:create_db/1 (test/eunit/chttpd_security_tests.erl, line 60)
[2023-01-20T09:12:17.190Z] in call from chttpd_security_tests:setup/0 (test/eunit/chttpd_security_tests.erl, line 46)
[2023-01-20T09:12:17.190Z] **error:{badmatch,{error,connection_closed}}
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] chttpd_socket_buffer_size_test:51 small_recbuf
[2023-01-20T09:12:17.190Z] ----------------------------------------------
[2023-01-20T09:12:17.190Z] 
[2023-01-20T09:12:17.190Z] ::in function chttpd_socket_buffer_size_test:'-small_recbuf/2-fun-1-'/1 (test/eunit/chttpd_socket_buffer_size_test.erl, line 54)
[2023-01-20T09:12:17.190Z] in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:run_test/1 (eunit_proc.erl, line 531)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 356)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 514)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 456)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 346)
[2023-01-20T09:12:17.190Z] in call from eunit_proc:run_group/2 (eunit_proc.erl, line 570)
[2023-01-20T09:12:17.190Z] **error:{assert,[{module,chttpd_socket_buffer_size_test},
[2023-01-20T09:12:17.437Z]          {line,54},
[2023-01-20T09:12:17.437Z]          {expression,"Response =:= 400 orelse Response =:= request_failed"},
[2023-01-20T09:12:17.437Z]          {expected,true},
[2023-01-20T09:12:17.437Z]          {value,false}]}
[2023-01-20T09:12:17.437Z] 
[2023-01-20T09:12:17.437Z] chttpd_socket_buffer_size_test:59 small_buffer
[2023-01-20T09:12:17.437Z] ----------------------------------------------
[2023-01-20T09:12:17.437Z] 
[2023-01-20T09:12:17.437Z] ::in function chttpd_socket_buffer_size_test:'-small_buffer/2-fun-1-'/1 (test/eunit/chttpd_socket_buffer_size_test.erl, line 62)
[2023-01-20T09:12:17.437Z] in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:run_test/1 (eunit_proc.erl, line 531)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 356)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 514)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 456)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 346)
[2023-01-20T09:12:17.437Z] in call from eunit_proc:run_group/2 (eunit_proc.erl, line 570)
[2023-01-20T09:12:17.437Z] **error:{assert,[{module,chttpd_socket_buffer_size_test},
[2023-01-20T09:12:17.437Z]          {line,62},
[2023-01-20T09:12:17.437Z]          {expression,"Response =:= 400 orelse Response =:= request_failed"},
[2023-01-20T09:12:17.437Z]          {expected,true},
[2023-01-20T09:12:17.437Z]          {value,false}]}
[2023-01-20T09:12:17.437Z] 
[2023-01-20T09:12:17.437Z] fixture setup [4,1,1,2]
[2023-01-20T09:12:17.437Z] -----------------------
[2023-01-20T09:12:17.437Z] 
[2023-01-20T09:12:17.437Z] ::in function chttpd_view_test:create_db/1 (test/eunit/chttpd_view_test.erl, line 47)
[2023-01-20T09:12:17.437Z] in call from chttpd_view_test:setup/0 (test/eunit/chttpd_view_test.erl, line 39)
[2023-01-20T09:12:17.437Z] **error:{badmatch,{error,connection_closed}}

big-r81 avatar Jan 20 '23 19:01 big-r81

Adding a failing test:

UsersDbSecurityTest [test/elixir/test/users_db_security_test.exs]
  * test user db security [L#260]
  * test user db security (3419.2ms) [L#260]

  1) test user db security (UsersDbSecurityTest)
     test/elixir/test/users_db_security_test.exs:260
     Assertion with == failed
     code:  assert doc["iterations"] == 4
     left:  3
     right: 4
     stacktrace:
       (couchdbtest 0.1.0) test/elixir/lib/couch/dbtest.ex:444: Couch.DBTest.retry_until/5
       test/elixir/test/users_db_security_test.exs:398: (test)

big-r81 avatar Sep 23 '24 19:09 big-r81

Linking a PR thread: https://github.com/apache/couchdb/pull/4406

big-r81 avatar Sep 23 '24 19:09 big-r81

These ones have been plaguing us for years. I was looking at the wireshark capture of some of them and still couldn't quite figure out what was going on.

nickva avatar Sep 23 '24 20:09 nickva

Moved from #4406:

module 'smoosh_persist'
  smoosh_persist:151: persist_unpersist_test_ (t_write_read_delete)...[0.015 s] ok
  smoosh_persist:152: persist_unpersist_test_ (t_fail_write_read_delete)...[0.001 s] ok
  smoosh_persist:153: persist_unpersist_test_ (t_corrupted_read)...[0.031 s] ok
  smoosh_persist:154: persist_unpersist_test_ (t_check_setup)...*failed*
in function smoosh_persist:t_check_setup/1 (src/smoosh_persist.erl, line 210)
in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
in call from eunit_proc:run_test/1 (eunit_proc.erl, line 531)
in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 356)
in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 514)
in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 456)
in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 346)
in call from eunit_proc:run_group/2 (eunit_proc.erl, line 570)
**error:{assertEqual,[{module,smoosh_persist},
              {line,210},
              {expression,"check_setup ( )"},
              {expected,{error,{"write",eacces}}},
              {value,ok}]}
  output:<<"">>
  smoosh_persist:155: persist_unpersist_test_ (t_persist_unpersist_disabled)...ok
  smoosh_persist:156: persist_unpersist_test_ (t_persist_unpersist_enabled)...[0.018 s] ok
  smoosh_persist:157: persist_unpersist_test_ (t_persist_unpersist_errors)...*failed*
in function smoosh_persist:t_persist_unpersist_errors/1 (src/smoosh_persist.erl, line 277)
in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
in call from eunit_proc:run_test/1 (eunit_proc.erl, line 531)
in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 356)
in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 514)
in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 456)
in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 346)
in call from eunit_proc:run_group/2 (eunit_proc.erl, line 570)
**error:{assertEqual,[{module,smoosh_persist},
              {line,277},
              {expression,"persist ( Q1 , # { } , # { } )"},
              {expected,{error,eacces}},
              {value,ok}]}
  output:<<"">>
  [done in 1.579 s]

big-r81 avatar Sep 23 '24 20:09 big-r81

I think, one problem is, that our test suites doesn't clean properly.

After running make elixir we have many open couchjs processes on Windows. I expect this for the other test suites too:

...
ViewOffsetTest [test/elixir/test/view_offsets_test.exs]
  * test basic view offsets (129.3ms) [L#34]
  * test repeated view offsets (1431.8ms) [L#62]

ReshardHelpers [test/elixir/test/reshard_helpers.exs]

PartitionHelpers [test/elixir/test/partition_helpers.exs]

Finished in 98.5 seconds (0.00s async, 98.5s sync)
542 tests, 0 failures, 8 excluded

Randomized with seed 857316
ERROR: Access denied
PS C:\relax\releases\3.4.1\apache-couchdb-3.4.1-RC1> Get-Process *      

Handles  NPM(K)    PM(K)      WS(K)     CPU(s)     Id  SI ProcessName                                                                                          
-------  ------    -----      -----     ------     --  -- -----------
    114       8     2848       8216              4480   0 AggregatorHost
    172      10     2128       9052              2964   0 blnsvr                                                                                               
    141      10     6520      13300              4388   0 conhost
    179       8     1356       6248       2,73   8616   0 conhost                                                                                              
     77       9     8044      14000       0,16    876   0 couchjs
     77       9     4684      11868       0,13   1056   0 couchjs
     77       9     8448      13796       0,17   1380   0 couchjs
     77       9     5800      11712       0,08   1764   0 couchjs                                                                                              
     77       9     6232      12256       0,67   1800   0 couchjs
     77       9     5796      11852       0,08   2156   0 couchjs
     77       9     5868      11724       0,08   2608   0 couchjs                                                                                              
     77       9     6644      12280       0,80   2860   0 couchjs
     77       9     7048      12348       0,08   3256   0 couchjs
     77      10     6260      13068       0,30   3296   0 couchjs
     77      10     5476      11440       0,33   4148   0 couchjs                                                                                              
     77       9     5860      11696       0,09   4948   0 couchjs
     77       9     5776      11892       0,08   4976   0 couchjs
     77       9     4760      11336       0,09   4988   0 couchjs                                                                                              
     77       9     5796      11712       0,08   5628   0 couchjs
     77       9     5728      11700       0,08   5660   0 couchjs
     77      10     5880      11728       0,09   5788   0 couchjs
     77       9     6616      11600       0,09   5920   0 couchjs
     77       9     5792      11784       0,08   6252   0 couchjs
     77       9     5832      11860       0,09   6376   0 couchjs
     77       9     5048      11748       0,11   6564   0 couchjs                                                                                              
     77       9     4952      11572       0,11   6748   0 couchjs
     77      10     5892      11972       0,08   6876   0 couchjs
     77      10     6384      13380       0,28   7052   0 couchjs                                                                                              
     77       9     6440      11948       0,11   7944   0 couchjs
     77      10     5976      11868       0,09   8108   0 couchjs
     77      10     5968      12472       0,34   8240   0 couchjs
     77       9     7512      13228       0,11   8244   0 couchjs                                                                                              
     77       9     6800      12020       0,11   8344   0 couchjs
     77       9     8080      13920       0,09   8380   0 couchjs                                                                                              
     77       9     5772      11756       0,06   8428   0 couchjs
     77       9     5772      11696       0,08   8444   0 couchjs                                                                                              
     77       9     6280      12972       0,13   8488   0 couchjs
     77       9     5828      11780       0,08   8996   0 couchjs
     77       9     4416      11300       0,11   9028   0 couchjs
     77      10     5856      11880       0,09   9036   0 couchjs                                                                                              
     77       9     5796      11800       0,06   9252   0 couchjs
     77       9     5756      11744       0,09   9460   0 couchjs
     77      10     6160      11780       0,08   9512   0 couchjs
     77       9     5888      11968       0,08   9640   0 couchjs                                                                                              
     77       9     5860      11756       0,08   9876   0 couchjs
     77       9     6608      11612       0,09  10008   0 couchjs
     77       9     7076      13452       2,64  10028   0 couchjs
     77       9     5784      11764       0,05  10124   0 couchjs                                                                                              
     77       9     7468      13944       3,20  10304   0 couchjs
     77       9     5780      11676       0,08  10600   0 couchjs
     77       9     5904      12256       0,13  10648   0 couchjs
     77       9     5780      11676       0,09  10808   0 couchjs                                                                                              
     77       9     8004      13516       0,17  10992   0 couchjs
     77       9     5124      11276       0,13  11484   0 couchjs
     77      10     6292      13320       1,09  11492   0 couchjs
     77       9     5828      11664       0,08  11556   0 couchjs                                                                                              
     77       9     6068      13216       2,25  11584   0 couchjs
     77       9     6368      13212       0,11  11932   0 couchjs
     77       9     4624      11392       0,09  12116   0 couchjs
     77       9     6036      11764       0,09  12184   0 couchjs                                                                                              

What is also strange are the following lines:

Randomized with seed 857316
ERROR: Access denied

big-r81 avatar Sep 23 '24 21:09 big-r81

Unless we restart the couch service we normally expect a pool couchjs processes to be started, we expect up to 100 of those while the server is running. I see that on Ubuntu as well.

nickva avatar Sep 23 '24 21:09 nickva

Access denied is interesting though. I wonder where that comes from...

nickva avatar Sep 23 '24 21:09 nickva

We saw approximately the same set of failures in chttpd tests on the arm freebsd ci worker [1]. The only change there from the working scenario was using a smaller VM (8GB). That is interesting, as it's a totally different OS, yet same tests failed. This seems to be socket code related and somehow memory related. It's still not very clear, but I thought we'd document at least if we this additional scenario sheds some light on the cause (if it is the same cause).

(Thanks to @dch for donating the freebsd CI workers, help debug and fix the issue)

08:13:48      chttpd_misc_test:51: -welcome_test_/0-fun-4- (should_have_version)...[0.037 s] ok
08:13:53      chttpd_misc_test:52: -welcome_test_/0-fun-2- (should_have_features)...*timed out*
08:13:53  in function gen:do_call/4 (gen.erl, line 259)
08:13:53  in call from gen_server:call/3 (gen_server.erl, line 415)
08:13:53  in call from ibrowse_http_client:send_req/7 (src/ibrowse_http_client.erl, line 114)
08:13:53  in call from ibrowse:do_send_req/7 (src/ibrowse.erl, line 500)
08:13:53  in call from ibrowse:try_routing_request/14 (src/ibrowse.erl, line 392)
08:13:53  in call from test_request:request/6 (src/test_request.erl, line 95)
08:13:53  in call from chttpd_misc_test:req_get/1 (test/eunit/chttpd_misc_test.erl, line 285)
08:13:53  in call from chttpd_misc_test:should_have_features/1 (test/eunit/chttpd_misc_test.erl, line 90)
08:13:53      undefined
08:13:53    chttpd_misc_test:53: -welcome_test_/0-fun-0- (should_have_uuid)...[0.003 s] ok
08:13:53    [done in 5.322 s]
0:32:57  undefined
10:32:57  *** context setup failed ***
10:32:57  **in function chttpd_changes_test:reqraw/3 (test/eunit/chttpd_changes_test.erl, line 1055)
10:32:57  in call from chttpd_changes_test:req/3 (test/eunit/chttpd_changes_test.erl, line 1043)
10:32:57  in call from chttpd_changes_test:'-create_docs/2-fun-1-'/2 (test/eunit/chttpd_changes_test.erl, line 999)
10:32:57  in call from lists:foreach_1/2 (lists.erl, line 1686)
10:32:57  in call from chttpd_changes_test:setup_basic/0 (test/eunit/chttpd_changes_test.erl, line 963)
10:32:57  **error:{badmatch,{error,req_timedout}}
10:32:57 
10:33:38  *** context setup failed ***
10:33:38  **in function chttpd_changes_test:reqraw/3 (test/eunit/chttpd_changes_test.erl, line 1055)
10:33:38  in call from chttpd_changes_test:req/3 (test/eunit/chttpd_changes_test.erl, line 1043)
10:33:38  in call from chttpd_changes_test:'-create_docs/2-fun-1-'/2 (test/eunit/chttpd_changes_test.erl, line 999)
10:33:38  in call from lists:foreach_1/2 (lists.erl, line 1686)
10:33:38  in call from chttpd_changes_test:setup_basic/0 (test/eunit/chttpd_changes_test.erl, line 963)
10:33:38  **error:{badmatch,{error,req_timedout}}
10:31:53  in function chttpd_dbs_info_test:should_return_db2_for_get_dbs_info_with_skip_1/1 (test/eunit/chttpd_dbs_info_test.erl, line 225)
10:31:53  in call from eunit_test:run_testfun/1 (eunit_test.erl, line 71)
10:31:53  in call from eunit_proc:run_test/1 (eunit_proc.erl, line 543)
10:31:53  in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 368)
10:31:53  in call from eunit_proc:handle_test/2 (eunit_proc.erl, line 526)
10:31:53  in call from eunit_proc:tests_inorder/3 (eunit_proc.erl, line 468)
10:31:53  in call from eunit_proc:with_timeout/3 (eunit_proc.erl, line 358)
10:31:53  in call from eunit_proc:run_group/2 (eunit_proc.erl, line 582)
10:31:53  **error:{assertEqual,[{module,chttpd_dbs_info_test},
10:31:53                {line,225},
10:31:53                {expression,"FilteredDbs"},
10:31:53                {expected,["db2-f79bfc712a17ad78c9687aeb4f6276de"]},
10:31:53                {value,["db1-f79bfc712a17ad78c9687aeb4f6276de",
10:31:53                        "db2-f79bfc712a17ad78c9687aeb4f6276de"]}]}
10:31:53    output:<<"">>

nickva avatar Apr 29 '25 16:04 nickva