firebird icon indicating copy to clipboard operation
firebird copied to clipboard

LIST() and LISTAGG() produce space-padding of characters wher datatype is Char(N)

Open pavel-zotov opened this issue 1 month ago • 0 comments

Applying LIST([DISTINCT] <s>) (and also LISTAGG([DISTINCT] <s>) WITHIN GROUP(ORDER BY <s>)) causes excessive space-padding of characters if <s> is defined as Char(N). Length of displayed (padded) data depends on both character set and presense of DISTINCT clause. Problem does not exist for Varchar and blob datatypes.

set bail on;
set list on;
set blob all;
set names utf8;
shell if exist r:\temp\tmp4test.fdb del r:\temp\tmp4test.fdb 2>nul;
create database 'localhost:r:\temp\tmp4test.fdb' user 'sysdba' password 'masterkey';


recreate table test(a char(7) character set win1250, x char(7) character set win1250 , u char(7) character set utf8, v char(7) character set utf8);
-- recreate table test(a varchar(7) character set win1250, x varchar(7) character set win1250 , u varchar(7) character set utf8, v varchar(7) character set utf8); -- [ !! ] no padding in this case
insert into test(a, x, u, v) values('q', 'ß', 'q', 'ω'); -- 'ω' requires 2 bytes
insert into test(a, x, u, v) values('w', '€', 'w', '€'); -- '€' requires 3 bytes
insert into test(a, x, u, v) values('e', 'ţ', 'e', '𝄢'); -- '𝄢' requires 4 bytes
commit;

select 
	list_1250_ascii_only
	,char_length(list_1250_ascii_only)
    ,'' as "-------------------------------------"
	,list_distinct_1250_ascii_only
	,char_length(list_distinct_1250_ascii_only)
    ,'' as "-------------------------------------"
	,list_1250_non_ascii
	,char_length(list_1250_non_ascii)
    ,'' as "-------------------------------------"
	,list_distinct_1250_non_ascii
	,char_length(list_distinct_1250_non_ascii)
    ,'' as "-------------------------------------"
	,list_utf8_ascii_only
	,char_length(list_utf8_ascii_only)
    ,'' as "-------------------------------------"
	,list_distinct_utf8_ascii_only
	,char_length(list_distinct_utf8_ascii_only)
    ,'' as "-------------------------------------"
	,list_utf8_non_ascii
	,char_length(list_utf8_non_ascii)
    ,'' as "-------------------------------------"
	,list_distinct_utf8_non_ascii
	,char_length(list_distinct_utf8_non_ascii)
    ,'' as "====================================="
	,listagg_1250_ascii_only
	,char_length(listagg_1250_ascii_only)
    ,'' as "-------------------------------------"
	,listagg_distinct_1250_ascii_only
	,char_length(listagg_distinct_1250_ascii_only)
    ,'' as "-------------------------------------"
	,listagg_1250_non_ascii
	,char_length(listagg_1250_non_ascii)
    ,'' as "-------------------------------------"
	,listagg_distinct_1250_non_ascii
	,char_length(listagg_distinct_1250_non_ascii)
    ,'' as "-------------------------------------"
	,listagg_utf8_ascii_only
	,char_length(listagg_utf8_ascii_only)
    ,'' as "-------------------------------------"
	,listagg_distinct_utf8_ascii_only
	,char_length(listagg_distinct_utf8_ascii_only)
    ,'' as "-------------------------------------"
	,listagg_utf8_non_ascii
	,char_length(listagg_utf8_non_ascii)
    ,'' as "-------------------------------------"
	,listagg_distinct_utf8_non_ascii
	,char_length(listagg_distinct_utf8_non_ascii)
	
from (
	select 
	 	 replace(list(a, ':'),' ','*') as list_1250_ascii_only
		,replace(list(distinct a, ':'),' ','*') list_distinct_1250_ascii_only

		,replace(list(x, ':'),' ','*') as list_1250_non_ascii
		,replace(list(distinct x, ':'),' ','*') list_distinct_1250_non_ascii

		,replace(list(u, ':'),' ','*') list_utf8_ascii_only
		,replace(list(distinct u, ':'),' ','*') list_distinct_utf8_ascii_only

		,replace(list(v, ':'),' ','*') list_utf8_non_ascii
		,replace(list(distinct v, ':'),' ','*') list_distinct_utf8_non_ascii 
		-- ====================================================================================================
		,replace(listagg(a, ':') within group(order by a),' ','*') as listagg_1250_ascii_only
		,replace(listagg(distinct a, ':') within group(order by a),' ','*') as listagg_distinct_1250_ascii_only
		
		,replace(listagg(x, ':') within group(order by x),' ','*') as listagg_1250_non_ascii
		,replace(listagg(distinct x, ':') within group(order by x),' ','*') as listagg_distinct_1250_non_ascii

		,replace(listagg(u, ':') within group(order by u),' ','*') as listagg_utf8_ascii_only
		,replace(listagg(distinct u, ':') within group(order by u),' ','*') as listagg_distinct_utf8_ascii_only

		,replace(listagg(v, ':'),' ','*') listagg_utf8_non_ascii
		,replace(listagg(distinct v, ':'),' ','*') listagg_distinct_utf8_non_ascii 


	from test
)
;

Output:

LIST_1250_ASCII_ONLY            0:76
q*:w*:e*
CHAR_LENGTH                     8
-------------------------------------
LIST_DISTINCT_1250_ASCII_ONLY   0:6e
e*:q*:w*
CHAR_LENGTH                     8
-------------------------------------
LIST_1250_NON_ASCII             0:66
ß******:€******:ţ******
CHAR_LENGTH                     23
-------------------------------------
LIST_DISTINCT_1250_NON_ASCII    0:5e
€******:ß******:ţ******
CHAR_LENGTH                     23
-------------------------------------
LIST_UTF8_ASCII_ONLY            0:56
q******:w******:e******
CHAR_LENGTH                     23
-------------------------------------
LIST_DISTINCT_UTF8_ASCII_ONLY   0:51
e***************************:q***************************:w***************************
CHAR_LENGTH                     86
-------------------------------------
LIST_UTF8_NON_ASCII             0:4c
ω******:€******:𝄢 ******
CHAR_LENGTH                     23
-------------------------------------
LIST_DISTINCT_UTF8_NON_ASCII    0:47
ω**************************:€*************************:𝄢 ************************
CHAR_LENGTH                     80
=====================================
LISTAGG_1250_ASCII_ONLY         0:42
e*:q*:w*
CHAR_LENGTH                     8
-------------------------------------
LISTAGG_DISTINCT_1250_ASCII_ONLY 0:3a
e*:q*:w*
CHAR_LENGTH                     8
-------------------------------------
LISTAGG_1250_NON_ASCII          0:32
€******:ß******:ţ******
CHAR_LENGTH                     23
-------------------------------------
LISTAGG_DISTINCT_1250_NON_ASCII 0:2a
€******:ß******:ţ******
CHAR_LENGTH                     23
-------------------------------------
LISTAGG_UTF8_ASCII_ONLY         0:22
e***************************:q***************************:w***************************
CHAR_LENGTH                     86
-------------------------------------
LISTAGG_DISTINCT_UTF8_ASCII_ONLY 0:1d
e***************************:q***************************:w***************************
CHAR_LENGTH                     86
-------------------------------------
LISTAGG_UTF8_NON_ASCII          0:18
ω******:€******:𝄢 ******
CHAR_LENGTH                     23
-------------------------------------
LISTAGG_DISTINCT_UTF8_NON_ASCII 0:13
ω**************************:€*************************:𝄢 ************************
CHAR_LENGTH                     80

Checked on WI-T6.0.0.1357 but same results on all FB since 2.5 (of course, without LISTAGG() that did appear recently in 6.x)

pavel-zotov avatar Nov 24 '25 10:11 pavel-zotov