std
std copied to clipboard
Remove BOM in CsvFileIterator
When reading a file with a UTF-8 BOM (or any other BOM, for that matter), the BOM is included in the data read by fgetcsv(), which corrupts the header row:
- the BOM is part of the first column's name
- if an enclosure character (e.g. ") is used, it's not detected for the first column name
Not sure whether removing the BOM should be part of this library, or whether we should rely on a stream filter.
Sample UTF-8 BOM filter:
/**
 * Stream filter that removes a leading UTF-8 byte order mark (EF BB BF).
 *
 * Only the very start of the stream is inspected; everything after the first
 * three bytes is passed through untouched. The BOM is detected even when the
 * stream delivers it split across several buckets (small chunk sizes, pipes,
 * sockets), by holding back the leading bytes until a decision can be made.
 */
class BOMFilter extends \php_user_filter
{
    /** UTF-8 encoded byte order mark. */
    private const BOM = "\xEF\xBB\xBF";

    /** @var bool Whether the start of the stream has already been inspected. */
    private $checked = false;

    /** @var string Leading bytes held back while they could still be the start of a BOM. */
    private $pending = '';

    /**
     * No setup is required; always accept creation.
     */
    public function onCreate(): bool
    {
        return true;
    }

    /**
     * Nothing to release.
     */
    public function onClose(): void
    {
    }

    /**
     * Pass data through, dropping a UTF-8 BOM found at the start of the stream.
     *
     * @param resource $in       Incoming bucket brigade.
     * @param resource $out      Outgoing bucket brigade.
     * @param int      $consumed Incremented by the number of input bytes taken from $in.
     * @param bool     $closing  True when the stream is being flushed for the last time.
     *
     * @return int PSFS_FEED_ME while the only input seen so far is being held back
     *             as a possible BOM prefix, PSFS_PASS_ON otherwise.
     */
    public function filter($in, $out, &$consumed, $closing): int
    {
        $bomLength = \strlen(self::BOM);
        $emitted = false;

        while ($bucket = stream_bucket_make_writeable($in)) {
            // Count the bytes taken from the input, not what is left after stripping.
            $consumed += $bucket->datalen;

            if (! $this->checked) {
                $this->pending .= $bucket->data;
                $pendingLength = \strlen($this->pending);

                // Fewer than three bytes that still match the start of a BOM:
                // hold them back until the next bucket settles the question.
                if ($pendingLength < $bomLength
                    && strncmp(self::BOM, $this->pending, $pendingLength) === 0
                ) {
                    continue;
                }

                $this->checked = true;
                $bucket->data = strncmp($this->pending, self::BOM, $bomLength) === 0
                    ? (string) substr($this->pending, $bomLength)
                    : $this->pending;
                $this->pending = '';
            }

            stream_bucket_append($out, $bucket);
            $emitted = true;
        }

        // The stream ended before three bytes arrived: what was held back is
        // not a complete BOM, so it is real data and must not be lost.
        if ($closing && $this->pending !== '') {
            $this->checked = true;
            stream_bucket_append($out, stream_bucket_new($this->stream, $this->pending));
            $this->pending = '';
            $emitted = true;
        }

        return (! $emitted && $this->pending !== '') ? \PSFS_FEED_ME : \PSFS_PASS_ON;
    }
}
Usage:
// Register the filter only once: stream_filter_register() fails when the name is already taken.
if (! in_array('bom-filter', stream_get_filters(), true)) {
    stream_filter_register('bom-filter', BOMFilter::class);
}

// fopen() returns false on failure; stop here instead of passing false on to the filter API.
$fp = fopen($file, 'rb');
if ($fp === false) {
    throw new \RuntimeException(sprintf('Unable to open "%s" for reading.', $file));
}

// Attach the filter on the read side so the BOM is removed before the first row is parsed.
stream_filter_append($fp, 'bom-filter', \STREAM_FILTER_READ);
$rows = new CsvFileIterator($fp);