NetTopologySuite.IO.ShapeFile
NetTopologySuite.IO.ShapeFile copied to clipboard
Possible bug in how the DbaseFileHeader class handles encoding?
The attribute column headers and values of a shapefile I am reading contain Chinese characters. QGIS displays them correctly. However, when I read the file using either of the following two approaches, the column headers are garbled, although the values are read correctly:
// Open the shapefile and iterate over every feature inside its bounding box.
var shp = new NetTopologySuite.IO.ShapeFile.Extended.ShapeDataReader(shpPath);
foreach (var f in shp.ReadByMBRFilter(shp.ShapefileBounds))
{
    //■■■■■■■■■■■■■■■■■■■■■■■■■
    // the header (attribute names) of f.Attributes is not correct here
    //■■■■■■■■■■■■■■■■■■■■■■■■■
}
or
using (var rd = new ShapefileDataReader(readFile, factory, readEncoding))
{
readHeader = rd.DbaseHeader;
string[] fieldNames = new string[readHeader.NumFields];
features = new List<Feature>(readHeader.NumRecords);
for (int i = 0; i < fieldNames.Length; i++)
{
//■■■■■■■■■■■■■■■■■■■■■■■■■
//rd.GetName(i + 1) here is not correct
//■■■■■■■■■■■■■■■■■■■■■■■■■
fieldNames[i] = rd.GetName(i + 1);
}
·····
I even tried every available encoding:
//test all encoding ■■■■■■■■
foreach (var encoding in Encoding.GetEncodings())
{using (var rd = new ShapefileDataReader(readFile, factory, encoding ))
{
readHeader = rd.DbaseHeader;
string[] fieldNames = new string[readHeader.NumFields];
features = new List<Feature>(readHeader.NumRecords);
for (int i = 0; i < fieldNames.Length; i++)
{
//■■■■■■■■■■■■■■■■■■■■■■■■■
//rd.GetName(i + 1) here is not correct
//■■■■■■■■■■■■■■■■■■■■■■■■■
fieldNames[i] = rd.GetName(i + 1);
}
·····
}
So I rewrote the class DbaseFileHeader,
changing the function public void ReadHeader(BinaryReader reader, string filename)
to:
/// <summary>
/// Reads the dBASE header (the file descriptor followed by every field
/// descriptor) from <paramref name="reader"/> and leaves the underlying
/// stream positioned at the end of the header.
/// </summary>
/// <remarks>
/// Field names are decoded with the encoding exposed by <c>this.Encoding</c>.
/// The encoding mark stored in byte 29 of the file (previously passed to
/// <c>DetectEncodingFromMark</c>) is deliberately ignored, because relying on
/// it produced garbled field names for files whose names are not written in
/// the marked encoding.
/// </remarks>
/// <param name="reader">
/// Binary reader over the DBF data. The final seek assumes the header starts
/// at offset 0 of the underlying stream.
/// </param>
/// <param name="filename">
/// Path of the DBF file. Currently unused by this method; kept so existing
/// callers keep compiling.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="NotSupportedException">The file type byte is not 0x03.</exception>
public void ReadHeader(BinaryReader reader, string filename)
{
    if (reader == null)
        throw new ArgumentNullException("reader");

    // type of reader.
    _fileType = reader.ReadByte();
    if (_fileType != 0x03)
        throw new NotSupportedException("Unsupported DBF reader Type " + _fileType);

    // parse the update date information (the year is stored as an offset from 1900).
    // NOTE(review): a zero month or day byte makes the DateTime constructor throw;
    // confirm whether such files need to be tolerated.
    int year = reader.ReadByte();
    int month = reader.ReadByte();
    int day = reader.ReadByte();
    _updateDate = new DateTime(year + 1900, month, day);

    // read the number of records.
    _numRecords = reader.ReadInt32();

    // read the length of the header structure.
    _headerLength = reader.ReadInt16();

    // read the length of a record
    _recordLength = reader.ReadInt16();

    // skip the 20 remaining bytes of the file descriptor (file offsets 12..31);
    // this includes the encoding mark at offset 29, which is intentionally not used.
    reader.ReadBytes(20);

    // Use the encoding configured on this header instead of the one marked in the file.
    _encoding = this.Encoding;

    // Replace reader with one with correct encoding. The replacement is not disposed
    // here on purpose: disposing it would also close the caller's stream.
    reader = new BinaryReader(reader.BaseStream, _encoding);

    // calculate the number of Fields in the header
    _numFields = (_headerLength - FileDescriptorSize - 1) / FileDescriptorSize;

    // read all of the header records
    _fieldDescriptions = new DbaseFieldDescriptor[_numFields];
    for (int i = 0; i < _numFields; i++)
    {
        DbaseFieldDescriptor field = new DbaseFieldDescriptor();

        // read the field name: 11 bytes, cut at the first NUL character if there is one.
        byte[] buffer = reader.ReadBytes(11);
        // NOTE: only this _encoding.GetString method is available in Silverlight
        string name = _encoding.GetString(buffer, 0, buffer.Length);
        int nullPoint = name.IndexOf('\0');
        if (nullPoint != -1)
            name = name.Substring(0, nullPoint);
        field.Name = name;

        // read the field type
        field.DbaseType = (char)reader.ReadByte();

        // read the field data address, offset from the start of the record.
        field.DataAddress = reader.ReadInt32();

        // read the field length in bytes (ReadByte is unsigned, so no sign fix-up is needed)
        field.Length = reader.ReadByte();

        // read the field decimal count in bytes
        field.DecimalCount = reader.ReadByte();

        // skip the reserved bytes.
        reader.ReadBytes(14);

        _fieldDescriptions[i] = field;
    }

    // Last byte is a marker for the end of the field definitions. Its value (0x0d) is
    // deliberately not validated: Trond Benum found that the check fails for some
    // presumably valid test shapefiles.
    reader.ReadByte();

    // Assure we are at the end of the header!
    if (reader.BaseStream.Position != _headerLength)
        reader.BaseStream.Seek(_headerLength, SeekOrigin.Begin);
}
Actually, I only replaced the line _encoding = DetectEncodingFromMark(lcid, filename);
with: _encoding = this.Encoding;
Then I can read the header correctly by using an Encoding with code page 936.
// Derive the .dbf path from the .shp path. Path.ChangeExtension does not depend on
// the casing of the original extension and does not throw when ".shp" is missing.
var dbf = Path.ChangeExtension(shpPath, ".dbf");

// Code page 936 (Simplified Chinese). On .NET Core / .NET 5+ this code page is only
// available after registering CodePagesEncodingProvider.
var encoding = Encoding.GetEncoding(936);
var header = new DbaseFileHeaderEx(encoding);

// Dispose the stream and reader as soon as the header has been read.
using (var stream = new FileStream(dbf, FileMode.Open, FileAccess.Read, FileShare.Read))
using (var fileReader = new BinaryReader(stream, encoding))
{
    // read the header
    header.ReadHeader(fileReader, dbf);
}