mail-parser
mail-parser copied to clipboard
Parsing mail consisting of only text body
Parsing a mail that consists of only a text body causes the parser to return that text body as both the text body and the HTML body.
use mail_parser::MessageParser;
/// Minimal reproduction: parse a single-part `text/plain` message and print
/// what the parsed message exposes as its text body and as its HTML body.
fn main() {
    // Raw message with one text/plain part. The empty line after the last
    // header is the header/body separator; the offsets in the reported output
    // (last header ends at 177, body starts at 178) account for it.
    // The continuation lines stay at column 0 because any indentation would
    // become part of the message itself.
    let raw = "From: Test <[email protected]>
Subject: test
To: [email protected]
Content-Language: en-US
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit

test";

    // `unwrap` is acceptable here: this is a throwaway reproduction and a
    // parse failure should abort it loudly.
    let message = MessageParser::default().parse(raw.as_bytes()).unwrap();

    // Part indices and counts reported for the text and the HTML body.
    println!("{:?} {}", message.text_body, message.text_body_count());
    println!("{:?} {}", message.html_body, message.html_body_count());

    // First part yielded by each body iterator, for side-by-side comparison.
    let first_text = message.text_bodies().next();
    println!("{first_text:?}");
    let first_html = message.html_bodies().next();
    println!("{first_html:?}");
}
Output:
[0] 1
[0] 1
Some(MessagePart { headers: [Header { name: From, value: Address(List([Addr { name: Some("Test"), address: Some("[email protected]") }])), offset_field: 0, offset_start: 5, offset_end: 30 }, Header { name: Subject, value: Text("test"), offset_field: 30, offset_start: 38, offset_end: 44 }, Header { name: To, value: Address(List([Addr { name: None, address: Some("[email protected]") }])), offset_field: 44, offset_start: 47, offset_end: 66 }, Header { name: ContentLanguage, value: Text("en-US"), offset_field: 66, offset_start: 83, offset_end: 90 }, Header { name: ContentType, value: ContentType(ContentType { c_type: "text", c_subtype: Some("plain"), attributes: Some([Attribute { name: "charset", value: "UTF-8" }, Attribute { name: "format", value: "flowed" }]) }), offset_field: 90, offset_start: 103, offset_end: 145 }, Header { name: ContentTransferEncoding, value: Text("7bit"), offset_field: 145, offset_start: 171, offset_end: 177 }], is_encoding_problem: false, body: Text("test"), encoding: None, offset_header: 0, offset_body: 178, offset_end: 182 })
Some(MessagePart { headers: [Header { name: From, value: Address(List([Addr { name: Some("Test"), address: Some("[email protected]") }])), offset_field: 0, offset_start: 5, offset_end: 30 }, Header { name: Subject, value: Text("test"), offset_field: 30, offset_start: 38, offset_end: 44 }, Header { name: To, value: Address(List([Addr { name: None, address: Some("[email protected]") }])), offset_field: 44, offset_start: 47, offset_end: 66 }, Header { name: ContentLanguage, value: Text("en-US"), offset_field: 66, offset_start: 83, offset_end: 90 }, Header { name: ContentType, value: ContentType(ContentType { c_type: "text", c_subtype: Some("plain"), attributes: Some([Attribute { name: "charset", value: "UTF-8" }, Attribute { name: "format", value: "flowed" }]) }), offset_field: 90, offset_start: 103, offset_end: 145 }, Header { name: ContentTransferEncoding, value: Text("7bit"), offset_field: 145, offset_start: 171, offset_end: 177 }], is_encoding_problem: false, body: Text("test"), encoding: None, offset_header: 0, offset_body: 178, offset_end: 182 })
The documentation does not mention such behaviour.
I hope this is not a deliberate design choice: if it is, there seems to be no way to determine whether the mail actually has a text body or an HTML body. That is important information, and losing it is a crucial flaw.