[StdLib] Add C3-native `sscanf`
Libc provides `sscanf` (and its family) to parse a string into variables according to a format string. It is basically the exact opposite of `printf`.
As we have the C3 version of printf, it would also be great to have a C3 version of sscanf.
Example:
// Example: parse a terminal escape sequence into its two numeric fields
// using the proposed `string::sscanf`.
fn void main() {
	int codepoint;
	int modifier;
	String seq = "\e[97;5u";

	// Both fields must be matched; anything else is reported as a failure.
	if (string::sscanf(seq, "\e[%d;%du", &codepoint, &modifier) != 2) {
		io::eprintn("Failed to parse string");
		return;
	}
	io::printfn("Parsed codepoint %d and modifier %d", codepoint, modifier);
}
Scanf is such a bad function in C though. I don't see people copying it to be honest.
What do you mean with "bad function" and "don't see people copying it"?
@BWindey there are some security issues for example, among others https://sternumiot.com/iot-blog/scanf-c-function-syntax-examples-and-security-best-practices/
Seems like I'll be writing my own kind of sscanf purpose-built for my own good.
The problems with the C implementation could be resolved in a C3-native implementation though. It's up to you whether this should be added to the standard library or not. (The libc-binding is already present.)
Ok, worked on an implementation today. It's not yet fully tested, documentation is lacking, and only supports base 10 for numbers, but it's a start. Oh, and code is horrible XD
But as proof of concept this counts!
The output of the code below is:
Original: Hi nr. -35.13, you're a very gentle person. Is that true?
Format: Hi nr. %f, you're a %s person. Is that %b?
Parsed nr = -35.130000, trait = very gentle, question = 1
import std::io;
import std::math;
// Faults that `String.parse_format` can return.
faultdef
// A literal character of the format (including "%%") differs from the input.
FORMAT_NO_MATCH,
// The character following '%' in the format is not a supported specifier.
UNKNOWN_TYPE_SPECIFIER,
// The argument supplied for a specifier has a type the specifier cannot fill.
ARG_DOES_NOT_MATCH_TYPE_SPECIFIER,
// The format contains more value specifiers than arguments were passed.
NOT_ENOUGH_ARGS_FOR_FORMAT,
// The input could not be read as an integer for "%d".
MALFORMED_INTEGER,
// A "%s" is directly followed by format text that cannot mark the string's end.
NO_STRING_END,
// The input could not be read as a boolean for "%b".
INVALID_BOOLEAN,
// Parsing stopped before the end of the format string.
DID_NOT_CONSUME_FULL_FORMAT,
// Parsing stopped before the end of the input string.
DID_NOT_CONSUME_FULL_STRING,
// The input could not be read as a floating point number for "%f".
MALFORMED_FLOAT;
// Reports whether the value behind `a` is a signed or unsigned integer.
fn bool any_is_intlike(any a) {
	switch (a.type.kindof) {
		case SIGNED_INT:
		case UNSIGNED_INT:
			return true;
		default:
			return false;
	}
}
// Reports whether the value behind `a` is a `String` or a slice of `char`.
fn bool any_is_string_like(any a) {
	typeid t = a.type;
	if (t == String.typeid) {
		return true;
	}
	return t.kindof == SLICE && t.inner == char.typeid;
}
// Reports whether the value behind `a` is exactly a `bool`.
fn bool any_is_bool_like(any a) {
	typeid expected = bool.typeid;
	return expected == a.type;
}
// Reports whether the value behind `a` is a floating point type.
fn bool any_is_float_like(any a) {
	switch (a.type.kindof) {
		case FLOAT:
			return true;
		default:
			return false;
	}
}
// Multiplies the number stored behind `a` in place by `mult`, after casting
// `mult` to the stored type. Values of any other type are left untouched.
macro void multiply_to_any(any a, mult) {
	typeid t = a.type;
	void* p = a.ptr;
	if (t == char.typeid) {
		*(char*) p *= (char) mult;
	} else if (t == ichar.typeid) {
		*(ichar*) p *= (ichar) mult;
	} else if (t == short.typeid) {
		*(short*) p *= (short) mult;
	} else if (t == ushort.typeid) {
		*(ushort*) p *= (ushort) mult;
	} else if (t == int.typeid) {
		*(int*) p *= (int) mult;
	} else if (t == uint.typeid) {
		*(uint*) p *= (uint) mult;
	} else if (t == long.typeid) {
		*(long*) p *= (long) mult;
	} else if (t == ulong.typeid) {
		*(ulong*) p *= (ulong) mult;
	} else if (t == int128.typeid) {
		*(int128*) p *= (int128) mult;
	} else if (t == uint128.typeid) {
		*(uint128*) p *= (uint128) mult;
	} else if (t == float.typeid) {
		*(float*) p *= (float) mult;
	} else if (t == double.typeid) {
		*(double*) p *= (double) mult;
	}
}
// Adds `add` in place to the number stored behind `a`, after casting `add`
// to the stored type. Values of any other type are left untouched.
macro void add_to_any(any a, add) {
	typeid t = a.type;
	void* p = a.ptr;
	if (t == char.typeid) {
		*(char*) p += (char) add;
	} else if (t == ichar.typeid) {
		*(ichar*) p += (ichar) add;
	} else if (t == short.typeid) {
		*(short*) p += (short) add;
	} else if (t == ushort.typeid) {
		*(ushort*) p += (ushort) add;
	} else if (t == int.typeid) {
		*(int*) p += (int) add;
	} else if (t == uint.typeid) {
		*(uint*) p += (uint) add;
	} else if (t == long.typeid) {
		*(long*) p += (long) add;
	} else if (t == ulong.typeid) {
		*(ulong*) p += (ulong) add;
	} else if (t == int128.typeid) {
		*(int128*) p += (int128) add;
	} else if (t == uint128.typeid) {
		*(uint128*) p += (uint128) add;
	} else if (t == float.typeid) {
		*(float*) p += (float) add;
	} else if (t == double.typeid) {
		*(double*) p += (double) add;
	}
}
// Replaces the number stored behind `a` with its negation. Values of any
// other type are left untouched.
fn void negate_any(any a) {
	typeid t = a.type;
	void* p = a.ptr;
	if (t == char.typeid) {
		*(char*) p = -(*(char*) p);
	} else if (t == ichar.typeid) {
		*(ichar*) p = -(*(ichar*) p);
	} else if (t == short.typeid) {
		*(short*) p = -(*(short*) p);
	} else if (t == ushort.typeid) {
		*(ushort*) p = -(*(ushort*) p);
	} else if (t == int.typeid) {
		*(int*) p = -(*(int*) p);
	} else if (t == uint.typeid) {
		*(uint*) p = -(*(uint*) p);
	} else if (t == long.typeid) {
		*(long*) p = -(*(long*) p);
	} else if (t == ulong.typeid) {
		*(ulong*) p = -(*(ulong*) p);
	} else if (t == int128.typeid) {
		*(int128*) p = -(*(int128*) p);
	} else if (t == uint128.typeid) {
		*(uint128*) p = -(*(uint128*) p);
	} else if (t == float.typeid) {
		*(float*) p = -(*(float*) p);
	} else if (t == double.typeid) {
		*(double*) p = -(*(double*) p);
	}
}
// Parses `self` according to `fmt` and stores every converted value through
// the matching pointer in `args` (the inverse of a printf-style formatter).
//
// Supported specifiers:
//   %%  a literal '%'
//   %d  base-10 integer with optional sign, stored into any integer type
//   %s  text up to the literal format text that follows the specifier (or up
//       to the end of the input when "%s" ends the format); the stored
//       String is a view into `self`, not a copy
//   %b  boolean: 'y', '1' or "true" / 'n', '0' or "false"
//   %f  base-10 float: digits, optional ".digits", optional exponent
//
// Every other character of `fmt` must match the input exactly, and both
// strings must be consumed completely.
//
// Returns 0 on success, otherwise one of the faults declared in this file.
// Arguments handled before a failure keep the values written so far.
fn int? String.parse_format(String self, String fmt, any ...args) {
	usz fmt_idx = 0;
	usz slf_idx = 0;
	usz arg_idx = 0;

	while (fmt_idx < fmt.len && slf_idx < self.len) {
		// Literal format characters must match the input one to one.
		if (fmt[fmt_idx] != '%') {
			if (fmt[fmt_idx] != self[slf_idx]) {
				return FORMAT_NO_MATCH?;
			}
			fmt_idx++;
			slf_idx++;
			continue;
		}

		// A lone '%' at the very end of the format has no specifier to read.
		if (fmt_idx + 1 >= fmt.len) {
			return UNKNOWN_TYPE_SPECIFIER?;
		}
		char spec = fmt[fmt_idx + 1];
		if (spec != '%' && arg_idx >= args.len) {
			return NOT_ENOUGH_ARGS_FOR_FORMAT?;
		}
		// From here on `fmt_idx` points at the specifier character.
		fmt_idx++;

		any arg;
		// This check is needed for supporting "%%", which consumes no argument.
		if (arg_idx < args.len) {
			arg = args[arg_idx];
		}

		switch (spec) {
			case '%':
				// "%%" is interpreted as literal '%'
				if (self[slf_idx] != '%') {
					return FORMAT_NO_MATCH?;
				}
				slf_idx++;
				fmt_idx++;

			case 'd':
				if (!any_is_intlike(arg)) {
					return ARG_DOES_NOT_MATCH_TYPE_SPECIFIER?;
				}
				mem::set(arg.ptr, 0, arg.type.sizeof);

				bool is_negative = false;
				char sign = self[slf_idx];
				if (sign == '-' || sign == '+') {
					is_negative = sign == '-';
					slf_idx++;
					if (slf_idx >= self.len) {
						return MALFORMED_INTEGER?;
					}
				}

				usz digits_start = slf_idx;
				while (slf_idx < self.len && self[slf_idx] >= '0' && self[slf_idx] <= '9') {
					multiply_to_any(arg, 10);
					add_to_any(arg, self[slf_idx] - '0');
					slf_idx++;
				}
				// An integer needs at least one digit; otherwise "%d" would
				// silently accept arbitrary text as 0.
				if (slf_idx == digits_start) {
					return MALFORMED_INTEGER?;
				}
				if (is_negative) {
					negate_any(arg);
				}
				fmt_idx++;
				arg_idx++;

			case 's':
				if (!any_is_string_like(arg)) {
					return ARG_DOES_NOT_MATCH_TYPE_SPECIFIER?;
				}
				// Disallow "%s%x" (where 'x' != '%'): without literal text
				// between the two specifiers the string has no end marker.
				if (fmt_idx + 1 < fmt.len && fmt[fmt_idx + 1] == '%') {
					if (fmt_idx + 2 >= fmt.len || fmt[fmt_idx + 2] != '%') {
						return NO_STRING_END?;
					}
				}

				usz start_slice = slf_idx;
				// Determine part after '%s' in format-string that needs
				// to be matched in `self` before ending the string
				usz match_start = fmt_idx + 1;
				usz match_len = 0;
				if (match_start < fmt.len && fmt[match_start] == '%') {
					// "%s%%": the end marker is a literal '%'. The main loop
					// then consumes it through the regular "%%" handling.
					match_len = 1;
				} else {
					while (match_start + match_len < fmt.len && fmt[match_start + match_len] != '%') {
						match_len++;
					}
				}

				if (match_len == 0) {
					// "%s" ends the format: capture the rest of the input.
					slf_idx = self.len;
				} else {
					String match = fmt[match_start:match_len];
					// Only compare windows that fit completely inside `self`.
					while (slf_idx + match_len <= self.len && self[slf_idx:match_len] != match) {
						slf_idx++;
					}
					if (slf_idx + match_len > self.len) {
						return FORMAT_NO_MATCH?;
					}
				}
				// Start/length slicing stays valid for an empty capture.
				*(String*) arg.ptr = self[start_slice:slf_idx - start_slice];
				fmt_idx++;
				arg_idx++;

			case 'b':
				if (!any_is_bool_like(arg)) {
					return ARG_DOES_NOT_MATCH_TYPE_SPECIFIER?;
				}
				switch (self[slf_idx]) {
					case 'y':
					case '1':
						*(bool*) arg.ptr = true;
						slf_idx++;
					case 'n':
					case '0':
						*(bool*) arg.ptr = false;
						slf_idx++;
					case 't':
						// `>` rather than `>=`: the word may end the input.
						if (
							slf_idx + "true".len > self.len
							|| self[slf_idx:"true".len] != "true"
						) {
							return INVALID_BOOLEAN?;
						}
						*(bool*) arg.ptr = true;
						slf_idx += "true".len;
					case 'f':
						if (
							slf_idx + "false".len > self.len
							|| self[slf_idx:"false".len] != "false"
						) {
							return INVALID_BOOLEAN?;
						}
						*(bool*) arg.ptr = false;
						slf_idx += "false".len;
					default:
						return INVALID_BOOLEAN?;
				}
				fmt_idx++;
				arg_idx++;

			case 'f':
				if (!any_is_float_like(arg)) {
					return ARG_DOES_NOT_MATCH_TYPE_SPECIFIER?;
				}
				mem::set(arg.ptr, 0, arg.type.sizeof);

				bool is_negative = false;
				char sign = self[slf_idx];
				if (sign == '-' || sign == '+') {
					is_negative = sign == '-';
					slf_idx++;
					if (slf_idx >= self.len) {
						return MALFORMED_FLOAT?;
					}
				}

				// Whole part
				usz whole_start = slf_idx;
				ulong whole_part = 0;
				while (slf_idx < self.len && self[slf_idx] >= '0' && self[slf_idx] <= '9') {
					whole_part *= 10;
					whole_part += (ulong) (self[slf_idx] - '0');
					slf_idx++;
				}
				bool has_digits = slf_idx > whole_start;
				add_to_any(arg, whole_part);

				// Optional fraction part
				if (slf_idx < self.len && self[slf_idx] == '.') {
					slf_idx++;
					if (slf_idx >= self.len) {
						return MALFORMED_FLOAT?;
					}
					ulong fraction_part = 0;
					ushort amount_digits = 0;
					while (
						slf_idx < self.len
						&& self[slf_idx] >= '0'
						&& self[slf_idx] <= '9'
					) {
						fraction_part *= 10;
						fraction_part += (ulong) (self[slf_idx] - '0');
						slf_idx++;
						amount_digits++;
					}
					if (amount_digits > 0) {
						has_digits = true;
					}
					add_to_any(
						arg,
						(double) fraction_part * math::pow(10, -amount_digits)
					);
				}

				// A float needs a digit in its whole or fraction part.
				if (!has_digits) {
					return MALFORMED_FLOAT?;
				}

				// Optional exponent part. The 'e' is consumed only here, so
				// both "1e5" and "1.5e5" reach this branch.
				if (slf_idx < self.len && (self[slf_idx] == 'e' || self[slf_idx] == 'E')) {
					slf_idx++;
					if (slf_idx >= self.len) {
						return MALFORMED_FLOAT?;
					}
					bool exp_is_negative = false;
					char exp_sign = self[slf_idx];
					if (exp_sign == '-' || exp_sign == '+') {
						exp_is_negative = exp_sign == '-';
						slf_idx++;
						if (slf_idx >= self.len) {
							return MALFORMED_FLOAT?;
						}
					}
					usz exp_start = slf_idx;
					int exponent = 0;
					while (slf_idx < self.len && self[slf_idx] >= '0' && self[slf_idx] <= '9') {
						exponent *= 10;
						exponent += (int) (self[slf_idx] - '0');
						slf_idx++;
					}
					if (slf_idx == exp_start) {
						return MALFORMED_FLOAT?;
					}
					if (exp_is_negative) {
						exponent = -exponent;
					}
					multiply_to_any(arg, math::pow(10, exponent));
				}

				// Single exit for every float shape, so the format index
				// always advances past the specifier.
				if (is_negative) {
					negate_any(arg);
				}
				fmt_idx++;
				arg_idx++;

			default:
				return UNKNOWN_TYPE_SPECIFIER?;
		}
	}

	if (fmt_idx != fmt.len) {
		return DID_NOT_CONSUME_FULL_FORMAT?;
	}
	if (slf_idx != self.len) {
		return DID_NOT_CONSUME_FULL_STRING?;
	}
	return 0;
}
// Demo: parse a float, a string and a boolean out of one sentence and print
// the results, or print the fault when parsing fails.
fn void main() {
	String format = "Hi nr. %f, you're a %s person. Is that %b?";
	String to_parse = "Hi nr. -35.13, you're a very gentle person. Is that true?";

	double nr;
	String trait;
	bool question;

	if (catch error = to_parse.parse_format(format, &nr, &trait, &question)) {
		io::printfn("Failed to parse string: %s", error);
		return;
	}

	io::printfn("Original: %s", to_parse);
	io::printfn("Format: %s\n", format);
	io::printfn("Parsed nr = %f, trait = %s, question = %b", nr, trait, question);
}
You can have a look at how the formatter handles `any`.