uapi-json
uapi-json copied to clipboard
Investigate new parsers
Investigate parsers:
- xml2js without custom configuration
- xpath
I've done some investigation with `xpath` and raw `xml2js`.
It looks like `xml2js` is the faster option, though it does not really matter, as the difference is less than 200 ms when parsing 1000 files.
Attached is the sample script I used.
Please take a look and compare which syntax you would really prefer for the next-gen parser: the dotted-access style of `xml2js` or `xpath` with expressions.
const fs = require('fs');
const path = require('path');
const xpath = require('xpath');
const Dom = require('xmldom').DOMParser;
const xml2js = require('xml2js');
// Number of times each parser is run over the sample document.
const TRIES = 1000;
// Absolute path of the sample response, resolved relative to this script's directory.
const testFilePath = path.join(
  __dirname,
  'test',
  'FakeResponses',
  'Air',
  'AirGetTickets-several-tickets.xml'
);
// Sample document contents, read once up front and decoded as UTF-8 text.
const testFile = fs.readFileSync(testFilePath, 'utf8');
/**
 * Finds the `xmlns:<prefix>="<url>"` declaration for a namespace in raw XML text.
 *
 * The declared prefix may be the bare namespace name or the name followed by a
 * version suffix of the form `_v<digits>_<digits>`.
 *
 * @param {string} str - Raw XML text to search.
 * @param {string} namespace - Base prefix to look for; it is interpolated into
 *   a regular expression without escaping.
 * @returns {{documentNamespace: string, url: string}} The prefix exactly as it
 *   is written in the document and the URL it is bound to.
 * @throws {Error} When the document contains no matching declaration.
 */
function getNamespaceInfo(str, namespace) {
  const re = new RegExp(`xmlns:(${namespace}(?:_v\\d+_\\d+)?)="([^\\"]+)"`);
  const match = str.match(re);
  // `match` is null when nothing matched; destructuring it directly would
  // fail with an opaque "null is not iterable" TypeError.
  if (match === null) {
    throw new Error(`Namespace "${namespace}" is not declared in the document`);
  }
  const [, documentNamespace, url] = match;
  return { documentNamespace, url };
}
/**
 * Two implementations of the same ticket extraction, one per candidate parser,
 * so that their syntax and speed can be compared side by side.
 *
 * Both `parse` functions produce an array of objects shaped as
 * `{ locatorCode, fareCalculation, passengers, tickets }`.
 */
const fn = {
  // Implementation that walks the plain-object tree handed back by `xml2js.parseString`.
  xml2js: {
    /**
     * Maps every requested namespace to the prefix used in the document.
     *
     * @param {string} str - Raw XML text.
     * @param {string[]} list - Namespace names to resolve.
     * @returns {Object<string, string>} Namespace name -> document prefix.
     */
    getNamespaces: (str, list) => {
      // Plain loop: spreading the accumulator inside `reduce` copies it on every step.
      const prefixes = {};
      for (const namespace of list) {
        prefixes[namespace] = getNamespaceInfo(str, namespace).documentNamespace;
      }
      return prefixes;
    },

    /**
     * Builds a passenger record from a booking traveler node.
     *
     * @param {Object<string, string>} ns - Namespace name -> document prefix.
     * @param {Object} passenger - Traveler node; attributes are read from its `$` property.
     * @returns {{key: string, firstName: string, lastName: string}}
     */
    parsePassenger: (ns, passenger) => {
      const key = passenger.$.Key;
      const { First: firstName, Last: lastName } = passenger[`${ns.common}:BookingTravelerName`][0].$;
      return {
        key,
        firstName,
        lastName,
      };
    },

    /**
     * Renames the attributes of a coupon node to camelCase result fields.
     *
     * @param {Object} coupon - Coupon node; attributes are read from its `$` property.
     * @returns {Object} Coupon record.
     */
    parseCoupon: (coupon) => {
      const {
        MarketingCarrier: airline,
        MarketingFlightNumber: flightNumber,
        Origin: origin,
        Destination: destination,
        DepartureTime: departure,
        NotValidBefore: notValidBefore,
        NotValidAfter: notValidAfter,
        Status: status,
      } = coupon.$;
      return {
        airline,
        flightNumber,
        origin,
        destination,
        departure,
        notValidBefore,
        notValidAfter,
        status,
      };
    },

    /**
     * Builds a ticket record with its coupons.
     *
     * @param {Object<string, string>} ns - Namespace name -> document prefix.
     * @param {Object} ticket - Ticket node.
     * @returns {{ticketNumber: string, coupons: Object[]}}
     */
    parseTicket: (ns, ticket) => {
      const { TicketNumber: ticketNumber } = ticket.$;
      const coupons = ticket[`${ns.air}:Coupon`]
        .map(coupon => fn.xml2js.parseCoupon(coupon));
      return {
        ticketNumber,
        coupons,
      };
    },

    /**
     * Builds one result entry from an ETR node.
     *
     * @param {Object<string, string>} ns - Namespace name -> document prefix.
     * @param {Object} etr - ETR node.
     * @returns {{locatorCode: *, fareCalculation: *, passengers: Object[], tickets: Object[]}}
     */
    parseEtr: (ns, etr) => {
      // Child elements are stored as arrays; take the first entry of each.
      const locatorCode = etr[`${ns.air}:AirReservationLocatorCode`][0];
      const fareCalculation = etr[`${ns.air}:FareCalc`][0];
      const tickets = etr[`${ns.air}:Ticket`]
        .map(ticket => fn.xml2js.parseTicket(ns, ticket));
      const passengers = etr[`${ns.common}:BookingTraveler`]
        .map(passenger => fn.xml2js.parsePassenger(ns, passenger));
      return {
        locatorCode,
        fareCalculation,
        passengers,
        tickets,
      };
    },

    /**
     * Parses the document with `xml2js.parseString` and extracts every ETR.
     *
     * @param {string} str - Raw XML text.
     * @returns {Promise<Object[]>} Resolves with one entry per ETR element;
     *   rejects with the error reported by `xml2js.parseString`.
     */
    parse: async (str) => {
      // `parseString` is callback-based, so adapt it to a promise.
      const obj = await new Promise((resolve, reject) => {
        xml2js.parseString(str, (err, resp) => {
          if (err) {
            reject(err);
            return;
          }
          resolve(resp);
        });
      });
      const ns = fn.xml2js.getNamespaces(str, ['air', 'common']);
      const body = obj['SOAP:Envelope']['SOAP:Body'][0];
      const response = body[`${ns.air}:AirRetrieveDocumentRsp`][0];
      // Return the result instead of discarding it so callers can inspect it.
      return response[`${ns.air}:ETR`]
        .map(etr => fn.xml2js.parseEtr(ns, etr));
    },
  },

  // Implementation that queries a DOM document through namespace-aware xpath expressions.
  xpath: {
    /**
     * Reads several attributes of an element into an object with renamed keys.
     *
     * @param {{getAttribute: function(string): *}} element - Element to read from.
     * @param {Object<string, string>} attributesMap - Attribute name -> result key.
     * @returns {Object} Result key -> attribute value.
     */
    getAttributes: (element, attributesMap) => {
      // Plain loop: spreading the accumulator inside `reduce` copies it on every step.
      const attributes = {};
      for (const [attributeName, key] of Object.entries(attributesMap)) {
        attributes[key] = element.getAttribute(attributeName);
      }
      return attributes;
    },

    /**
     * Maps every requested namespace to the URL it is bound to in the document.
     *
     * @param {string} str - Raw XML text.
     * @param {string[]} list - Namespace names to resolve.
     * @returns {Object<string, string>} Namespace name -> namespace URL.
     */
    getNamespaces: (str, list) => {
      const urls = {};
      for (const namespace of list) {
        urls[namespace] = getNamespaceInfo(str, namespace).url;
      }
      return urls;
    },

    /**
     * Builds a passenger record from a booking traveler element.
     *
     * @param {Object} passenger - Traveler element.
     * @param {function(string, Object): *} expr - Evaluator taking an expression and a context node.
     * @returns {{key: *, firstName: *, lastName: *}}
     */
    parsePassenger: (passenger, expr) => {
      const key = passenger.getAttribute('Key');
      const [travelerName] = expr('./common:BookingTravelerName', passenger);
      return {
        key,
        ...fn.xpath.getAttributes(travelerName, { First: 'firstName', Last: 'lastName' }),
      };
    },

    /**
     * Renames the attributes of a coupon element to camelCase result fields.
     *
     * @param {Object} coupon - Coupon element.
     * @returns {Object} Coupon record.
     */
    parseCoupon: (coupon) => {
      return fn.xpath.getAttributes(coupon, {
        MarketingCarrier: 'airline',
        MarketingFlightNumber: 'flightNumber',
        Origin: 'origin',
        Destination: 'destination',
        DepartureTime: 'departure',
        NotValidBefore: 'notValidBefore',
        NotValidAfter: 'notValidAfter',
        Status: 'status',
      });
    },

    /**
     * Builds a ticket record with its coupons.
     *
     * @param {Object} ticket - Ticket element.
     * @param {function(string, Object): *} expr - Evaluator taking an expression and a context node.
     * @returns {{ticketNumber: *, coupons: Object[]}}
     */
    parseTicket: (ticket, expr) => {
      const ticketNumber = ticket.getAttribute('TicketNumber');
      const coupons = expr('./air:Coupon', ticket)
        .map(coupon => fn.xpath.parseCoupon(coupon));
      return {
        ticketNumber,
        coupons,
      };
    },

    /**
     * Builds one result entry from an ETR element.
     *
     * @param {Object} etr - ETR element.
     * @param {function(string, Object): *} expr - Evaluator taking an expression and a context node.
     * @returns {{locatorCode: *, fareCalculation: *, passengers: Object[], tickets: Object[]}}
     */
    parseEtr: (etr, expr) => {
      const locatorCode = expr('string(./air:AirReservationLocatorCode)', etr);
      const fareCalculation = expr('string(./air:FareCalc)', etr);
      const tickets = expr('./air:Ticket', etr)
        .map(ticket => fn.xpath.parseTicket(ticket, expr));
      const passengers = expr('./common:BookingTraveler', etr)
        .map(passenger => fn.xpath.parsePassenger(passenger, expr));
      return {
        locatorCode,
        fareCalculation,
        passengers,
        tickets,
      };
    },

    /**
     * Parses the document into a DOM and extracts every ETR through xpath.
     *
     * @param {string} str - Raw XML text.
     * @returns {Object[]} One entry per ETR element.
     */
    parse: (str) => {
      const doc = new Dom().parseFromString(str);
      const namespaces = fn.xpath.getNamespaces(str, ['SOAP', 'air', 'common']);
      // Bind the fixed prefixes used in the expressions to the document's namespace URLs.
      const expr = xpath.useNamespaces(namespaces);
      // Return the result instead of discarding it so callers can inspect it.
      return expr('/SOAP:Envelope/SOAP:Body/air:AirRetrieveDocumentRsp/air:ETR', doc)
        .map(etr => fn.xpath.parseEtr(etr, expr));
    },
  },
};
/**
 * Runs each parser TRIES times over the same sample document and prints the
 * total elapsed time per parser via console.time / console.timeEnd.
 *
 * @returns {Promise<void>} Settles once both timing loops have finished.
 */
async function main() {
  // Xpath
  console.time('Xpath');
  for (let i = 0; i < TRIES; i += 1) {
    fn.xpath.parse(testFile);
  }
  console.timeEnd('Xpath');
  // Xml2Js
  console.time('Xml2Js');
  for (let i = 0; i < TRIES; i += 1) {
    // Awaited one at a time on purpose, so the runs are sequential like the loop above.
    // eslint-disable-next-line no-await-in-loop
    await fn.xml2js.parse(testFile);
  }
  console.timeEnd('Xml2Js');
}

// Do not leave the promise floating: report the failure and exit non-zero.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
I prefer `xpath`, as it more closely resembles the original XML structure and is therefore more readable.
I would really like some active users and contributors to join the discussion: if the code becomes less complex, more useful contributions can be expected.
@nausik , @cursedcoder , @shmuga , @InkFaust , @Smotrov , @creduo , @yaerhf , @articice , @jthoma , @engrashid , @JairoPanduro
Although `xml2js` might be slightly faster than `xpath`, with the 200 ms difference you've mentioned, I think `xpath` is much easier to read and understand. If the speed difference doesn't matter that much, I'd prefer to stick with `xpath` rather than `xml2js`.
I'd also love to hear from other contributors on this matter.
xpath <3
Readability matters more, since that helps adaptability by more contributors. I would go with xpath for this one.
On Tue, Jun 30, 2020, 4:26 PM Yevhenii Huselietov [email protected] wrote:
xpath <3
— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHub https://github.com/Travelport-Ukraine/uapi-json/issues/483#issuecomment-651719934, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABOWA54M6R3LCRKEXCGSXM3RZHAFNANCNFSM4OKUV7KQ .