parsebuddy

ParseBuddy allows parsing arbitrary text input with a structured grammar.

It allows you to create a grammar that will parse a sentence like

buy tickets for The Shawshank Redemption in Los Angeles on wednesday at 2pm for 4 people

and returns data like

{
  movie: 'The Shawshank Redemption',
  city: 'Los Angeles',
  weekday: 2,
  hour: 14,
  ticketsCount: 4,
}

Demo grammar

TODO: write a tutorial instead of copy-pasting the code

However, I've tried to explain everything with comments

import {
  createGrammar,
  sequence,
  word,
  defineParser,
  fork,
  literal,
  number,
} from '..';
import { moviesList } from './movies';
import { cities } from './cities';

/**
 * Marker labels for the individual parts of the cinema sentence.
 * Each member is passed as the `marker` option of one sub-parser in `cinemaGrammar`.
 */
export enum CinemaMarker {
  /** Marker used for the movie-name part. */
  movie = 'Movie Name',
  /** Marker used for the weekday part. */
  date = 'Date',
  /** Marker used for the city part. */
  city = 'City',
  /** Marker used for the hour part. */
  hour = 'Hour',
  /** Marker used for the number-of-people part. */
  ticketsCount = 'Tickets Count',
}

/**
 * Data object that transports the parsing result.
 * Every field is optional: the grammar starts from an empty object and adds
 * fields one by one as the matching parts of the sentence are parsed.
 */
interface TicketsData {
  /** Value emitted by the `movie` parser. */
  movie?: string;
  /** NOTE(review): no parser in this file assigns this field - confirm whether it is still needed. */
  date?: Date;
  /** Value emitted by the `city` parser. */
  city?: string;
  /** Value emitted by the `hour` parser. */
  hour?: number;
  /** Value emitted by the `ticketsCount` parser. */
  ticketsCount?: number;
  /** Value emitted by the `weekday` parser (0 = monday ... 6 = sunday). */
  weekday?: number;
}

// parser that accepts exactly one of the titles from the movies list
const movie = defineParser<{}, string>(
  (_options, emit) => {
    // one literal parser per known title (a literal requires its exact text);
    // `emit` is handed over as the match callback, so a match emits the movie name
    const titleParsers = moviesList.map((title) =>
      literal({ text: title }, emit),
    );

    // fork picks one of its children
    return fork({ children: titleParsers });
  },
  { name: 'movie' },
);

// built the same way as the movie parser, but from the cities list
const city = defineParser<{}, string>(
  (_options, emit) => {
    // one exact-text literal per known city; `emit` is the match callback
    const cityParsers = cities.map((cityName) =>
      literal({ text: cityName }, emit),
    );

    // fork picks one of its children
    return fork({ children: cityParsers });
  },
  { name: 'city' },
);

// parser that matches any weekday name, but instead of emitting the name it
// emits a number from 0 to 6 (0 = monday, 6 = sunday)
const weekday = defineParser<{}, number>(
  (_options, emit) => {
    // the position of a name in this list is the number emitted for it
    const dayNames = [
      'monday',
      'tuesday',
      'wednesday',
      'thursday',
      'friday',
      'saturday',
      'sunday',
    ];

    return fork({
      placeholder: 'weekday',
      // one literal per day name; on match emit the day's index
      children: dayNames.map((dayName, dayIndex) =>
        literal({ text: dayName }, () => {
          emit(dayIndex);
        }),
      ),
    });
  },
  { name: 'weekday' },
);

// parser that takes input like 3am or 3pm and emits the hour on a 24-hour clock (0-23):
//   12am (midnight) -> 0, 1am-11am -> 1-11, 12pm (noon) -> 12, 1pm-11pm -> 13-23
const hour = defineParser<{}, number>(
  function(options, emit) {
    // parts captured while the sequence is being matched; both stay undefined
    // until the corresponding child parser reports a match
    let parsedHour: number | undefined;
    let meridiem: 'am' | 'pm' | undefined;

    return sequence({
      // when parsing of the whole sequence is complete
      onMatch: () => {
        // nothing sensible can be emitted unless both parts were captured
        if (parsedHour === undefined || meridiem === undefined) {
          return;
        }
        // `% 12` turns 12 into 0, so 12am becomes 0 and 12pm becomes 12
        // (simply adding 12 for pm would turn noon into 24 and leave midnight at 12)
        emit((parsedHour % 12) + (meridiem === 'pm' ? 12 : 0));
      },
      children: [
        // first we expect a number between 1 and 12
        number(
          {
            // before the user reaches this part, show a meaningful placeholder in the suggestion
            placeholder: '1-12',
            min: 1,
            max: 12,
            // generate the 12 number suggestions - this flag must be enabled explicitly,
            // as in many cases (e.g. unlimited numbers) there would be far more suggestions
            generateSuggestions: true,
            onlyInteger: true,
          },
          (matchedHour) => {
            // remember the matched hour, but don't emit it yet: we still need to know
            // whether it is followed by am or pm
            parsedHour = matchedHour;
          },
        ),
        // then one of am or pm
        fork({
          placeholder: 'am/pm',
          children: [
            literal({ text: 'am' }, () => {
              // remember which half of the day was matched so onMatch knows what to emit
              meridiem = 'am';
            }),
            literal({ text: 'pm' }, () => {
              meridiem = 'pm';
            }),
          ],
        }),
      ],
    });
  },
  { name: 'hour' },
);

// parser for the number of tickets: either '1 person' or a number from 2 to 5
// followed by 'people' (the second word differs, hence the fork)
const ticketsCount = defineParser<{}, number>(
  (_options, emit) => {
    // singular form - the whole phrase '1 person' is matched at once and emits 1
    const singlePerson = word({ text: '1 person' }, () => {
      emit(1);
    });

    // plural form - a number followed by the word 'people'
    const severalPeople = sequence({
      children: [
        number(
          {
            placeholder: '2-5 people',
            min: 2,
            max: 5,
            generateSuggestions: true,
            onlyInteger: true,
          },
          // the number itself is the value to emit
          (peopleCount) => emit(peopleCount),
        ),
        word({ text: 'people' }),
      ],
    });

    // fork picks one of the two forms
    return fork({ children: [singlePerson, severalPeople] });
  },
  { name: 'ticketsCount' },
);

// the final grammar, composed from the parsers defined above
export const cinemaGrammar = createGrammar<TicketsData, CinemaMarker>({
  // the grammar collects meaningful information from the parsed sentence in a data
  // transporter object, so it has to know how to create a fresh one and how to clone
  // it (cloning is needed when the same input leads to 2 or more suggestions)
  dataHolder: {
    // a fresh transporter is just an empty object
    init: () => {
      return {};
    },
    // a flat copy is enough for cloning
    clone: (source) => {
      return { ...source };
    },
  },
  // root parser: fixed words interleaved with the value parsers; each value parser's
  // callback returns a copy of the data transporter extended with the emitted value
  parser: sequence({
    children: [
      word({ text: 'buy tickets for' }),
      movie(
        { placeholder: 'movie name', marker: CinemaMarker.movie },
        (matchedMovie, data) => ({ ...data, movie: matchedMovie }),
      ),
      word({ text: 'in' }),
      city(
        { placeholder: 'cinema location', marker: CinemaMarker.city },
        (matchedCity, data) => ({ ...data, city: matchedCity }),
      ),
      word({ text: 'on' }),
      weekday(
        { placeholder: 'weekday', marker: CinemaMarker.date },
        (matchedWeekday, data) => ({ ...data, weekday: matchedWeekday }),
      ),
      word({ text: 'at' }),
      hour(
        { placeholder: 'hour', marker: CinemaMarker.hour },
        (matchedHour, data) => ({ ...data, hour: matchedHour }),
      ),
      word({ text: 'for' }),
      ticketsCount(
        {
          placeholder: 'people count',
          marker: CinemaMarker.ticketsCount,
        },
        (matchedCount, data) => ({ ...data, ticketsCount: matchedCount }),
      ),
    ],
  }),
});

// helper types derived from the grammar, so they don't have to be written by hand

/** Resolves to `U` when `T` is `Promise<U>`; any other `T` is returned unchanged. */
type PromiseType<T> = T extends Promise<infer U> ? U : T;

/**
 * Type of the element at index 0 of what `cinemaGrammar.parseAll` returns,
 * after unwrapping a promise if `parseAll` returns one.
 */
export type CinemaGrammarResult = PromiseType<
  ReturnType<typeof cinemaGrammar.parseAll>
>[0];

/** Type of the element at index 0 of a result's `matches` property. */
export type CinemaGrammarMatch = CinemaGrammarResult['matches'][0];

parsebuddy
parsebuddy copied to clipboard

Metadata

parsebuddy

Demo grammar

← Metadata

Owner

Metadata

parsebuddy parsebuddy copied to clipboard

Metadata

parsebuddy

Demo grammar

← Metadata

Owner

Metadata

parsebuddy
parsebuddy copied to clipboard