Parsing recordsΒΆ

This example shows how to parse simple non-recursive structures and records. It uses both s-tags (for simple fields) and m-tags (for fields that contain multiple elements). Our imaginary records describe IRC users. Each record consists of a nickname followed by an opening curly brace, a list of attributes (one per line), and a closing curly brace. Attributes are name, country and a list of IRC channels.

[records.re]

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// re2c $INPUT -o $OUTPUT -i
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>

struct mtag_t
{
    int pred;
    const char *tag;
};

typedef std::vector<mtag_t> mtagpool_t;

static void mtag(int *pt, const char *t, mtagpool_t *tp)
{
    mtag_t l = {*pt, t};
    *pt = (int) tp->size();
    tp->push_back(l);
}

static void print_channels(const mtagpool_t &tp, int x, int y)
{
    if (x == -1) return;
    print_channels(tp, tp[x].pred, tp[y].pred);
    const char *px = tp[x].tag, *py = tp[y].tag;
    fprintf(stderr, "    %.*s\n", (int) (py - px), px);
}

#define YYMTAGP(t) mtag(&t, YYCURSOR, &tp)
#define YYMTAGN(t) mtag(&t, NULL,     &tp)

static int lex(const char *YYCURSOR)
{
    const char *YYMARKER, *n1, *n2, *a1, *a2, *c1, *c2;
    mtagpool_t tp;
    int h1, h2;
    /*!stags:re2c format = "const char *@@;"; */
    /*!mtags:re2c format = "int @@;"; */
loop:
    tp.clear();
    /*!mtags:re2c format = "@@ = -1;"; */
    /*!re2c
        re2c:define:YYCTYPE = char;
        re2c:yyfill:enable = 0;
        re2c:flags:tags = 1;

        end     = "\x00";
        eol     = "\n";
        wsp     = [ \t]*;
        eq      = wsp "=" wsp;
        nick    = [a-zA-Z0-9_]+;
        name    = [A-Z] ("." | [a-z]+);
        names   = name (wsp name)*;
        country = [A-Za-z ]+;
        channel = ([a-z0-9-]+ "/")? "#" [a-z0-9-]+;

        *         { fprintf(stderr, "error: %s\n", YYCURSOR); return 1; }
        end       { return 0; }
        wsp | eol { goto loop; }

        @n1 nick @n2 wsp "{" wsp eol
            wsp "name"     eq @a1 names   @a2 wsp eol
            wsp "country"  eq @c1 country @c2 wsp eol
            wsp "channels" eq (wsp #h1 channel #h2 wsp ";")* wsp eol
        wsp "}" {
            fprintf(stderr, "\n%.*s\n", (int) (n2 - n1), n1);
            fprintf(stderr, "  name:     %.*s\n", (int) (a2 - a1), a1);
            fprintf(stderr, "  country:  %.*s\n", (int) (c2 - c1), c1);
            fprintf(stderr, "  channels:\n");
            print_channels(tp, h1, h2);
            goto loop;
        }
    */
}

int main()
{
    const char *fname = "etc_passwd";
    FILE *f;

    // prepare input file
    f = fopen(fname, "w");
    fprintf(f,
        "h4cker1970 {\n"
        "    name     = Jon Smith\n"
        "    country  = UK\n"
        "    channels = freenode/#gentoo-dev; freenode/#gentoo-arch; freenode/#alpha;\n"
        "}\n"
        "\n"
        "mitek {\n"
        "    name     = Mitrofan Rygoravich\n"
        "    country  = Belarus\n"
        "    channels = bynets/#haskell; freenode/#unix;\n"
        "}\n"
        "\n");
    fclose(f);

    // read input file into buffer
    f = fopen(fname, "r");
    fseek(f, 0, SEEK_END);
    const size_t fsize = (size_t) ftell(f);
    fseek(f, 0, SEEK_SET);
    char *buffer = (char*) malloc(fsize + 1);
    fread(buffer, 1, fsize, f);
    buffer[fsize] = 0;
    fclose(f);

    assert(lex(buffer) == 0);

    // cleanup
    remove(fname);
    free(buffer);
    return 0;
}

Compile as re2c -o records.c records.re.