Records & structs

This example shows how to parse simple non-recursive structures and records. It uses both s-tags (for simple fields) and m-tags (for fields that contain multiple elements). Our imaginary records describe IRC users. Each record consists of a nickname followed by an opening curly brace, a list of attributes (one per line), and a closing curly brace. Attributes are name, country and a list of IRC channels. Below is an example:

[records.dat]

h4cker1970 {
    name     = Jon Smith
    country  = UK
    channels = freenode/#gentoo-dev; freenode/#gentoo-arch; freenode/#alpha;
}

mitek {
    name     = Mitrofan Rygoravich
    country  = Belarus
    channels = bynets/#haskell; freenode/#unix;
}

[records.re]

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <stdio.h>
#include <stdlib.h>
#include <vector>

// One entry in an m-tag history. Entries are chained backwards through
// `pred`, so a history is walked from its newest entry to its oldest.
struct mtag_t
{
    int pred;        // index of the previous entry in the pool; -1 ends the chain
    const char *tag; // input position recorded for this entry (NULL when stored via YYMTAGN)
};

// Storage for all m-tag entries; an m-tag variable holds an index into it.
typedef std::vector<mtag_t> mtagpool_t;

static void mtag(int *pt, const char *t, mtagpool_t *tp)
{
    mtag_t l = {*pt, t};
    *pt = (int) tp->size();
    tp->push_back(l);
}

// Prints the channels recorded in a pair of m-tag histories, oldest first,
// one channel per line.
//
// tp - pool holding all m-tag entries
// x  - index of the newest start-of-channel entry, or -1 for an empty history
// y  - index of the newest end-of-channel entry, or -1 for an empty history
//
// Histories are linked newest-to-oldest, so the function recurses down to the
// oldest entry first and prints on the way back up.
static void print_channels(const mtagpool_t &tp, int x, int y)
{
    // The two histories are expected to have equal length; stop as soon as
    // either one ends so the pool is never indexed with -1.
    if (x == -1 || y == -1) return;

    print_channels(tp, tp[x].pred, tp[y].pred);

    const char *px = tp[x].tag, *py = tp[y].tag;

    // Entries stored via YYMTAGN carry a NULL position: there is no text to
    // print, and passing NULL to "%s" is undefined behavior.
    if (px == NULL || py == NULL) return;

    printf("    %.*s\n", (int) (py - px), px);
}

// Hooks used by the code that re2c generates to update an m-tag variable `t`.
// Both rely on `YYCURSOR` and `tp` being in scope at the expansion site.
// YYMTAGP appends the current input position to the history of `t`.
#define YYMTAGP(t) mtag(&t, YYCURSOR, &tp)
// YYMTAGN appends a NULL entry to the history of `t`.
#define YYMTAGN(t) mtag(&t, NULL,     &tp)

// Lexes the NUL-terminated input starting at YYCURSOR as a sequence of
// records and prints every record it recognizes to stdout.
//
// Returns 0 when the terminating NUL is reached, or 1 after printing an
// error message when the input matches none of the rules.
static int lex(const char *YYCURSOR)
{
    // n1/n2, a1/a2 and c1/c2 are s-tags delimiting the nickname, the name and
    // the country; h1/h2 are m-tags whose values index channel histories in tp.
    const char *YYMARKER, *n1, *n2, *a1, *a2, *c1, *c2;
    mtagpool_t tp;
    int h1, h2;
    // The two directives below expand to declarations of re2c's internal tag
    // variables, using the given format for each variable name.
    /*!stags:re2c format = "const char *@@;"; */
    /*!mtags:re2c format = "int @@;"; */
loop:
    // Every pass starts with an empty pool and with the internal m-tag
    // variables reset to -1 (empty history).
    tp.clear();
    /*!mtags:re2c format = "@@ = -1;"; */
    /*!re2c
        re2c:define:YYCTYPE = char;
        re2c:yyfill:enable = 0;

        end     = "\x00";
        eol     = "\n";
        wsp     = [ \t]*;
        eq      = wsp "=" wsp;
        nick    = [a-zA-Z0-9_]+;
        name    = [A-Z] ("." | [a-z]+);
        names   = name (wsp name)*;
        country = [A-Za-z ]+;
        channel = ([a-z0-9-]+ "/")? "#" [a-z0-9-]+;

        *         { printf("error: %s\n", YYCURSOR); return 1; }
        end       { return 0; }
        wsp | eol { goto loop; }

        @n1 nick @n2 wsp "{" wsp eol
            wsp "name"     eq @a1 names   @a2 wsp eol
            wsp "country"  eq @c1 country @c2 wsp eol
            wsp "channels" eq (wsp #h1 channel #h2 wsp ";")* wsp eol
        wsp "}" {
            printf("\n%.*s\n", (int) (n2 - n1), n1);
            printf("  name:     %.*s\n", (int) (a2 - a1), a1);
            printf("  country:  %.*s\n", (int) (c2 - c1), c1);
            printf("  channels:\n");
            print_channels(tp, h1, h2);
            goto loop;
        }
    */
}

// Reads the file named by the first command-line argument into memory,
// NUL-terminates it and runs the lexer over it.
//
// Returns the lexer's status (0 on success, 1 on a lexing error), or 1 when
// no file is given or the file cannot be opened, sized, or read.
int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "no input files\n");
        return 1;
    }

    FILE *file = fopen(argv[1], "r");
    if (!file) {
        fprintf(stderr, "cannot open file: %s\n", argv[1]);
        return 1;
    }

    // Determine the file size; ftell reports failure as -1, which must not be
    // turned into an allocation size.
    long fsize = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        fsize = ftell(file);
    }
    if (fsize < 0 || fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "cannot determine size of file: %s\n", argv[1]);
        fclose(file);
        return 1;
    }

    // One extra byte for the NUL sentinel the lexer relies on.
    char *buffer = (char*) malloc((size_t) fsize + 1);
    if (!buffer) {
        fprintf(stderr, "out of memory\n");
        fclose(file);
        return 1;
    }

    const size_t nread = fread(buffer, 1, (size_t) fsize, file);
    const int read_failed = ferror(file);
    fclose(file);

    if (read_failed) {
        fprintf(stderr, "cannot read file: %s\n", argv[1]);
        free(buffer);
        return 1;
    }

    // Terminate at the number of bytes actually read: in text mode this may be
    // smaller than the size reported by ftell, and the bytes in between would
    // otherwise be uninitialized.
    buffer[nread] = 0;

    const int status = lex(buffer);

    free(buffer);

    return status;
}

Compile:

$ re2c --tags -o records.cc records.re
$ g++ -o records records.cc

Run:

$ ./records records.dat

h4cker1970
  name:     Jon Smith
  country:  UK
  channels:
    freenode/#gentoo-dev
    freenode/#gentoo-arch
    freenode/#alpha

mitek
  name:     Mitrofan Rygoravich
  country:  Belarus
  channels:
    bynets/#haskell
    freenode/#unix