Multiple blocks

This example demonstrates the use of multiple interrelated /*!re2c ... */ blocks. We pick a deliberately simple task (parsing integers), so that all complexity is associated with relations between blocks and not with the lexical grammar.

[multiple_blocks.re]

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <limits.h>
#include <stdio.h>

// Appends digit d to u in radix `base`; returns false (u untouched) on overflow.
template<int base>
static bool adddgt(unsigned long &u, unsigned int d)
{
    const unsigned long limit = (ULONG_MAX - d) / base; // largest u that still fits
    const bool fits = u <= limit;
    if (fits) { u = u * base + d; }
    return fits;
}

static bool lex(const char *s, unsigned long &u) // parses the NUL-terminated literal s into u; false on bad input or overflow
{
    const char *YYMARKER;    // required by the re2c-generated code: backtrack position
    const char *YYCTXMARKER; // required by the re2c-generated code: trailing-context position
    u = 0; // digits are accumulated starting from zero
    // Shared setup (global to all blocks): YYFILL off, input read via s, end = NUL byte.
    /*!re2c
        re2c:yyfill:enable = 0;
        re2c:define:YYCTYPE = char;
        re2c:define:YYCURSOR = s;

        end = "\x00";
    */
    // Prefix dispatch (0b/0x match in either case); text after / is checked but not consumed.
    /*!re2c
        *                  { return false; }
        '0b' / [01]        { goto bin; }
        "0"                { goto oct; }
        "" / [1-9]         { goto dec; }
        '0x' / [0-9a-fA-F] { goto hex; }
    */
    // Binary: like each block below, takes one digit per pass (s[-1] is the char just matched).
bin:
    /*!re2c
        *     { return false; }
        end   { return true; }
        [01]  { if (!adddgt<2>(u, s[-1] - '0')) return false; goto bin; }
    */
    // Octal: entered after a leading 0, so a bare 0 ends here with u still zero.
oct:
    /*!re2c
        *     { return false; }
        end   { return true; }
        [0-7] { if (!adddgt<8>(u, s[-1] - '0')) return false; goto oct; }
    */
    // Decimal: the dispatch matched an empty string, so the first digit is read here.
dec:
    /*!re2c
        *     { return false; }
        end   { return true; }
        [0-9] { if (!adddgt<10>(u, s[-1] - '0')) return false; goto dec; }
    */
    // Hexadecimal: decimal digits plus a-f and A-F, mapped to 10..15.
hex:
    /*!re2c
        *     { return false; }
        end   { return true; }
        [0-9] { if (!adddgt<16>(u, s[-1] - '0'))      return false; goto hex; }
        [a-f] { if (!adddgt<16>(u, s[-1] - 'a' + 10)) return false; goto hex; }
        [A-F] { if (!adddgt<16>(u, s[-1] - 'A' + 10)) return false; goto hex; }
    */
}

int main(int argc, char **argv) // lexes each argument; prints its value, or error on failure
{
    for (char **arg = argv + 1; arg < argv + argc; ++arg) {
        unsigned long value;
        if (!lex(*arg, value)) {
            printf("error\n");
            continue;
        }
        printf("%lu\n", value);
    }
    return 0;
}

Notes:

  • Configurations and definitions (lines 20 - 26 of the listing) are not scoped to a single re2c block — they are global. A later block may override a configuration, but the override is global too: it stays in effect for every block that follows.
  • Blocks don’t have to be in the same function: they can be in separate functions or elsewhere as long as the exposed interface fits into the lexical scope.

Compile:

$ re2c -o multiple_blocks.cc multiple_blocks.re
$ g++ -o multiple_blocks multiple_blocks.cc

Run:

$ ./multiple_blocks 0 12345678901234567890 0xFFFFffffFFFFffff 0x1FFFFffffFFFFffff 0xAbcDEf 0x00 007 0B0 0b110101010 ""
0
12345678901234567890
18446744073709551615
error
11259375
0
7
0
426
error