% RE2C scanner-generator paper by Bumbulis and Cowan. The abstract cites the
% GLA paper under the key Gray88.
% NOTE(review): no pages field is recorded for this entry; confirm the page
% range (and the 1994 year) against the publisher record before relying on it.
@Article{Bumbulis94,
  author   = {Bumbulis, Peter and Cowan, Donald D.},
  title    = {{RE2C} -- {A} More Versatile Scanner Generator},
  journal  = {ACM Letters on Programming Languages and Systems},
  volume   = {2},
  number   = {1--4},
  year     = {1994},
  abstract = {
    It is usually claimed that lexical analysis routines are still coded by
    hand, despite the widespread availability of scanner generators, for
    efficiency reasons. While efficiency is a consideration, there exist
    freely available scanner generators such as GLA \cite{Gray88} that can
    generate scanners that are faster than most hand-coded ones. However,
    most generated scanners are tailored for a particular environment, and
    retargetting these scanners to other environments, if possible, is
    usually complex enough to make a hand-coded scanner more appealing. In
    this paper we describe RE2C, a scanner generator that not only generates
    scanners which are faster (and usually smaller) than those produced by
    any other scanner generator known to the authors, including GLA, but
    also adapt easily to any environment.
  }
}
% GLA lexical-analyzer generator paper by Gray, published in USENIX conference
% proceedings (hence an InProceedings entry with booktitle, not a journal article).
@InProceedings{Gray88,
  author    = {Gray, Robert W.},
  title     = {{$\gamma$-GLA} -- {A} Generator for Lexical Analyzers That
               Programmers Can Use},
  booktitle = {USENIX Conference Proceedings},
  year      = {1988},
  month     = jun,
  pages     = {147--160},
  abstract  = {Writing an efficient lexical analyzer for even a simple
    language is not a trivial task, and should not be done by hand. We
    describe GLA, a tool that generates very efficient scanners. These
    scanners do not use the conventional transition matrix, but instead
    use a few 128 element vectors. Scanning time is only slightly
    greater than the absolute minimum --- the time it takes to look at
    each character in a file. The GLA language allows simple, concise
    specification of scanners. Augmenting regular expressions with
    auxiliary scanners easily handles nasty problems such as C comments
    and C literal constants. We formalize the connection between token
    scanning and token processing by associating a processor with
    appropriate patterns. A library of canned descriptions simplifies the
    specification of commonly used language pieces --- such as,
    C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
    tuned lexical analysis support modules are provided for error
    handling, input buffering, storing identifiers in hash tables and
    manipulating denotations.}
}