Skip to content
Snippets Groups Projects
Forked from Hammer / hammer
631 commits behind the upstream repository.
cfgrammar.h 4.43 KiB
/* Context-free grammar representation and analysis */

#include "internal.h"


typedef struct HCFGrammar_ {
  HCFChoice   *start;   // start symbol (nonterminal)
  HHashSet    *nts;     // HCFChoices, each representing the alternative
                        // productions for one nonterminal
  HHashSet    *geneps;  // set of NTs that can generate the empty string
  HHashTable  **first;  // memoized first sets of the grammar's symbols
  HHashTable  **follow; // memoized follow sets of the grammar's NTs
  size_t      kmax;     // maximum lookahead depth allocated
  HArena      *arena;
  HAllocator  *mm__;

  // constant set containing only the empty string.
  // this is only a member of HCFGrammar because it needs a pointer to arena.
  const struct HStringMap_ *singleton_epsilon;
} HCFGrammar;


/* Representing input characters (bytes) in HHashTables.
 * To use these as keys, we must avoid 0 as because NULL means "not set".
 */
typedef uintptr_t HCharKey;
static inline HCharKey char_key(uint8_t c) { return (0x100 | c); }
static inline uint8_t key_char(HCharKey k) { return (0xFF & k); }

/* Mapping strings of input tokens to arbitrary values (or serving as a set).
 * Common prefixes are folded into a tree of HHashTables, branches labeled with
 * input tokens.
 * Each path through the tree represents the string along its branches.
 */
typedef struct HStringMap_ {
  void *epsilon_branch;         // points to leaf value
  void *end_branch;             // points to leaf value
  HHashTable *char_branches;    // maps to inner nodes (HStringMaps)
  HArena *arena;
} HStringMap;

HStringMap *h_stringmap_new(HArena *a);
void h_stringmap_put_end(HStringMap *m, void *v);
void h_stringmap_put_epsilon(HStringMap *m, void *v);
void h_stringmap_put_after(HStringMap *m, uint8_t c, HStringMap *ends);
void h_stringmap_put_char(HStringMap *m, uint8_t c, void *v);
void h_stringmap_update(HStringMap *m, const HStringMap *n);
void h_stringmap_replace(HStringMap *m, void *old, void *new);
void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool end);
void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);

static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
 { return h_hashtable_get(m->char_branches, (void *)char_key(c)); }


/* Convert 'parser' into CFG representation by desugaring and compiling the set
 * of nonterminals.
 * A NULL return means we are unable to represent the parser as a CFG.
 */
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser);
HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *start);

HCFGrammar *h_cfgrammar_new(HAllocator *mm__);

/* Frees the given grammar and associated data.
 * Does *not* free parsers' CFG forms as created by h_desugar.
 */
void h_cfgrammar_free(HCFGrammar *g);

/* Does the given symbol derive the empty string (under g)? */
bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol);

/* Does the sentential form s derive the empty string? s NULL-terminated. */
bool h_derives_epsilon_seq(HCFGrammar *g, HCFChoice **s);

/* Compute first_k set of symbol x. Memoized. */
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x);

/* Compute first_k set of sentential form s. s NULL-terminated. */
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s);

/* Compute follow_k set of symbol x. Memoized. */
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x);

/* Compute the predict_k set of production "A -> rhs".
 * Always returns a newly-allocated HStringMap.
 */
HStringMap *h_predict(size_t k, HCFGrammar *g,
                        const HCFChoice *A, const HCFSequence *rhs);


/* Pretty-printers for grammars and associated data. */
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent);
void h_pprint_sequence(FILE *f, const HCFGrammar *g, const HCFSequence *seq);
void h_pprint_symbol(FILE *f, const HCFGrammar *g, const HCFChoice *x);
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent);
void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
void h_pprint_stringmap(FILE *file, char sep,
                        void (*valprint)(FILE *f, void *env, void *val), void *env,
                        const HStringMap *map);
void h_pprint_char(FILE *file, char c);