diff --git a/src/backends/llvm/llvm.h b/src/backends/llvm/llvm.h
index 36f53fcb564c6b5af8ddff71fe2a5fa7b1a2a8f5..a05693bc3035ab7c9a2f0f6e0328e5c2a236a4c6 100644
--- a/src/backends/llvm/llvm.h
+++ b/src/backends/llvm/llvm.h
@@ -13,7 +13,7 @@ LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr;
 LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr;
 
 
-void h_llvm_make_charset_membership_test(HAllocator* mm__,
+bool h_llvm_make_charset_membership_test(HAllocator* mm__,
                                          LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                          LLVMValueRef r, HCharset cs,
                                          LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
diff --git a/src/backends/llvm/llvm_charset.c b/src/backends/llvm/llvm_charset.c
index 1e8591dc799e2ffdea49a2462f86a5d730d5cf0d..e2f52b18b59367af8b43f9cb5a7f3c74f1f4961b 100644
--- a/src/backends/llvm/llvm_charset.c
+++ b/src/backends/llvm/llvm_charset.c
@@ -9,6 +9,12 @@
 #include "../../internal.h"
 #include "llvm.h"
 
+/*
+ * Set this #define to enable some debug logging and internal consistency
+ * checking.
+ */
+#define HAMMER_LLVM_CHARSET_DEBUG
+
 typedef enum {
   /*
    * Accept action; this entire range is in the charset. This action type
@@ -790,6 +796,107 @@ static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset
   h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep, "", "", 0);
 }
 
+/*
+ * Build IR for a CHARSET_ACTION_SCAN
+ *
+ * Starting in basic block 'in', emit one equality test against r for each
+ * member of cs in [idx_start, idx_end]; a match branches to 'yes', and
+ * falling off the end of the chain branches to 'no'.
+ *
+ * Returns: true on success, false if cs is null or idx_start > idx_end
+ */
+
+static bool h_llvm_build_ir_for_scan(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+    HCharset cs, uint8_t idx_start, uint8_t idx_end,
+    LLVMValueRef r,
+    LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  if (!cs) return false;
+  if (idx_start > idx_end) return false;
+
+  /*
+   * Scan the range of indices, and for each thing in the charset,
+   * compare and conditional branch.
+   */
+  LLVMPositionBuilderAtEnd(builder, in);
+
+  for (int i = idx_start; i <= idx_end; ++i) {
+    if (charset_isset(cs, i)) {
+      char bbname[16];
+      uint8_t c = (uint8_t)i;
+      snprintf(bbname, 16, "cs_memb_%02x", c);
+      LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ,
+          LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r");
+      LLVMBasicBlockRef bb = LLVMAppendBasicBlock(func, bbname);
+      LLVMBuildCondBr(builder, icmp, yes, bb);
+      LLVMPositionBuilderAtEnd(builder, bb);
+    }
+  }
+
+  LLVMBuildBr(builder, no);
+
+  return true;
+}
+
+/*
+ * Turn an llvm_charset_exec_plan_t into IR
+ *
+ * Returns: true if IR was emitted for the whole plan, false if cep is NULL
+ * or contains an action that cannot be built yet
+ */
+
+static bool h_llvm_cep_to_ir(HAllocator* mm__,
+    LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+    LLVMValueRef r, llvm_charset_exec_plan_t *cep,
+    LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  bool rv;
+
+  if (!cep) return false;
+
+  switch (cep->action) {
+    case CHARSET_ACTION_SCAN:
+      rv = h_llvm_build_ir_for_scan(mod, func, builder,
+          cep->cs, cep->idx_start, cep->idx_end, r, in, yes, no);
+      break;
+    case CHARSET_ACTION_ACCEPT:
+      /* Easy case; just unconditionally branch to the yes output */
+      LLVMPositionBuilderAtEnd(builder, in);
+      LLVMBuildBr(builder, yes);
+      rv = true;
+      break;
+    case CHARSET_ACTION_BITMAP:
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr,
+          "CHARSET_ACTION_BITMAP not yet implemented (cep %p)\n",
+          (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+    case CHARSET_ACTION_COMPLEMENT:
+      /* This is trivial; just swap the 'yes' and 'no' outputs and build the child */
+      rv = h_llvm_cep_to_ir(mm__, mod, func, builder, r, cep->children[0], in, no, yes);
+      break;
+    case CHARSET_ACTION_SPLIT:
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr,
+          "CHARSET_ACTION_SPLIT not yet implemented (cep %p)\n",
+          (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+    default:
+      /* Unknown action type */
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr,
+          "cep %p has unknown action type\n",
+          (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+  }
+
+  return rv;
+}
+
 /*
  * Construct LLVM IR to decide if a runtime value is a member of a compile-time
  * character set, and branch depending on the result.
@@ -802,9 +909,11 @@ static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset
  * - cs [in]: the HCharset to test membership in
  * - yes [in]: the basic block to branch to if r is in cs
  * - no [in]: the basic block to branch to if r is not in cs
+ *
+ * Returns: true on success, false on failure
  */
 
-void h_llvm_make_charset_membership_test(HAllocator* mm__,
+bool h_llvm_make_charset_membership_test(HAllocator* mm__,
                                          LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                          LLVMValueRef r, HCharset cs,
                                          LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
@@ -818,38 +927,47 @@ void h_llvm_make_charset_membership_test(HAllocator* mm__,
    * negations efficiently, so the challenge here is to turn a character map
    * into a minimal set of such propositions.
    *
-   * TODO: actually do this; right now for the sake of a first pass we're just
-   * testing r == x for every x in cs.
+   * We achieve this by building a tree of actions to minimize a cost metric,
+   * and then transforming the tree into IR.
    */
 
+  bool rv;
+
   /* Try building a charset exec plan */
   llvm_charset_exec_plan_t *cep = h_llvm_build_charset_exec_plan(mm__, cs);
-  if (cep) {
-    /* For now just check it and free it */
-    bool ok = h_llvm_check_charset_exec_plan(cep);
-    if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep);
-    else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep);
-    h_llvm_pretty_print_charset_exec_plan(mm__, cep);
-    h_llvm_free_charset_exec_plan(mm__, cep);
-    cep = NULL;
-  } else {
+  if (!cep) {
     fprintf(stderr, "got null from h_llvm_build_charset_exec_plan()\n");
+    return false;
   }
 
-  for (int i = 0; i < 256; ++i) {
-    if (charset_isset(cs, i)) {
-      char bbname[16];
-      uint8_t c = (uint8_t)i;
-      snprintf(bbname, 16, "cs_memb_%02x", c);
-      LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ,
-          LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r");
-      LLVMBasicBlockRef bb = LLVMAppendBasicBlock(func, bbname);
-      LLVMBuildCondBr(builder, icmp, yes, bb);
-      LLVMPositionBuilderAtEnd(builder, bb);
-    }
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+  bool ok = h_llvm_check_charset_exec_plan(cep);
+  if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep);
+  else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep);
+  h_llvm_pretty_print_charset_exec_plan(mm__, cep);
+  if (!ok) {
+    fprintf(stderr, "h_llvm_make_charset_membership_test() error-exiting "
+        "because consistency check failed\n");
+    h_llvm_free_charset_exec_plan(mm__, cep);
+    cep = NULL;
+    return false;
   }
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
 
-  LLVMBuildBr(builder, no);
+  /* Create input block */
+  LLVMBasicBlockRef start = LLVMAppendBasicBlock(func, "cs_start");
+  /*
+   * Make unconditional branch into input block from wherever our caller
+   * had us positioned.
+   */
+  LLVMBuildBr(builder, start);
+
+  rv = h_llvm_cep_to_ir(mm__, mod, func, builder, r, cep, start, yes, no);
+
+  h_llvm_free_charset_exec_plan(mm__, cep);
+  cep = NULL;
+
+  return rv;
 }
 
 #endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 741adcd861cc2d29bcd0d7425965d0d8cf9ff37b..5870fc2ce095a8ba32d20d99c0fb97003feca390 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -85,6 +85,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
    * LLVM to build a function to parse a charset; the args are a stream and an
    * arena.
    */
+  bool ok;
   LLVMValueRef stream = LLVMGetFirstParam(func);
   stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
 
@@ -109,7 +110,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
   /* We have a char, need to check if it's in the charset */
   HCharset cs = (HCharset)env;
   /* Branch to either success or end, conditional on whether r is in cs */
-  h_llvm_make_charset_membership_test(mm__, mod, func, builder, r, cs, success, fail);
+  ok = h_llvm_make_charset_membership_test(mm__, mod, func, builder, r, cs, success, fail);
 
   /* Basic block: success */
   LLVMPositionBuilderAtEnd(builder, success);
@@ -145,7 +146,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
   // ret %struct.HParseResult_.3* %rv
   LLVMBuildRet(builder, rv);
 
-  return true;
+  return ok;
 }
 
 #endif /* defined(HAMMER_LLVM_BACKEND) */