From 5d4587ce6580c47a07903af16804aba668fdfc24 Mon Sep 17 00:00:00 2001
From: Andrea Shepard <andrea@persephoneslair.org>
Date: Mon, 21 Nov 2016 05:07:35 +0000
Subject: [PATCH] Implement IR generation for CHARSET_ACTION_SCAN,
 CHARSET_ACTION_ACCEPT and CHARSET_ACTION_COMPLEMENT

---
 src/backends/llvm/llvm.h         |   2 +-
 src/backends/llvm/llvm_charset.c | 156 ++++++++++++++++++++++++++-----
 src/parsers/charset.c            |   5 +-
 3 files changed, 136 insertions(+), 27 deletions(-)

diff --git a/src/backends/llvm/llvm.h b/src/backends/llvm/llvm.h
index 36f53fcb..a05693bc 100644
--- a/src/backends/llvm/llvm.h
+++ b/src/backends/llvm/llvm.h
@@ -13,7 +13,7 @@
 LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr;
 LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr;
 
-void h_llvm_make_charset_membership_test(HAllocator* mm__,
+bool h_llvm_make_charset_membership_test(HAllocator* mm__,
                                          LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                          LLVMValueRef r, HCharset cs,
                                          LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
diff --git a/src/backends/llvm/llvm_charset.c b/src/backends/llvm/llvm_charset.c
index 1e8591dc..e2f52b18 100644
--- a/src/backends/llvm/llvm_charset.c
+++ b/src/backends/llvm/llvm_charset.c
@@ -9,6 +9,12 @@
 #include "../../internal.h"
 #include "llvm.h"
 
+/*
+ * Define HAMMER_LLVM_CHARSET_DEBUG to enable some debug logging and internal
+ * consistency checking.  Note that it is defined unconditionally here.
+ */
+#define HAMMER_LLVM_CHARSET_DEBUG
+
 typedef enum {
   /*
    * Accept action; this entire range is in the charset.  This action type
@@ -790,6 +796,97 @@ static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset
   h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep, "", "", 0);
 }
 
+/*
+ * Build IR for a CHARSET_ACTION_SCAN: starting in block 'in', compare r
+ * against each member of cs in [idx_start, idx_end]; go to 'yes' on a match
+ * or to 'no' if none match.  Returns false if !cs or idx_start > idx_end.
+ */
+
+static bool h_llvm_build_ir_for_scan(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+                                     HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                     LLVMValueRef r,
+                                     LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  if (!cs || idx_start > idx_end) return false;
+
+  LLVMPositionBuilderAtEnd(builder, in);
+
+  for (int ch = idx_start; ch <= idx_end; ++ch) {
+    char label[16];
+    LLVMValueRef is_match;
+    LLVMBasicBlockRef next;
+
+    /* Characters outside the set need no comparison */
+    if (!charset_isset(cs, ch)) continue;
+
+    /* Equal: jump to 'yes'; otherwise continue in a fresh block */
+    snprintf(label, sizeof(label), "cs_memb_%02x", (uint8_t)ch);
+    is_match = LLVMBuildICmp(builder, LLVMIntEQ,
+        LLVMConstInt(LLVMInt8Type(), (uint8_t)ch, 0), r, "c == r");
+    next = LLVMAppendBasicBlock(func, label);
+    LLVMBuildCondBr(builder, is_match, yes, next);
+    LLVMPositionBuilderAtEnd(builder, next);
+  }
+
+  LLVMBuildBr(builder, no);  /* nothing in the range matched */
+  return true;
+}
+
+/*
+ * Turn an llvm_charset_exec_plan_t into IR.  Code is built starting in
+ * basic block 'in' and leaves through the 'yes' or 'no' blocks.
+ * Returns true on success; false if cep is NULL, a sub-build fails, or
+ * the action type is unimplemented (BITMAP, SPLIT) or unknown.
+ */
+
+static bool h_llvm_cep_to_ir(HAllocator* mm__,
+                             LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+                             LLVMValueRef r, llvm_charset_exec_plan_t *cep,
+                             LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  bool rv = false;
+
+  if (!cep) return false;
+
+  switch (cep->action) {
+    case CHARSET_ACTION_SCAN:
+      rv = h_llvm_build_ir_for_scan(mod, func, builder,
+          cep->cs, cep->idx_start, cep->idx_end, r, in, yes, no);
+      break;
+    case CHARSET_ACTION_ACCEPT:
+      /* Easy case: branch unconditionally to 'yes'; this cannot fail */
+      LLVMPositionBuilderAtEnd(builder, in);
+      LLVMBuildBr(builder, yes);
+      rv = true;
+      break;
+    case CHARSET_ACTION_BITMAP:
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr, "CHARSET_ACTION_BITMAP not yet implemented (cep %p)\n",
+              (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+    case CHARSET_ACTION_COMPLEMENT:
+      /* This is trivial; just swap the 'yes' and 'no' outputs and build the child */
+      rv = h_llvm_cep_to_ir(mm__, mod, func, builder, r, cep->children[0], in, no, yes);
+      break;
+    case CHARSET_ACTION_SPLIT:
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr, "CHARSET_ACTION_SPLIT not yet implemented (cep %p)\n",
+              (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+    default:
+      /* Unknown action type */
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+      fprintf(stderr, "cep %p has unknown action type\n", (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+      rv = false;
+      break;
+  }
+
+  return rv;
+}
+
 /*
  * Construct LLVM IR to decide if a runtime value is a member of a compile-time
  * character set, and branch depending on the result.
@@ -802,9 +899,11 @@ static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset
  *  - cs [in]: the HCharset to test membership in
  *  - yes [in]: the basic block to branch to if r is in cs
  *  - no [in]: the basic block to branch to if r is not in cs
+ *
+ * Returns: true on success, false on failure
  */
 
-void h_llvm_make_charset_membership_test(HAllocator* mm__,
+bool h_llvm_make_charset_membership_test(HAllocator* mm__,
                                          LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                          LLVMValueRef r, HCharset cs,
                                          LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
@@ -818,38 +917,47 @@ void h_llvm_make_charset_membership_test(HAllocator* mm__,
    * negations efficiently, so the challenge here is to turn a character map
    * into a minimal set of such propositions.
    *
-   * TODO: actually do this; right now for the sake of a first pass we're just
-   * testing r == x for every x in cs.
+   * We achieve this by building a tree of actions to minimize a cost metric,
+   * and then transforming the tree into IR.
    */
 
+  bool rv;
+
   /* Try building a charset exec plan */
   llvm_charset_exec_plan_t *cep = h_llvm_build_charset_exec_plan(mm__, cs);
-  if (cep) {
-    /* For now just check it and free it */
-    bool ok = h_llvm_check_charset_exec_plan(cep);
-    if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep);
-    else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep);
-    h_llvm_pretty_print_charset_exec_plan(mm__, cep);
-    h_llvm_free_charset_exec_plan(mm__, cep);
-    cep = NULL;
-  } else {
+  if (!cep) {
     fprintf(stderr, "got null from h_llvm_build_charset_exec_plan()\n");
+    return false;
   }
 
-  for (int i = 0; i < 256; ++i) {
-    if (charset_isset(cs, i)) {
-      char bbname[16];
-      uint8_t c = (uint8_t)i;
-      snprintf(bbname, 16, "cs_memb_%02x", c);
-      LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ,
-          LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r");
-      LLVMBasicBlockRef bb = LLVMAppendBasicBlock(func, bbname);
-      LLVMBuildCondBr(builder, icmp, yes, bb);
-      LLVMPositionBuilderAtEnd(builder, bb);
-    }
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+  bool ok = h_llvm_check_charset_exec_plan(cep);
+  if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep);
+  else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep);
+  h_llvm_pretty_print_charset_exec_plan(mm__, cep);
+  if (!ok) {
+    fprintf(stderr, "h_llvm_make_charset_membership_test() error-exiting "
+            "because consistency check failed\n");
+    h_llvm_free_charset_exec_plan(mm__, cep);
+    cep = NULL;
+    return false;
   }
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
 
-  LLVMBuildBr(builder, no);
+  /* Create input block */
+  LLVMBasicBlockRef start = LLVMAppendBasicBlock(func, "cs_start");
+  /*
+   * Make unconditional branch into input block from wherever our caller
+   * had us positioned.
+   */
+  LLVMBuildBr(builder, start);
+
+  rv = h_llvm_cep_to_ir(mm__, mod, func, builder, r, cep, start, yes, no);
+
+  h_llvm_free_charset_exec_plan(mm__, cep);
+  cep = NULL;
+
+  return rv;
 }
 
 #endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 741adcd8..5870fc2c 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -85,6 +85,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
    * LLVM to build a function to parse a charset; the args are a stream and an
    * arena.
    */
+  bool ok;
 
   LLVMValueRef stream = LLVMGetFirstParam(func);
   stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
@@ -109,7 +110,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
   /* We have a char, need to check if it's in the charset */
   HCharset cs = (HCharset)env;
   /* Branch to either success or end, conditional on whether r is in cs */
-  h_llvm_make_charset_membership_test(mm__, mod, func, builder, r, cs, success, fail);
+  ok = h_llvm_make_charset_membership_test(mm__, mod, func, builder, r, cs, success, fail);
 
   /* Basic block: success */
   LLVMPositionBuilderAtEnd(builder, success);
@@ -145,7 +146,7 @@ static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
   // ret %struct.HParseResult_.3* %rv
   LLVMBuildRet(builder, rv);
 
-  return true;
+  return ok;
 }
 
 #endif /* defined(HAMMER_LLVM_BACKEND) */
-- 
GitLab