From 473435b23a6e1320d2ee6536542d0503becf7bc2 Mon Sep 17 00:00:00 2001
From: Andrea Shepard <andrea@persephoneslair.org>
Date: Mon, 21 Nov 2016 08:30:17 +0000
Subject: [PATCH] Implement CHARSET_ACTION_BITMAP

---
 src/backends/llvm/llvm_charset.c | 93 +++++++++++++++++++++++++++++---
 1 file changed, 87 insertions(+), 6 deletions(-)

diff --git a/src/backends/llvm/llvm_charset.c b/src/backends/llvm/llvm_charset.c
index 16b0550d..cef110a5 100644
--- a/src/backends/llvm/llvm_charset.c
+++ b/src/backends/llvm/llvm_charset.c
@@ -797,6 +797,10 @@ static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset
 }
 
 /* Forward declares for IR-emission functions */
+static bool h_llvm_build_ir_for_bitmap(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+                                       HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                       LLVMValueRef r,
+                                       LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
 static bool h_llvm_build_ir_for_scan(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                      HCharset cs, uint8_t idx_start, uint8_t idx_end,
                                      LLVMValueRef r,
@@ -810,6 +814,87 @@ static bool h_llvm_cep_to_ir(HAllocator* mm__,
                              LLVMValueRef r, llvm_charset_exec_plan_t *cep,
                              LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
 
+/*
+ * Build IR for a CHARSET_ACTION_BITMAP: in block 'in', test r against a bitmap of cs, branch to 'yes'/'no'.
+ * Returns false (emitting nothing) if cs is null or idx_start > idx_end, true otherwise. */
+
+static bool h_llvm_build_ir_for_bitmap(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
+                                       HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                       LLVMValueRef r,
+                                       LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  int i, j;
+  uint32_t bitmap_entry;
+
+  if (!cs) return false;
+  if (idx_start > idx_end) return false;
+
+  /*
+   * Embed an 8x32-bit bitmap in the IR, turn the input value into a word index
+   * by right-shifting 5 bits, load the relevant bitmap word, then derive a mask
+   * from the low-order 5 bits of the input value.  & the mask with the bitmap
+   * word, and compare.  If non-zero, accept, otherwise reject.
+   */
+  LLVMPositionBuilderAtEnd(builder, in);
+
+  /* Construct the bitmap: 8 words of 32 bits cover all 256 char indices */
+  LLVMValueRef bitmap_entries[8];
+  for (i = 0; i < 8; ++i) {
+    bitmap_entry = 0x0;
+    /*
+     * Bit order: LSB is the lowest-numbered char index 32*i, MSB is 32*i + 31,
+     * so the mask we need at lookup time is just 1 << (r & 0x1f).
+     */
+    for (j = 0; j < 32; ++j) {
+      /* Set bit j of word i if char index 32*i + j is in the charset */
+      if (charset_isset(cs, (uint8_t)(32*i + j))) {
+        bitmap_entry |= ((uint32_t)(0x1) << j);
+      }
+    }
+
+    /* Make a 32-bit constant LLVMValueRef for it */
+    bitmap_entries[i] = LLVMConstInt(LLVMInt32Type(), bitmap_entry, 0);
+  }
+  /* Now make a constant array of 8 x i32 out of them */
+  LLVMValueRef bitmap_initializer = LLVMConstArray(LLVMInt32Type(), bitmap_entries, 8);
+  /* ...and we need a global to hold it so we can GEP into it (NOTE(review): never marked constant here) */
+  LLVMValueRef bitmap = LLVMAddGlobal(mod, LLVMTypeOf(bitmap_initializer), "bitmap");
+  LLVMSetInitializer(bitmap, bitmap_initializer);
+
+  /* Compute the word index into the bitmap: r >> 5 (logical shift) */
+  LLVMValueRef word_index = LLVMBuildLShr(builder, r,
+      LLVMConstInt(LLVMInt8Type(), 5, 0), "word_index");
+
+  /* Get a pointer to that word: GEP index 0 steps through the global, word_index picks the element */
+  LLVMValueRef gep_indices[2];
+  gep_indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+  gep_indices[1] = word_index;
+  LLVMValueRef bitmap_word_p =
+    LLVMBuildInBoundsGEP(builder, bitmap, gep_indices, 2, "bitmap_word_p");
+  LLVMValueRef bitmap_word =
+    LLVMBuildLoad(builder, bitmap_word_p, "bitmap_word");
+  /*
+   * Extract the low-order 5 bits of r (the bit position within the word),
+   * and zero-extend to a 32-bit int so it can be used as the shift amount
+   */
+  LLVMValueRef bit_index = LLVMBuildAnd(builder, r,
+      LLVMConstInt(LLVMInt8Type(), 0x1f, 0), "bit_index");
+  LLVMValueRef bit_index_zext = LLVMBuildZExt(builder, bit_index,
+      LLVMInt32Type(), "bit_index_zext");
+  /* Compute mask: 1 << bit_index */
+  LLVMValueRef mask = LLVMBuildShl(builder, LLVMConstInt(LLVMInt32Type(), 1, 0),
+      bit_index_zext, "mask");
+  /* AND the mask with the bitmap word */
+  LLVMValueRef masked_bitmap_word = LLVMBuildAnd(builder, bitmap_word, mask,
+      "masked_bitmap_word");
+  /* Compare it to zero */
+  LLVMValueRef bitmap_icmp = LLVMBuildICmp(builder, LLVMIntNE,
+      masked_bitmap_word, LLVMConstInt(LLVMInt32Type(), 0, 0), "bitmap_icmp");
+  /* If not zero, the char is in the set: branch to 'yes', otherwise to 'no' */
+  LLVMBuildCondBr(builder, bitmap_icmp, yes, no);
+
+  return true;
+}
+
 /*
  * Build IR for a CHARSET_ACTION_SCAN
  */
@@ -916,12 +1001,8 @@ static bool h_llvm_cep_to_ir(HAllocator* mm__,
       rv = true;
       break;
     case CHARSET_ACTION_BITMAP:
-#ifdef HAMMER_LLVM_CHARSET_DEBUG
-      fprintf(stderr,
-              "CHARSET_ACTION_BITMAP not yet implemented (cep %p)\n",
-              (void *)cep);
-#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
-      rv = false;
+      rv = h_llvm_build_ir_for_bitmap(mod, func, builder,
+          cep->cs, cep->idx_start, cep->idx_end, r, in, yes, no);
       break;
     case CHARSET_ACTION_COMPLEMENT:
       /* This is trivial; just swap the 'yes' and 'no' outputs and build the child */
-- 
GitLab