Skip to content
Snippets Groups Projects
Commit 143ac5e1 authored by Meredith L. Patterson's avatar Meredith L. Patterson Committed by GitHub
Browse files

Merge pull request #34 from puellavulnerata/llvm-backend

LLVM Backend - charset support and some refactoring with generators of common chunks of LLVM IR
parents 52599833 718df9a8
No related branches found
No related tags found
No related merge requests found
......@@ -145,6 +145,130 @@ void h_llvm_free(HParser *parser) {
llvm_parser->mod = NULL;
}
/*
 * Construct LLVM IR to decide if a runtime value is a member of a compile-time
 * character set, and branch depending on the result.
 *
 * The charset is scanned once at IR-construction time and every maximal run
 * of consecutive members [lo, hi] is turned into a single conditional branch:
 *
 *  - lo == hi:  icmp eq  lo, r
 *  - lo <  hi:  icmp ule (r - lo), (hi - lo)
 *
 * After each test a fresh basic block named "cs_memb_XX" (XX = hi in hex) is
 * appended to func for the fall-through case, and the builder is repositioned
 * there. The last fall-through block (or the block the builder started in, if
 * cs is empty) is terminated with an unconditional branch to no.
 *
 * Parameters:
 *  - mod [in]: an LLVMModuleRef (not used by this function)
 *  - func [in]: an LLVMValueRef to the function to add the new basic blocks
 *  - builder [in]: an LLVMBuilderRef, positioned appropriately
 *  - r [in]: an LLVMValueRef to the value to test; it is compared against
 *    i8 constants
 *  - cs [in]: the HCharset to test membership in
 *  - yes [in]: the basic block to branch to if r is in cs
 *  - no [in]: the basic block to branch to if r is not in cs
 */
void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
                                         LLVMValueRef r, HCharset cs,
                                         LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
  /*
   * A charset is a 256-element bit array. Ours is static at compile time, so
   * we emit one test per run of set bits instead of one test per set bit; a
   * long consecutive range therefore costs a single comparison.
   *
   * TODO: this is not yet minimal - e.g. a charset with only a few holes
   * would be cheaper to express as the negation of its complement.
   */
  int i = 0;

  while (i < 256) {
    if (!charset_isset(cs, i)) {
      ++i;
      continue;
    }

    /* [lo, hi] is the maximal run of members that starts at i */
    int lo = i;
    while (i < 256 && charset_isset(cs, i)) {
      ++i;
    }
    int hi = i - 1;

    LLVMValueRef in_run;
    if (lo == hi) {
      /* Isolated member: plain equality test */
      in_run = LLVMBuildICmp(builder, LLVMIntEQ,
                             LLVMConstInt(LLVMInt8Type(), lo, 0), r, "c == r");
    } else {
      /*
       * Range test with one comparison: in wrapping 8-bit arithmetic,
       * lo <= r <= hi holds exactly when (r - lo) <= (hi - lo) unsigned.
       * The subtraction is skipped when lo is 0.
       */
      LLVMValueRef offset = r;
      if (lo != 0) {
        offset = LLVMBuildSub(builder, r,
                              LLVMConstInt(LLVMInt8Type(), lo, 0), "r - lo");
      }
      in_run = LLVMBuildICmp(builder, LLVMIntULE, offset,
                             LLVMConstInt(LLVMInt8Type(), hi - lo, 0),
                             "lo <= r <= hi");
    }

    /* Fall-through block for "not in this run"; later tests are emitted there */
    char bbname[16];
    snprintf(bbname, sizeof(bbname), "cs_memb_%02x", (unsigned int)hi);
    LLVMBasicBlockRef next = LLVMAppendBasicBlock(func, bbname);
    LLVMBuildCondBr(builder, in_run, yes, next);
    LLVMPositionBuilderAtEnd(builder, next);
  }

  /* Nothing matched */
  LLVMBuildBr(builder, no);
}
/*
 * Construct LLVM IR that allocates an HParsedToken from the arena, fills it
 * in as an unsigned-integer token holding r, and passes it to make_result().
 *
 * Parameters:
 *  - mod [in]: an LLVMModuleRef; "h_arena_malloc" and "make_result" are
 *    looked up in it by name
 *  - builder [in]: an LLVMBuilderRef, positioned appropriately
 *  - stream [in]: a value ref to an llvm_inputstreamptr, for the input stream
 *  - arena [in]: a value ref to an llvm_arenaptr to be used for the malloc
 *  - r [in]: a value ref to the value to be stored in this token
 *  - mr_out [out]: receives the value returned by the make_result() call
 *
 * TODO actually support TT_SINT, inputs other than 8 bit
 */
void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder,
                          LLVMValueRef stream, LLVMValueRef arena,
                          LLVMValueRef r, LLVMValueRef *mr_out) {
  LLVMTypeRef i32 = LLVMInt32Type();
  LLVMTypeRef i64 = LLVMInt64Type();

  /*
   * Allocate the token:
   *
   * %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48)
   * %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2*
   */
  LLVMValueRef malloc_fn = LLVMGetNamedFunction(mod, "h_arena_malloc");
  LLVMValueRef malloc_argv[2];
  malloc_argv[0] = arena;
  malloc_argv[1] = LLVMConstInt(i32, sizeof(HParsedToken), 0);
  LLVMValueRef raw_mem = LLVMBuildCall(builder, malloc_fn, malloc_argv, 2,
                                       "h_arena_malloc");
  LLVMValueRef token = LLVMBuildBitCast(builder, raw_mem, llvm_parsedtokenptr,
                                        "tok");

  /*
   * tok->token_type = TT_UINT;
   *
   * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0
   * store i32 8, i32* %token_type
   *
   * TODO if we handle TT_SINT too, adjust here and the zero-ext below
   */
  LLVMValueRef type_slot = LLVMBuildStructGEP(builder, token, 0, "token_type");
  LLVMBuildStore(builder, LLVMConstInt(i32, 8, 0), type_slot);

  /*
   * tok->uint = r;
   *
   * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1
   *
   * TODO: this is where other input types need handling (sign- vs.
   * zero-extend, and no extend at all if r is already 64-bit)
   */
  LLVMValueRef data_slot = LLVMBuildStructGEP(builder, token, 1, "token_data");
  LLVMValueRef widened = LLVMBuildZExt(builder, r, i64, "r");
  LLVMBuildStore(builder, widened, data_slot);

  /*
   * Copy the index from the stream into the token
   *
   * %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2
   * %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2
   * %4 = load i64, i64* %s_index
   * store i64 %4, i64* %t_index
   */
  LLVMValueRef tok_index_slot = LLVMBuildStructGEP(builder, token, 2, "t_index");
  LLVMValueRef stream_index_slot = LLVMBuildStructGEP(builder, stream, 2, "s_index");
  LLVMValueRef stream_index = LLVMBuildLoad(builder, stream_index_slot, "");
  LLVMBuildStore(builder, stream_index, tok_index_slot);

  /* Store the bit length into the token; TODO handle multiple bit lengths */
  LLVMValueRef bitlen_slot = LLVMBuildStructGEP(builder, token, 3, "bit_length");
  LLVMBuildStore(builder, LLVMConstInt(i64, 8, 0), bitlen_slot);

  /*
   * Wrap the token in a parse result
   *
   * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3)
   */
  LLVMValueRef make_result_fn = LLVMGetNamedFunction(mod, "make_result");
  LLVMValueRef make_result_argv[2];
  make_result_argv[0] = arena;
  make_result_argv[1] = token;
  *mr_out = LLVMBuildCall(builder, make_result_fn, make_result_argv, 2,
                          "make_result");
}
HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
const HLLVMParser *llvm_parser = parser->backend_data;
HArena *arena = h_new_arena(mm__, 0);
......
......@@ -9,4 +9,11 @@
/*
 * LLVM type handles used by the LLVM backend's IR generators. Within the code
 * visible alongside this header, llvm_inputstreamptr is the type the stream
 * argument is bitcast to, llvm_parsedtokenptr is the type an arena allocation
 * is bitcast to before its fields are written, and llvm_parseresultptr is the
 * type of the parse-result phi node / null result.
 *
 * NOTE(review): these are declared without `extern` in a header, so every
 * translation unit that includes it gets its own tentative definition. That
 * fails to link when common symbols are disabled (-fno-common, the default
 * since GCC 10) - confirm whether they should be `extern` here with a single
 * definition in one .c file.
 */
LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr;
LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr;
/*
 * Emit IR that tests whether the runtime value r is a member of the
 * compile-time charset cs, branching to `yes` if it is and to `no` otherwise.
 * New basic blocks are appended to func; builder must already be positioned.
 */
void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
LLVMValueRef r, HCharset cs,
LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
/*
 * Emit IR that allocates an HParsedToken from arena, stores r in it as an
 * unsigned integer token, and calls make_result(); the value of that call is
 * written to *mr_out.
 */
void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder,
LLVMValueRef stream, LLVMValueRef arena,
LLVMValueRef r, LLVMValueRef *mr_out);
#endif // #ifndef HAMMER_LLVM__H
......@@ -85,42 +85,9 @@ static bool ch_llvm(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod
// Basic block: success
LLVMPositionBuilderAtEnd(builder, success);
// Set up call to h_arena_malloc() for a new HParsedToken
LLVMValueRef tok_size = LLVMConstInt(LLVMInt32Type(), sizeof(HParsedToken), 0);
LLVMValueRef amalloc_args[] = { arena, tok_size };
// %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48)
LLVMValueRef amalloc = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_arena_malloc"), amalloc_args, 2, "h_arena_malloc");
// %3 = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2*
LLVMValueRef tok = LLVMBuildBitCast(builder, amalloc, llvm_parsedtokenptr, "");
// tok->token_type = TT_UINT;
//
// %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0
LLVMValueRef toktype = LLVMBuildStructGEP(builder, tok, 0, "token_type");
// store i32 8, i32* %token_type
LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 8, 0), toktype);
// tok->uint = r;
//
// %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1
LLVMValueRef tokdata = LLVMBuildStructGEP(builder, tok, 1, "token_data");
// %r = zext i8 %2 to i64
// store i64 %r, i64* %token_data
LLVMBuildStore(builder, LLVMBuildZExt(builder, r, LLVMInt64Type(), "r"), tokdata);
// %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2
LLVMValueRef tokindex = LLVMBuildStructGEP(builder, tok, 2, "t_index");
// %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2
LLVMValueRef streamindex = LLVMBuildStructGEP(builder, stream, 2, "s_index");
// %4 = load i64, i64* %s_index
// store i64 %4, i64* %t_index
LLVMBuildStore(builder, LLVMBuildLoad(builder, streamindex, ""), tokindex);
LLVMValueRef tokbitlen = LLVMBuildStructGEP(builder, tok, 3, "bit_length");
LLVMBuildStore(builder, LLVMConstInt(LLVMInt64Type(), 8, 0), tokbitlen);
// Now call make_result()
// %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3)
LLVMValueRef result_args[] = { arena, tok };
LLVMValueRef mr = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "make_result"), result_args, 2, "make_result");
/* Make a token */
LLVMValueRef mr;
h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
// br label %ch_end
LLVMBuildBr(builder, end);
......
#include <assert.h>
#include <string.h>
#include "../internal.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include "parser_internal.h"
#include "../llvm.h"
static HParseResult* parse_charset(void *env, HParseState *state) {
uint8_t in = h_read_bits(&state->input_stream, 8, false);
......@@ -70,12 +75,82 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
return true;
}
/*
 * Build the LLVM IR body of a function that parses one character belonging to
 * a charset. The function's first parameter is the input stream and its last
 * parameter is the arena; env carries the HCharset.
 *
 * Shape of the generated code:
 *   cs_entry:   read 8 bits, then run the charset membership test
 *   cs_success: build a token and a parse result, branch to cs_end
 *   cs_fail:    branch to cs_end
 *   cs_end:     return phi(result from cs_success, null from cs_fail)
 *
 * Always returns true.
 */
static bool cs_llvm(LLVMBuilderRef builder, LLVMValueRef func,
                    LLVMModuleRef mod, void* env) {
  /*
   * NOTE(review): this bitcast is emitted before the builder is positioned in
   * cs_entry, so it lands wherever the caller left the builder - confirm that
   * is intended.
   */
  LLVMValueRef stream = LLVMBuildBitCast(builder, LLVMGetFirstParam(func),
                                         llvm_inputstreamptr, "stream");
  LLVMValueRef arena = LLVMGetLastParam(func);

  /* Create all four basic blocks up front */
  LLVMBasicBlockRef bb_entry = LLVMAppendBasicBlock(func, "cs_entry");
  LLVMBasicBlockRef bb_success = LLVMAppendBasicBlock(func, "cs_success");
  LLVMBasicBlockRef bb_fail = LLVMAppendBasicBlock(func, "cs_fail");
  LLVMBasicBlockRef bb_end = LLVMAppendBasicBlock(func, "cs_end");

  /* cs_entry: read one 8-bit character from the stream */
  LLVMPositionBuilderAtEnd(builder, bb_entry);
  LLVMValueRef read_args[] = {
    stream,
    LLVMConstInt(LLVMInt32Type(), 8, 0),
    LLVMConstInt(LLVMInt8Type(), 0, 0)
  };
  LLVMValueRef read_fn = LLVMGetNamedFunction(mod, "h_read_bits");
  LLVMValueRef raw_bits = LLVMBuildCall(builder, read_fn, read_args, 3, "read_bits");
  /* TODO Necessary? (same question in ch_llvm()) */
  LLVMValueRef r = LLVMBuildTrunc(builder, raw_bits, LLVMInt8Type(), "");

  /* Still in cs_entry: branch to cs_success if r is in the charset, else cs_fail */
  h_llvm_make_charset_membership_test(mod, func, builder, r, (HCharset)env,
                                      bb_success, bb_fail);

  /* cs_success: make the token and the parse result, then br label %cs_end */
  LLVMPositionBuilderAtEnd(builder, bb_success);
  LLVMValueRef mr;
  h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
  LLVMBuildBr(builder, bb_end);

  /*
   * cs_fail: nothing to do except br label %cs_end. The block exists so the
   * phi node in cs_end has a single, known predecessor for the failure case
   * rather than one of the blocks created inside
   * h_llvm_make_charset_membership_test().
   */
  LLVMPositionBuilderAtEnd(builder, bb_fail);
  LLVMBuildBr(builder, bb_end);

  /*
   * cs_end:
   *   %rv = phi %struct.HParseResult_.3* [ %make_result, %cs_success ], [ null, %cs_fail ]
   *   ret %struct.HParseResult_.3* %rv
   */
  LLVMPositionBuilderAtEnd(builder, bb_end);
  LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv");
  LLVMBasicBlockRef incoming_blocks[2];
  LLVMValueRef incoming_values[2];
  incoming_blocks[0] = bb_success;
  incoming_values[0] = mr;
  incoming_blocks[1] = bb_fail;
  incoming_values[1] = LLVMConstNull(llvm_parseresultptr);
  LLVMAddIncoming(rv, incoming_values, incoming_blocks, 2);
  LLVMBuildRet(builder, rv);

  return true;
}
/*
 * Vtable for the charset parser: wires up the charset-specific parse,
 * desugar, RVM-compile and LLVM-codegen functions from this file, and uses
 * h_true for both validity checks.
 */
static const HParserVtable charset_vt = {
.parse = parse_charset,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_charset,
.compile_to_rvm = cs_ctrvm,
.llvm = cs_llvm, /* LLVM backend code generator for charsets */
.higher = false,
};
......
......@@ -963,4 +963,5 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
g_test_add_data_func("/core/parser/llvm/ch", GINT_TO_POINTER(PB_LLVM), test_ch);
g_test_add_data_func("/core/parser/llvm/ch_range", GINT_TO_POINTER(PB_LLVM), test_ch_range);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment