'use strict'; // ------------------------------------------------------------------ // Definitions // ------------------------------------------------------------------ // Use symbols for the names of the instructions // Unsure if this helps in any way over strings, but I feel it neater // +++++ → {type: add, value: 5} // Can have offset property const add = Symbol('add'); // > → {type: moveHead, value: 1} const moveHead = Symbol('moveHead'); // . → {type: writeByte} // Can have offset property const writeByte = Symbol('writeByte'); // , → {type: readByte} // Can have offset property const readByte = Symbol('readByte'); // [-] → {type: loop, contents: [{type: add, value: -1}]} // Can have isBalanced property const loop = Symbol('loop'); // [-] → {type: clear} // Can have offset property const clear = Symbol('clear'); // [>+>++<<-] → {type: move, changes: Map { 0 → -1, 1 → 1, 2 → 2 }} const multiply = Symbol('multiply'); // {type: jumpIfZero, target: 5} const jumpIfZero = Symbol('jumpIfZero'); // {type: jumpIfNonZero, target: 2} const jumpIfNonZero = Symbol('jumpIfNonZero'); // : → {type: writeInt} // Can have offset property const writeInt = Symbol('writeInt'); // ; → {type: readInt} // Can have offset property const readInt = Symbol('readInt'); // # → {type: breakPoint} const breakPoint = Symbol('breakPoint'); class ParsingError extends Error {} class UnknownIRError extends Error {} // ------------------------------------------------------------------ // Parsing // ------------------------------------------------------------------ // (string, bool) → [commandObjects] // enableExtensions contols whether commands :;# are recognized // May throw ParsingError function parse(program, enableExtensions = true) { // (string, int, bool) → {parsed: [commandObjects], lastIndex: int} // index is the index of the next character to consume // inLoop tells whether we're parsing a loop or the top level // lastIndex is the last index the function consumed function constructTree(program, index = 0, inLoop = false) { let commands = []; // Move this out of the loop body since we need to return // the index of the last character we parsed let i = index; for(;;) { if(i >= program.length) { // If we're parsing a loop, we have a // missing ] // If we're parsing the top level, this is // where we should exit if(inLoop) { throw new ParsingError('Missing ]'); } else { break; } i++; } else if(program[i] == ']') { // If we're parsing a loop, this is where we // should exit // If we're parsing the top level, we have a // missing [ if(inLoop) { break; } else { throw new ParsingError('Missing ['); } i++; } else if(program[i] == '+' || program[i] == '-') { // Fold a run of +s and -s into one node let value = 0; for(; i < program.length; i++) { if(program[i] == '+') { value++; } else if(program[i] == '-') { value--; } else { // Reached end of the run break; } } // Only add the command is value is not 0 if(value != 0) { commands.push({type: add, value: value}); } // i is not incremented, since it already // points to a location containig a char we // have not yet handled } else if(program[i] == '<' || program[i] == '>') { // Fold a run of s into one node let value = 0; for(; i < program.length; i++) { if(program[i] == '>') { value++; } else if(program[i] == '<') { value--; } else { // Reached end of the run break; } } // Only add the command is value is not 0 if(value != 0) { commands.push({type: moveHead, value: value}); } // see +/- for why we don't increment i } else if(program[i] == '.') { commands.push({type: writeByte}); i++; } else if(program[i] == ',') { commands.push({type: readByte}); i++; } else if(program[i] == '[') { // Parse a loop. This is done by calling the // same parser function recursively // Due to this the loop appears as one node // in the parsed result let {parsed, lastIndex} = constructTree( program, // Same program data i + 1, // Start at the next char true // We're parsing a loop ); commands.push({type: loop, contents: parsed}); // Since lastIndex was consumed by the inner // function, we don't want to consume it a // second time i = lastIndex + 1; } else if(program[i] == ':' && enableExtensions) { commands.push({type: writeInt}); i++; } else if(program[i] == ';' && enableExtensions) { commands.push({type: readInt}); i++; } else if(program[i] == '#' && enableExtensions) { commands.push({type: breakPoint}); i++; } else { // All others characters are comments, // ignore them i++; } } return {parsed: commands, lastIndex: i}; } // We only care about the parsed contents, since under normal // operarion we only get out of the loop if we've reached the end // of the program let {parsed} = constructTree(program); return parsed; } // ([commandObjects/offsetCommandObjects/flatCommandObjects]) → str function prettifyIR(ir) { // ([commandObjects/offsetCommandObjectsflatCommandObjects], string) → str function worker(ir, indent = '') { let lines = []; for(let i = 0; i < ir.length; i++) { let command = ir[i]; let line = `${indent}${i} `; if(command.type == add) { line += `add ${command.value}`; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == moveHead) { line += `moveHead ${command.value}`; lines.push(line); } else if(command.type == writeByte) { line += 'writeByte'; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == readByte) { line += 'readByte'; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == loop) { line += 'loop'; if('isBalanced' in command) { line += ` (balanced: ${command.isBalanced})`; } lines.push(line); lines = lines.concat(worker(command.contents, indent + ' ')); } else if(command.type == clear) { line += 'clear'; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == multiply) { let changes = []; for(let [offset, value] of command.changes.entries()) { changes.push(`${offset}: ${value}`); } line += `multiply ${changes.join(', ')}`; lines.push(line); } else if(command.type == jumpIfZero) { line += `jumpIfZero ${command.target}`; lines.push(line); } else if(command.type == jumpIfNonZero) { line += `jumpIfNonZero ${command.target}`; lines.push(line); } else if(command.type == writeInt) { line += 'writeInt'; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == readInt) { line += 'readInt'; if('offset' in command) { line += ` (${command.offset})`; } lines.push(line); } else if(command.type == breakPoint) { line += 'breakPoint'; lines.push(line); } else { line += `unknown ${command.type.toString()}`; lines.push(line); } } return lines } return worker(ir).join('\n'); } // ------------------------------------------------------------------ // Optimization passes // ------------------------------------------------------------------ // ([commandObjects]) → [commandObjects] function joinAdjacentOps(parsed) { // ([commandObjects], commandType) → [commandObjects] function worker(parsed, type) { let optimized = []; let prevOfType = false, value = 0; for(let command of parsed) { if(prevOfType && command.type == type) { // Update value, don't add to optimized yet value += command.value; } else if(!prevOfType && command.type == type) { // Start of a possible run of commands prevOfType = true; value = command.value; } else { // If a run has ended, add it to optimized // However, skip it if value is 0 if(prevOfType && value != 0) { optimized.push({type: type, value: value}); } // Add the command for this round if(command.type == loop) { // Recurse optimized.push({type: loop, contents: worker( command.contents, type)}); } else { optimized.push(command); } prevOfType = false; } } // Did we end with a command of given type if(prevOfType) { // Yes, add it to optimized (unless value is 0) if(value != 0) { optimized.push({type: type, value: value}); } } return optimized; } return worker(worker(parsed, moveHead), add); } // ([commandObjects]) → [commandObjects] function transformClearLoops(parsed) { let optimized = []; for(let command of parsed) { // Only match loops like [-] or [+] let isClearLoop = command.type == loop && command.contents.length == 1 && command.contents[0].type == add && (command.contents[0].value == 1 || command.contents[0].value == -1); if(isClearLoop) { optimized.push({type: clear}); } else if(command.type == loop) { // Run for inner loops optimized.push({type: loop, contents: transformClearLoops(command.contents)}); } else { optimized.push(command); } } return optimized; } // ([commandObjects]) → [offsetCommandObjects] function addOffsetProperties(parsed) { // ([commandObjects]) → {offsetted: [offsetCommandObjects], isBalanced: bool} function worker(parsed) { let offsetted = []; let isBalanced = true; let headChange = 0; let offset = 0; for(let command of parsed) { if(command.type == add) { offsetted.push({type: add, value: command.value, offset: offset}); } else if(command.type == moveHead) { offset += command.value; } else if(command.type == writeByte) { offsetted.push({type: writeByte, offset: offset}); } else if(command.type == readByte) { offsetted.push({type: readByte, offset: offset}); } else if(command.type == clear) { offsetted.push({type: clear, offset: offset}); } else if(command.type == loop) { // A loop should be self-contained // If offset is not 0, add a moveHead if(offset != 0) { offsetted.push({type: moveHead, value: offset}); // Mark we've moved the head headChange += offset; } offset = 0; // Run optimization on the loop let result = worker(command.contents); // We're only balanced if our loops are isBalanced = isBalanced && result.isBalanced; offsetted.push({type: loop, contents: result.offsetted, isBalanced: result.isBalanced}); // headChange's value becomes invalid if the // loop is not balanced. However, we only // care about its value when figuring out // our isBalanced, which will be forced to // false if any inner loop is not balanced } else if(command.type == writeInt) { offsetted.push({type: writeInt, offset: offset}); } else if(command.type == readInt) { offsetted.push({type: readInt, offset: offset}); } else if(command.type == breakPoint) { // A breakpoint should have the tape head // where it would be without the offsets // If offset is not 0, add a moveHead if(offset != 0) { offsetted.push({type: moveHead, value: offset}); // Mark we've moved the head headChange += offset; } offset = 0; // Add the breakpoint offsetted.push({type: breakPoint}); } else { throw new UnknownIRError( `Unknown command ${command.type.toString()}`); } } // We need to move the tape head in the end anyways, so // generate moveHead is offseet is not 0 if(offset != 0) { offsetted.push({type: moveHead, value: offset}); } // We're only balanced if relative to start of the loop ends // up as 0 isBalanced = isBalanced && offset + headChange == 0; return {offsetted, isBalanced}; } return worker(parsed).offsetted; } // ([offsetCommandObjects]) → [offsetCommandObjects] function transformMultiplyLoops(offsetted) { let optimized = []; for(let command of offsetted) { let isMultiplyLoop = false; // Not necessarily a multiply loop, since multiply loops, // in addition to being balanced loops with only adds, // also decrement / increment the cell under tape head // by one. However, these are loops we can run through the // next processing step, and we can drop unfit ones after // that let maybeMultiplyLoop = command.type == loop && command.isBalanced && command.contents.every(x => x.type == add); // TODO: Change this to use a Proxy thingie let changes = new Map(); if(maybeMultiplyLoop) { for(let addition of command.contents) { // We already know all of these are adds if(!changes.has(addition.offset)) { changes.set(addition.offset, 0); } let current = changes.get(addition.offset); changes.set(addition.offset, current + addition.value); } // Did we actually have a multiply loop? isMultiplyLoop = changes.has(0) && (changes.get(0) == 1 || changes.get(0) == -1); } if(isMultiplyLoop) { // If changes[0] is 1, we are dealing with // a loop of the type [>-<+], which is // (except for run time) same as [>+<-]. // Transform former into latter if(changes.get(0) == 1) { for(let [offset, value] of changes.entries()) { changes.set(offset, -value); } } optimized.push({type: multiply, changes: changes}); } else if(command.type == loop) { // Recurse optimized.push({type: loop, contents: transformMultiplyLoops(command.contents), balanced: command.balanced}); } else { // Pass through optimized.push(command); } } return optimized; } // ([offsetCommandObjects]) → [flatCommandObjects] function flattenLoops(offsetted) { // ([offsetCommandObjects], int) → [flatCommandObjects] // prevLength tells length of the flattened program up until now function worker(offsetted, prevLength = 0) { let flattened = []; for(let command of offsetted) { if(command.type == loop) { // flattened.length is the index of the next // command in out flattened // flattened.length + prevLength is the // index of it in the resulting combined // flattened // Since this should be the index of the // start of the loop body we want to point // after the next command, which is going // to be the jump let startIndex = flattened.length + prevLength + 1; let loopBody = worker(command.contents, startIndex // length = index of next ); // startIndex + loopBody.length is the index // of the next command after the loop body // The command after it is going to be the // jump back to the start of the body, so we // want to point to the command after it let endIndex = startIndex + loopBody.length + 1; // Add the first jump flattened.push({type: jumpIfZero, target: endIndex}); // Add the loop body flattened = flattened.concat(loopBody); // Add the second loop flattened.push({type: jumpIfNonZero, target: startIndex}); } else { flattened.push(command); } } return flattened; } return worker(offsetted); } // ([commandObjects]) → [flatCommandObjects] function optimize(parsed) { const optimizations = [ joinAdjacentOps, transformClearLoops, addOffsetProperties, transformMultiplyLoops, flattenLoops ] return optimizations.reduce((IR, optimization) => optimization(IR), parsed); } // ------------------------------------------------------------------ // Virtual machine // ------------------------------------------------------------------ // ([flatCommandObject], [int], int/null) → girVMState // onEof tells what to set the cell to in case of EOF, or if it null to keep // the same value function newVM(program, input, onEof = 0) { return { // Initial state for the machine program: program, ip: 0, memory: new Map(), tapeHead: 0, input: input, output: [], // Configuration onEof: onEof }; } // (girVMState, int/null) → {state: girVMState, complete: bool, cycles: int, // intParseFailed: bool, breakPointReached: bool, // lastIndex: int/null} // complete is set to true is the program completed its execution // cycles is the number of cycles the VM ran // intParseFailed is true is parseInt failed to read a valid number // breakPointReached is true if a breakPoint command was executed // lastIndex tells the last memory index accessed by the VM // If maxCycles is null, the program runs until completion function runVM(state, maxCycles = null) { // (int) → int function readMemory(index) { // Return 0 if index doesn't exist if(memory.has(index)) { return memory.get(index); } else { return 0; } } let program = state.program; let ip = state.ip; // Create a copy of the memory, since we're going to modify it // TODO: Make memory into a Proxied thing that returns 0 if it // doesn't have the requested cell let memory = new Map(state.memory.entries()); let tapeHead = state.tapeHead; // Create copies of input and output, since we might modify them let input = state.input.slice(); let output = state.output.slice(); // Flags we want to return let complete = false; let intParseFailed = false; let breakPointReached = false; // Debug features let lastIndex = null; let cycle = 0; for(; maxCycles === null || cycle < maxCycles; cycle++) { // Exit the loop if we run to the end of the program if(ip >= program.length) { // Program completed complete = true; break; } let command = program[ip]; // Calculate the index into the array of the cell we're // accessing let index = tapeHead; switch(command.type) { case add: case writeByte: case readByte: case clear: case writeInt: case readInt: // These have an offset property, add it index += command.offset; } lastIndex = index; // Run the command switch(command.type) { case add: let old = readMemory(index); memory.set(index, (command.value + old) & 0xFF); ip++; break; case moveHead: tapeHead += command.value; // Set lastIndex to the new tape head // position. Technically we do not access // the cell, but otherwise it will point // to the cell we were in previously, so // this allows better debugging lastIndex = tapeHead; ip++; break; case writeByte: output.push(readMemory(index)); ip++; break; case readByte: // Have we reached EOF? if(input.length == 0) { // Yes // If state.onEof is null, don't // change the value // If it's something else, set the // cell to that if(state.onEof !== null) { memory.set(index, state.onEof & 0xff); } } else { // No, return character memory.set(index, input.shift()); } ip++; break; case clear: memory.set(index, 0); ip++; break; case multiply: if(command.changes.get(0) != -1) { throw new UnknownIRError( `multiply where change for 0 is ${command.changes.get(0)}`); } let multiplier = readMemory(index); for(let [offset, change] of command.changes.entries()) { let index = tapeHead + offset; let old = readMemory(index); let value = old + multiplier * change; memory.set(index, value & 0xff); } ip++; break; case jumpIfZero: if(readMemory(index) == 0) { ip = command.target; } else { ip++; } break; case jumpIfNonZero: if(readMemory(index) != 0) { ip = command.target; } else { ip++; } break; case writeInt: let outputStr = readMemory(index).toString(); output.push(...encodeUTF8(outputStr)) ip++; break; case readInt: // Skip any spaces in front while(input.length > 0 && input[0] == 0x20) { input.shift(); } let number = 0; // Read digits let consumedInput = false; while(input.length > 0 && input[0] >= 0x30 && input[0] <= 0x39) { let digit = input.shift() - 0x30; number = number * 10 + digit; consumedInput = true; } // Did we read anything if(consumedInput) { // Yes, clamp value to [0, 255] and // set cell to it number = Math.max( Math.min(number, 255), 0); memory.set(index, number); } else if(input.length == 0) { // No, but there was an EOF // If state.onEof is null, don't // change the value // If it's something else, set the // cell to that if(state.onEof !== null) { memory.set(index, state.onEof & 0xff); } } else { // No, and there wasn't an EOF, so // signal an error intParseFailed = true; } ip++; break; case breakPoint: breakPointReached = true; ip++; break; default: // Unknown command type throw new UnknownIRError( `Unknown command ${command.type.toString()}`); } // Since can't use 'break' from a switch(), do it here if(intParseFailed || breakPointReached) { break; } } let newState = { program, ip, memory, tapeHead, input, output }; return {state: newState, complete: complete, cycles: cycle, intParseFailed: intParseFailed, breakPointReached: breakPointReached, lastIndex: lastIndex}; } // ------------------------------------------------------------------ // UTF-8 // ------------------------------------------------------------------ // string → [int] function encodeUTF8(string) { let encoded = []; for(let character of string) { let codepoint = character.codePointAt(0); if(codepoint < 0x80) { // 0xxxxxxx encoded.push(codepoint); } else if(codepoint < 0x0800) { // 110xxxxx 10xxxxxx let b1 = codepoint >> 6 | 0b11000000; let b2 = codepoint & 0b00111111 | 0b10000000; encoded.push(b1); encoded.push(b2); } else if(codepoint < 0x10000) { // 1110xxxx 10xxxxxx 10xxxxxx let b1 = codepoint >> 12 | 0b11100000; let b2 = codepoint >> 6 & 0b00111111 | 0b10000000; let b3 = codepoint & 0b00111111 | 0b10000000; encoded.push(b1); encoded.push(b2); encoded.push(b3); } else { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx let b1 = codepoint >> 18 | 0b11110000; let b2 = codepoint >> 12 & 0b00111111 | 0b10000000; let b3 = codepoint >> 6 & 0b00111111 | 0b10000000; let b4 = codepoint & 0b00111111 | 0b10000000; encoded.push(b1); encoded.push(b2); encoded.push(b3); encoded.push(b4); } } return encoded; } // [int] → string function decodeUTF8(encoded) { let codePoints = []; for(let i = 0; i < encoded.length;) { let codePoint = 0; let firstByte = encoded[i]; i++; let toRead = null; // Determine number of continuation bytes to read and // decode the first byte into codePoint // Since we'll do the shifts later, we just mask here if(firstByte >> 7 == 0) { // 0xxxxxxx toRead = 0; codePoint = firstByte; } else if(firstByte >> 5 == 0b110) { // 110xxxxx 10xxxxxx toRead = 1; codePoint = firstByte & 0b00011111; } else if(firstByte >> 4 == 0b1110) { // 1110xxxx 10xxxxxx 10xxxxxx toRead = 2; codePoint = firstByte & 0b00001111; } else if(firstByte >> 3 == 0b11110) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx toRead = 3; codePoint = firstByte & 0b00000111; } else { // Illegal sequence, push replacement char codePoints.push(0xFFFD); continue; } for(; toRead > 0 && i < encoded.length; toRead--) { let continuationByte = encoded[i]; i++; // Check that we have a valid continuation byte if(continuationByte >> 6 == 0b10) { // We do, add its contents to codePoint codePoint = codePoint << 6 | continuationByte & 0b00111111; } else { // We don't, break out of the loop break; } } // Did we read all required continuation bytes? if(toRead == 0) { // We did, add the codepoint to the array codePoints.push(codePoint); } else { // We didn't, push replacement char codePoints.push(0xFFFD); } } // Convert to a string let decoded = codePoints.map(x => String.fromCodePoint(x)).join(''); return decoded; } // ------------------------------------------------------------------ // User-facing functions // ------------------------------------------------------------------ // (string, bool) → [flatCommandObjects] function compile(program, enableExtensions = true) { return optimize(parse(program, enableExtensions)); } exports.compile = compile; exports.prettifyIR = prettifyIR; exports.newVM = newVM; exports.runVM = runVM; exports.encodeUTF8 = encodeUTF8; exports.decodeUTF8 = decodeUTF8;