Skip to content
Snippets Groups Projects
Commit efedf64d authored by andrew_miranti's avatar andrew_miranti
Browse files

Upgrades to inum-opcode generation for ease of integration with the semantics

parent d64128da
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
Source diff could not be displayed: it is too large. Options to address this: view the blob.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/python3
"""Turn operand-pattern lines into sample AT&T-style assembly lines.

Each input line is a whitespace-separated list of tokens.  Tokens that are
keys of ``rewrites`` are treated as operand classes and replaced by a
concrete sample operand; every other token is kept verbatim as part of the
opcode.  Operands are emitted in reverse order of appearance.

When a line contains an immediate operand class, one output line is
produced for that width and for every narrower width listed in ``imms``.
"""
import fileinput

# Operand class name -> concrete sample operand text.
rewrites = {
    "al": "%al",
    "ax": "%ax",
    "cl": "%cl",
    "eax": "%eax",
    "imm16": "$0x7FFF",
    "imm32": "$0x7FFFFFFF",
    "imm64": "$0x7FFFFFFFFFFFFFFF",
    "imm8": "$0x7F",
    "m128": "(%rbx)",
    "m16": "(%rbx)",
    "m256": "(%rbx)",
    "m32": "(%rbx)",
    "m64": "(%rbx)",
    "m8": "(%rbx)",
    "one": "$1",
    "r16": "%bx",
    "r32": "%ebx",
    "r64": "%rbx",
    "r8": "%bl",
    "rax": "%rax",
    "rh": "%bh",
    "xmm": "%xmm1",
    "xmm0": "%xmm0",
    "ymm": "%ymm1",
}

# Immediate operand classes, widest first.
imms = ["imm64", "imm32", "imm16", "imm8"]


def expand_line(line):
    """Return the sample assembly lines generated from one input line.

    The first immediate operand (after reversal) is expanded into its own
    width and every narrower width from ``imms``; a line without an
    immediate yields exactly one sample.
    """
    parts = line.split()
    opcodes = [elem for elem in parts if elem not in rewrites]
    # Keep the class names around: the immediate lookup has to be done on
    # the names, because the rewritten text ("$0x7F", ...) is never in imms.
    operand_names = [elem for elem in parts if elem in rewrites]
    operand_names.reverse()
    operands = [rewrites[name] for name in operand_names]
    prefix = " ".join(opcodes) + " "

    imm_position = next(
        (pos for pos, name in enumerate(operand_names) if name in imms),
        None,
    )
    if imm_position is None:
        return [prefix + ", ".join(operands)]

    samples = []
    first_width = imms.index(operand_names[imm_position])
    for imm_name in imms[first_width:]:
        # Substitute the rewritten immediate, not the class name itself.
        operands[imm_position] = rewrites[imm_name]
        samples.append(prefix + ", ".join(operands))
    return samples


if __name__ == "__main__":
    for line in fileinput.input():
        for sample in expand_line(line):
            print(sample)
...@@ -4,12 +4,12 @@ set -e ...@@ -4,12 +4,12 @@ set -e
CUR_DIR="$(pwd)" CUR_DIR="$(pwd)"
X86_SEMANTICS="$1" SCRIPTS="$X86_DECODER/generator"
SCRIPTS="$CUR_DIR/$(dirname $0)" DECODER="$X86_DECODER"
DECODER="$SCRIPTS/../"
INSTRUCTIONS_FILE="$SCRIPTS/datafiles/instructions.s" INSTRUCTIONS_FILE="$SCRIPTS/datafiles/instructions.s"
INUMS_FILE="$SCRIPTS/datafiles/inums.txt" INUMS_FILE="$SCRIPTS/datafiles/inums.txt"
INSTRUCTION_TO_INUM_MAP_FILE="$SCRIPTS/datafiles/full-map.txt"
SAMPLES_FILE=$(mktemp) SAMPLES_FILE=$(mktemp)
BINARY_FILE=$(mktemp) BINARY_FILE=$(mktemp)
...@@ -19,30 +19,33 @@ cat <(find /home/andrewmiranti/Github/X86-64-semantics/semantics/systemInstructi ...@@ -19,30 +19,33 @@ cat <(find /home/andrewmiranti/Github/X86-64-semantics/semantics/systemInstructi
<(find /home/andrewmiranti/Github/X86-64-semantics/semantics/memoryInstructions/ -name "*.k") \ <(find /home/andrewmiranti/Github/X86-64-semantics/semantics/memoryInstructions/ -name "*.k") \
<(find /home/andrewmiranti/Github/X86-64-semantics/semantics/registerInstructions/ -name "*.k") \ <(find /home/andrewmiranti/Github/X86-64-semantics/semantics/registerInstructions/ -name "*.k") \
<(find /home/andrewmiranti/Github/X86-64-semantics/semantics/immediateInstructions/ -name "*.k") | \ <(find /home/andrewmiranti/Github/X86-64-semantics/semantics/immediateInstructions/ -name "*.k") | \
sed -E "s/^.*Instructions\/(.*)\.k$/\1/;/label/d;/opcodes/d" | \ "$SCRIPTS/rewrite-ops.py" > $INSTRUCTIONS_FILE
sort | \
uniq > $INSTRUCTIONS_FILE
sed 's/_/ /g' $INSTRUCTIONS_FILE | "$SCRIPTS/rewrite-ops.py" > $SAMPLES_FILE cut -d'|' -f2 $INSTRUCTIONS_FILE > $SAMPLES_FILE
as -W -o $BINARY_FILE $SAMPLES_FILE as -W -o $BINARY_FILE $SAMPLES_FILE
objdump --insn-width 15 -d $BINARY_FILE | tail -n +8 | grep -Po '\s*[A-Fa-f0-9]+:\s*([A-Fa-f0-9]{2} )*' | sed -E 's/\s//g;s/(\w+):(\w+)/Disassemble(0x\1,0x\2);/' > $K_INPUT_FILE objdump --insn-width 15 -d $BINARY_FILE | tail -n +8 | grep -Po '\s*[A-Fa-f0-9]+:\s*([A-Fa-f0-9]{2} )*' | sed -E 's/\s//g;s/(\w+):(\w+)/Disassemble(0x\1,0x\2);/' > $K_INPUT_FILE
cd $SCRIPTS cp "$DECODER/config/extractor-true.k" "$DECODER/inum-extractor-configuration.k"
./generate.py > $DECODER/generated_decoder_rules.k cp "$DECODER/config/test-configuration.k" "$DECODER/full-configuration.k"
$SCRIPTS/generate.py > $DECODER/generated_decoder_rules.k
cp "$DECODER/config/extractor-true.k" "$DECODER/inum-extractor-configuration.k" cp "$DECODER/config/extractor-true.k" "$DECODER/inum-extractor-configuration.k"
cp "$DECODER/config/inum-to-opcode.k" "$DECODER/inum-to-opcode.k" cp "$DECODER/config/inum-to-opcode.k" "$DECODER/inum-to-opcode.k"
cd $DECODER cd $DECODER
kompile test-decoder.k --debug -v --backend ocaml -I "$X86_SEMANTICS/semantics/" kompile test-decoder.k --debug -v --backend ocaml -I "$X86_SEMANTICS/semantics/" -I "$X86_DECODER"
echo Running
cd $DECODER
pwd
./run-on-large-program.sh $K_INPUT_FILE > $INUMS_FILE ./run-on-large-program.sh $K_INPUT_FILE > $INUMS_FILE
paste -d, <$INUMS_FILE <(grep -Eo '^(rep\w* )?\w+' $SAMPLES_FILE) |\ paste -d'|' $INSTRUCTIONS_FILE $INUMS_FILE > $INSTRUCTION_TO_INUM_MAP_FILE
paste -d, <(cat $INUMS_FILE) <(grep -Eo '^(rep\w* )?\w+' $SAMPLES_FILE) |\
sort -n -k 1 -t, |\ sort -n -k 1 -t, |\
uniq |\ uniq |\
uniq --group -w4 > "$SCRIPTS/datafiles/inum-opcode-map-groups.csv" uniq --group -w4 > "$SCRIPTS/datafiles/inum-opcode-map-groups.csv"
cd $SCRIPTS cd $SCRIPTS
./generate-opcode-decoder.py > "$DECODER/inum-to-opcode.k" ./generate-opcode-decoder.py > "$DECODER/inum-to-opcode.k"
cd $DECODER cp "$DECODER/config/extractor-false.k" "$DECODER/inum-extractor-configuration.k"
cp "./config/extractor-false.k" "./inum-extractor-configuration.k"
rm "$SAMPLES_FILE" "$BINARY_FILE" "$K_INPUT_FILE" rm "$SAMPLES_FILE" "$BINARY_FILE" "$K_INPUT_FILE"
......
...@@ -174,7 +174,7 @@ def generate_decoder(desired, f): ...@@ -174,7 +174,7 @@ def generate_decoder(desired, f):
desired = set([name.upper() for name in desired]) desired = set([name.upper() for name in desired])
for inst in instructions.instruction_list: for inst in instructions.instruction_list:
if len(desired) == 0 or inst.iclass.upper() in desired: if len(desired) == 0 or inst.iclass.upper() in desired or inst.disasm.upper() in desired or inst.disasm_intel.upper() in desired:
used_inums.add(inst.id) used_inums.add(inst.id)
print("// UNAME: " + inst.uname, file=f) print("// UNAME: " + inst.uname, file=f)
print(gen_from_inst(inst).generate_rule().replace("XED_", ""), file=f) print(gen_from_inst(inst).generate_rule().replace("XED_", ""), file=f)
......
...@@ -189,6 +189,7 @@ class Instruction(nonterminals.NonterminalAlternative): ...@@ -189,6 +189,7 @@ class Instruction(nonterminals.NonterminalAlternative):
self.pattern = self.antecedent self.pattern = self.antecedent
self.props = props self.props = props
self.disasm = props["DISASM"] if len(props["DISASM"]) > 0 else props["DISASM_ATTSV"] self.disasm = props["DISASM"] if len(props["DISASM"]) > 0 else props["DISASM_ATTSV"]
self.disasm_intel = props["DISASM"] if len(props["DISASM"]) > 0 else props["DISASM_INTEL"]
self.iclass = props["ICLASS"].strip() self.iclass = props["ICLASS"].strip()
self.iform = props["IFORM"].strip() self.iform = props["IFORM"].strip()
self.category = props["CATEGORY"].strip() self.category = props["CATEGORY"].strip()
...@@ -205,7 +206,7 @@ class Instruction(nonterminals.NonterminalAlternative): ...@@ -205,7 +206,7 @@ class Instruction(nonterminals.NonterminalAlternative):
def __copy__(self): def __copy__(self):
return Instruction(self.props) return Instruction(self.props)
instruction_properties = ["ATTRIBUTES", "CATEGORY", "DISASM", "DISASM_ATTSV", "ICLASS", "UNAME", "VERSION"] # There are more, but these are the ones we care about. instruction_properties = ["ATTRIBUTES", "CATEGORY", "DISASM", "DISASM_ATTSV", "DISASM_INTEL", "ICLASS", "UNAME", "VERSION"] # There are more, but these are the ones we care about.
repeatable_properties = ["PATTERN", "OPERANDS", "IFORM"] # Some instructions have variants that share the above patterns. repeatable_properties = ["PATTERN", "OPERANDS", "IFORM"] # Some instructions have variants that share the above patterns.
prop = re.compile("^(\\w+)\\s*:([^#]*)(#.*)?$") prop = re.compile("^(\\w+)\\s*:([^#]*)(#.*)?$")
def parse_instruction(lines): def parse_instruction(lines):
......
#!/usr/bin/python3 #!/usr/bin/python3
from itertools import product
import fileinput import fileinput
import os
rewrites = { rewrites = {
"al": "%al", "al": "%al",
...@@ -26,13 +28,31 @@ rewrites = { ...@@ -26,13 +28,31 @@ rewrites = {
"rh": "%bh", "rh": "%bh",
"xmm": "%xmm1", "xmm": "%xmm1",
"xmm0": "%xmm0", "xmm0": "%xmm0",
"ymm": "%ymm1" "ymm": "%ymm1",
"rel8": ". + 0x7F",
"rel16": ". + 0x7FFF",
"rel32": ". + 0x7FFFFFFF",
"rel64": ". + 0x7FFFFFFFFFFFFFFF"
} }
multiples_map = {}
multiples_map[rewrites["imm64"]] = [rewrites["imm64"], rewrites["imm32"], rewrites["imm16"], rewrites["imm8"]]
multiples_map[rewrites["imm32"]] = [rewrites["imm32"], rewrites["imm16"], rewrites["imm8"]]
multiples_map[rewrites["imm16"]] = [rewrites["imm16"], rewrites["imm8"]]
multiples_map[rewrites["rel64"]] = [rewrites["rel64"], rewrites["rel32"], rewrites["rel16"], rewrites["rel8"]]
multiples_map[rewrites["rel32"]] = [rewrites["rel32"], rewrites["rel16"], rewrites["rel8"]]
multiples_map[rewrites["rel16"]] = [rewrites["rel16"], rewrites["rel8"]]
for key in rewrites:
rewrite = rewrites[key]
if rewrite not in multiples_map:
multiples_map[rewrite] = [rewrite]
for line in fileinput.input(): for line in fileinput.input():
parts = line.split() parts = os.path.basename(line[:line.find(".")]).split("_")
opcodes = [elem for elem in parts if elem not in rewrites] opcodes = [elem for elem in parts if elem not in rewrites]
operands = [rewrites[elem] for elem in parts if elem in rewrites] operand_possibilities = [multiples_map[rewrites[elem]] for elem in parts if elem in rewrites]
operands.reverse() operand_possibilities.reverse()
print(" ".join(opcodes) + " " + ", ".join(operands)) for operands in product(*operand_possibilities): # Wow, python really does have a function for everything. https://stackoverflow.com/questions/533905/get-the-cartesian-product-of-a-series-of-lists
print(line[:-1] + "|" + " ".join(opcodes) + " " + ", ".join(operands))
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment