Skip to content

Commit

Permalink
Implement inline memcpy()/memmove()/memset() lowering + special HuC62…
Browse files Browse the repository at this point in the history
…80 block move handling.
  • Loading branch information
asiekierka committed Jul 1, 2023
1 parent fcb18d4 commit 9df5c7f
Show file tree
Hide file tree
Showing 11 changed files with 405 additions and 2 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/MOS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_llvm_target(MOSCodeGen
MOSCombiner.cpp
MOSCopyOpt.cpp
MOSFrameLowering.cpp
MOSHuCBlockCopy.cpp
MOSISelLowering.cpp
MOSIndexIV.cpp
MOSInlineAsmLowering.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/MOS/MCTargetDesc/MOSMCTargetDesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ enum OperandType : unsigned {
OPERAND_IMM8 = MCOI::OPERAND_FIRST_TARGET,
OPERAND_ADDR8,
OPERAND_ADDR16,
OPERAND_IMM16
};

} // namespace MOSOp
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/MOS/MOS.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace llvm {

void initializeMOSCombinerPass(PassRegistry &);
void initializeMOSCopyOptPass(PassRegistry &);
void initializeMOSHuCBlockCopyPass(PassRegistry &);
void initializeMOSIncDecPhiPass(PassRegistry &);
void initializeMOSIndexIVPass(PassRegistry &);
void initializeMOSInsertCopiesPass(PassRegistry &);
Expand Down
297 changes: 297 additions & 0 deletions llvm/lib/Target/MOS/MOSHuCBlockCopy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
//===-- MOSHuCBlockCopy.cpp - MOS HuC6280 Block Copy ----------------------===//
//
// Part of LLVM-MOS, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the MOS pass to lower G_MEMCPY/G_MEMCPY_INLINE/G_MEMSET
// opcodes, as well as certain G_STORE and G_LOAD/G_STORE patterns, to HuC6280
// block transfer instructions prior to legalization. This has to happen early,
// as it requires information which is difficult to extract mid-legalization
// (whether operands are constants, including opaque ones such as symbols).
//
//===----------------------------------------------------------------------===//

#include "MOSHuCBlockCopy.h"

#include "MCTargetDesc/MOSMCTargetDesc.h"
#include "MOS.h"
#include "MOSSubtarget.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <optional>

#define DEBUG_TYPE "mos-hucblockcopy"

using namespace llvm;

namespace {

/// Machine function pass that rewrites eligible memory copy/set operations
/// into HuCMemcpy block transfer instructions. The transformation itself lives
/// in runOnMachineFunction(), which does nothing unless the subtarget reports
/// HuC6280 support.
class MOSHuCBlockCopy : public MachineFunctionPass {
  // When set, runOnMachineFunction() caps every emitted transfer at 16 bytes
  // instead of UINT16_MAX, splitting longer copies into several transfers.
  // TODO: Make this configurable, ideally as an integer setting the maximum
  // transfer command limit.
  bool HuCIrqSafeBlockCopies = true;

public:
  /// Pass identifier handed to the MachineFunctionPass base class.
  static char ID;

  /// Registers the pass with the global pass registry on construction.
  MOSHuCBlockCopy() : MachineFunctionPass(ID) {
    llvm::initializeMOSHuCBlockCopyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // namespace

/// Resolve operand \p OpIdx of \p StartMI to a constant machine operand.
///
/// Starting from the register used by the operand, the chain of defining
/// instructions is followed through COPY (virtual sources only) and
/// G_INTTOPTR until a G_CONSTANT or G_GLOBAL_VALUE is reached.
///
/// \param MRI     Register info used to look up virtual register definitions.
/// \param StartMI Instruction whose operand is inspected.
/// \param OpIdx   Index of the (register) operand to resolve.
/// \returns The defining constant as a plain immediate (for CImm operands) or
///          a copy of the defining instruction's operand 1 otherwise (e.g. a
///          global address); std::nullopt if the chain reaches a physical
///          register, an unsupported opcode, a register without a definition,
///          or an integer constant that does not fit in 64 bits.
static std::optional<MachineOperand>
getConstantOperand(MachineRegisterInfo &MRI, MachineInstr &StartMI,
                   int OpIdx) {
  Register VReg = StartMI.getOperand(OpIdx).getReg();

  while (MachineInstr *MI = MRI.getVRegDef(VReg)) {
    switch (MI->getOpcode()) {
    case TargetOpcode::COPY:
      VReg = MI->getOperand(1).getReg();
      // Physical registers have no single defining instruction to follow.
      if (VReg.isPhysical())
        return std::nullopt;
      break;
    case TargetOpcode::G_INTTOPTR:
      VReg = MI->getOperand(1).getReg();
      break;
    case TargetOpcode::G_CONSTANT:
    case TargetOpcode::G_GLOBAL_VALUE: {
      const MachineOperand &Value = MI->getOperand(1);
      if (!Value.isCImm())
        return Value;
      // MOSMCInstLower does not support CImmediates, so hand back a plain
      // immediate. getZExtValue() asserts on values needing more than 64
      // bits (possible for wide scalar constants), so reject those instead
      // of crashing the compiler.
      const APInt &Bits = Value.getCImm()->getValue();
      if (Bits.getActiveBits() > 64)
        return std::nullopt;
      return MachineOperand::CreateImm(Bits.getZExtValue());
    }
    default:
      return std::nullopt;
    }
  }

  return std::nullopt;
}

/// Extract the numeric value of \p Operand if it is an integer constant.
///
/// \returns The value of an immediate operand, the zero-extended value of a
///          CImm operand, or std::nullopt for every other operand kind (such
///          as global addresses).
static std::optional<uint64_t>
getUInt64FromConstantOper(MachineOperand &Operand) {
  switch (Operand.getType()) {
  case MachineOperand::MO_Immediate:
    return Operand.getImm();
  case MachineOperand::MO_CImmediate:
    return Operand.getCImm()->getZExtValue();
  default:
    return std::nullopt;
  }
}

/// Build a copy of the constant operand \p Operand displaced by \p Offset.
///
/// \param Operand An immediate, CImm or global address operand.
/// \param Offset  Signed displacement added to the value (or to the global's
///                offset).
/// \returns A new operand describing the displaced constant. CImm inputs are
///          returned as plain immediates, matching getConstantOperand(),
///          because MOSMCInstLower does not support CImmediates.
///
/// Any other operand kind is a programming error and hits llvm_unreachable.
static MachineOperand offsetMachineOperand(MachineOperand Operand,
                                           int64_t Offset) {
  if (Operand.isImm())
    return MachineOperand::CreateImm(Operand.getImm() + Offset);
  if (Operand.isCImm()) {
    // getCImm() yields a ConstantInt pointer; the offset has to be applied to
    // the constant's value, not to the pointer itself.
    const uint64_t Value = Operand.getCImm()->getZExtValue();
    return MachineOperand::CreateImm(
        static_cast<int64_t>(Value + static_cast<uint64_t>(Offset)));
  }
  if (Operand.isGlobal())
    return MachineOperand::CreateGA(Operand.getGlobal(),
                                    Operand.getOffset() + Offset);
  llvm_unreachable("Unsupported machine operand type!");
}

/// Replace eligible memory operations in \p MF with HuCMemcpy transfers.
///
/// Candidates are G_MEMCPY, G_MEMCPY_INLINE and G_MEMSET, scalar G_STOREs of
/// the constant zero, and a pointer-typed G_LOAD immediately followed by the
/// G_STORE of the loaded value. A candidate is only rewritten when source,
/// destination and length resolve to constants (see getConstantOperand) and
/// the length lies inside a window chosen from the function's optimization
/// attributes. Sets are lowered as a single byte store to the destination
/// followed by a transfer from Dst to Dst + 1 which propagates that byte.
///
/// \returns true if at least one instruction was rewritten; false otherwise,
///          including when the subtarget lacks HuC6280 support.
bool MOSHuCBlockCopy::runOnMachineFunction(MachineFunction &MF) {
  const MOSSubtarget &STI = MF.getSubtarget<MOSSubtarget>();
  if (!STI.hasHUC6280())
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (auto I = MBB.begin(), E = MBB.end(); I != E; ++I) {
      MachineInstr &MI = *I;
      MachineInstr *MIPrev = nullptr;

      std::optional<MachineOperand> Src = std::nullopt;
      Register DstReg = 0;
      std::optional<MachineOperand> Dst = std::nullopt;
      std::optional<MachineOperand> Len = std::nullopt;
      bool IsLoadStorePair = false;
      bool IsSet = false;

      // Match supported combinations.
      // TODO: Support memmove (with TDD opcode).
      if (MI.getOpcode() == MOS::G_MEMCPY ||
          MI.getOpcode() == MOS::G_MEMCPY_INLINE ||
          MI.getOpcode() == MOS::G_MEMSET) {
        DstReg = MI.getOperand(0).getReg();
        Dst = getConstantOperand(MRI, MI, 0);
        Src = getConstantOperand(MRI, MI, 1);
        Len = getConstantOperand(MRI, MI, 2);
        IsSet = MI.getOpcode() == MOS::G_MEMSET;
      } else if (MI.getOpcode() == MOS::G_STORE) {
        Register Reg = MI.getOperand(0).getReg();
        // Setting Dst/Len is safe here, as Src will only be set later.
        DstReg = MI.getOperand(1).getReg();
        Dst = getConstantOperand(MRI, MI, 1);
        Len = MachineOperand::CreateImm(MRI.getType(Reg).getSizeInBytes());
        if (MRI.getType(Reg).isScalar()) {
          // Large stores will take up less room as a store/TII pair.
          Src = getConstantOperand(MRI, MI, 0);
          IsSet = true;
          if (!Src.has_value())
            continue;
          auto SrcValue = getUInt64FromConstantOper(Src.value());
          if (!SrcValue.has_value())
            continue;
          // TODO: Support other repeating values than 0x00...
          if (SrcValue.value() != 0)
            continue;
        } else if (MRI.getType(Reg).isPointer() && I != MBB.begin()) {
          // InstCombinePass combines 16/32/64-bit memcpy() calls into
          // a load/store pair; cover those.
          MIPrev = I->getPrevNode();
          // The load is erased below, so the store being replaced must be the
          // only user of the loaded value.
          if (MIPrev->getOpcode() == MOS::G_LOAD &&
              MIPrev->getOperand(0).getReg() == Reg &&
              MRI.hasOneNonDBGUse(Reg)) {
            if (MRI.getType(MIPrev->getOperand(1).getReg()).isPointer()) {
              Src = getConstantOperand(MRI, *MIPrev, 1);
            }
            IsLoadStorePair = true;
          }
        }
      } else {
        continue;
      }

      if (!Src.has_value() || !Dst.has_value() || !Len.has_value())
        continue;

      if (IsSet) {
        // A TII-based memory set is always slower than the alternative.
        // Skip using it unless -Os, -Oz is set.
        if (!MF.getFunction().hasOptSize())
          continue;
        // The fill value is emitted as a single byte store below.
        auto SrcValue = getUInt64FromConstantOper(Src.value());
        if (!SrcValue.has_value() || *SrcValue > 0xFF)
          continue;
      }

      // On HuC platforms, block copies can be emitted, and sets can be done
      // with them too. However, some requirements have to be considered:
      // 1) The source, destination, and length have to be constant; however,
      //    they can be opaque constants (such as symbols).
      // 2) A block copy instruction stalls all interrupts until it completes.
      //    As such, one instruction should only do some amount of transfers,
      //    to prevent stalling video interrupts mid-execution.
      // Each transfer is 7 bytes and (17 + 6n) cycles, where n is the length
      // of the transfer in bytes.
      uint64_t BytesPerTransfer = HuCIrqSafeBlockCopies ? 16 : UINT16_MAX;
      uint64_t SizeMin, SizeMax;
      // Note that non-indexed LDA/STA memory calls are 1 cycle slower on
      // HuC6280 compared to other 6502 derivatives.
      if (MF.getFunction().hasMinSize()) {
        // Copies:
        // => inline LDA/STA: 6n bytes
        // => TII: 7 bytes
        // => memcpy(): ~23 bytes
        // Sets:
        // => inline LDA/STA: 2 + 3n bytes
        // => LDA/STA/TII: 5 + 7 bytes
        // => __memset(): ~21? bytes
        SizeMin = IsSet ? 5 : 2;
        SizeMax = IsSet ? (BytesPerTransfer * 2 + 1) : (BytesPerTransfer * 3);
      } else if (MF.getFunction().hasOptSize()) {
        // Try to strike a balance.
        SizeMin = IsSet ? 5 : 4;
        SizeMax = IsSet ? (BytesPerTransfer * 3 + 1) : (BytesPerTransfer * 4);
      } else {
        // Copies:
        // => inline LDA/STA: 10n cycles
        // => TII: 17 + 6n cycles
        // Sets:
        // => inline LDA/STA: 2 + 5n cycles
        // => LDA/STA/TII: 24 + 6n cycles
        SizeMin = 5;
        SizeMax = BytesPerTransfer * 5;
      }
      uint64_t KnownLen = UINT16_MAX;

      // If we require IRQ-safe chunks, the length has to be known.
      auto LenValue = getUInt64FromConstantOper(Len.value());
      if (LenValue.has_value()) {
        KnownLen = LenValue.value();
        if (KnownLen < SizeMin || KnownLen > SizeMax) {
          continue;
        }
      } else if (BytesPerTransfer < UINT16_MAX) {
        continue;
      }

      // Proceed with the custom lowering.
      MachineIRBuilder Builder(MBB, MI);

      if (IsSet) {
        // Emit a G_STORE, then set Src = Dst, Dst = Dst + 1, Len = Len - 1.
        auto StoreReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
        Builder.buildConstant(StoreReg,
                              getUInt64FromConstantOper(Src.value()).value());
        Builder.buildStore(
            StoreReg, DstReg,
            *MF.getMachineMemOperand(MachinePointerInfo(),
                                     MachineMemOperand::MOStore, 1, Align(1)));

        Src = Dst;
        Dst = offsetMachineOperand(Dst.value(), 1);
        Len = offsetMachineOperand(Len.value(), -1);
        // The store above already wrote the first byte, so the transfers must
        // cover one byte less; otherwise the chunked path below would write
        // one byte past the end of the destination. A known length is at
        // least SizeMin here, so this cannot underflow.
        if (LenValue.has_value())
          --KnownLen;
      }

      if (KnownLen <= BytesPerTransfer) {
        Builder.buildInstr(MOS::HuCMemcpy)
            .add(Src.value())
            .add(Dst.value())
            .add(Len.value());
      } else {
        // Split into transfers of at most BytesPerTransfer bytes each.
        for (uint64_t Ofs = 0; Ofs < KnownLen; Ofs += BytesPerTransfer) {
          Builder.buildInstr(MOS::HuCMemcpy)
              .add(offsetMachineOperand(Src.value(), Ofs))
              .add(offsetMachineOperand(Dst.value(), Ofs))
              .add(MachineOperand::CreateImm(
                  std::min(KnownLen - Ofs, BytesPerTransfer)));
        }
      }

      // Step back onto the last instruction emitted above so that the loop's
      // increment stays valid once MI has been erased.
      --I;
      MI.eraseFromParent();
      if (IsLoadStorePair) {
        MIPrev->eraseFromParent();
      }

      Changed = true;
    }
  }

  return Changed;
}

// Storage for the pass identifier passed to the MachineFunctionPass
// constructor. LLVM identifies passes by the address of this member, so the
// initial value is unimportant.
char MOSHuCBlockCopy::ID = 0;

// Register the pass under the DEBUG_TYPE name ("mos-hucblockcopy").
// NOTE(review): MachineDominatorTree is declared as a dependency, but
// runOnMachineFunction() in this file does not appear to query it - confirm
// whether the dependency is still needed.
INITIALIZE_PASS_BEGIN(MOSHuCBlockCopy, DEBUG_TYPE,
"Emit HuC6280 block copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(MOSHuCBlockCopy, DEBUG_TYPE,
"Emit HuC6280 block copies", false, false)

/// Allocate a fresh instance of the HuC6280 block copy pass. The instance is
/// returned as a raw pointer; the caller takes over the allocation.
MachineFunctionPass *llvm::createMOSHuCBlockCopyPass() {
  MachineFunctionPass *Pass = new MOSHuCBlockCopy();
  return Pass;
}
25 changes: 25 additions & 0 deletions llvm/lib/Target/MOS/MOSHuCBlockCopy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===-- MOSHuCBlockCopy.h - MOS HuC6280 Block Copy --------------*- C++ -*-===//
//
// Part of LLVM-MOS, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the MOS pass to lower constant-address memory copies and
// sets to HuC6280 block transfer instructions.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_MOS_MOSHUCBLOCKCOPY_H
#define LLVM_LIB_TARGET_MOS_MOSHUCBLOCKCOPY_H

#include "llvm/CodeGen/MachineFunctionPass.h"

namespace llvm {

/// Creates a new instance of the HuC6280 block copy pass, which is defined in
/// MOSHuCBlockCopy.cpp.
MachineFunctionPass *createMOSHuCBlockCopyPass();

} // namespace llvm

#endif // not LLVM_LIB_TARGET_MOS_MOSHUCBLOCKCOPY_H
5 changes: 4 additions & 1 deletion llvm/lib/Target/MOS/MOSInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ def imm8at2 : imm8at<2>;
// 16-bit immediate operand. It is matched by the "Imm16" assembly operand
// class, encoded through encodeImm<MOS::Imm16, offset>, and tagged with the
// MOSOp::OPERAND_IMM16 operand type.
// NOTE(review): `offset` is presumably the byte position of the immediate
// within the instruction encoding - confirm against encodeImm.
class imm16at<int offset> : Operand<i32> {
let ParserMatchClass = ImmediateAsmOperand<"Imm16">;
let EncoderMethod = "encodeImm<MOS::Imm16, " # offset # ">";
let OperandType = "OPERAND_IMM16";
let OperandNamespace = "MOSOp";
let Type = i16;
}
// Instantiations of imm16at for offsets 1 and 5.
def imm16 : imm16at<1>;
def imm16at5 : imm16at<5>;
Expand Down Expand Up @@ -834,4 +837,4 @@ class ConditionalBranch<string opcodestr, bits<2> flagType, bits<1> value> :
let a{2-1} = flagType;
let a{0} = value;
let opcode = OpcodeABC<a, 0b100, 0b00>;
}
}
14 changes: 14 additions & 0 deletions llvm/lib/Target/MOS/MOSInstrLogical.td
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,20 @@ let Predicates = [Has65C02] in {
}
}

//===---------------------------------------------------------------------===//
// HuC Block Copy Instructions
//===---------------------------------------------------------------------===//

// Only available when the HasHUC6280 predicate holds.
let Predicates = [HasHUC6280] in {
// TII abs, abs, imm16
// Block transfer taking a source address, a destination address and a 16-bit
// length as inputs; emitted by the MOSHuCBlockCopy pass.
def HuCMemcpy : MOSLogicalInstr {
dag InOperandList = (ins addr16:$source, addr16:$dest, imm16:$length);

// The instruction both reads and writes memory.
let mayLoad = true;
let mayStore = true;
}
}

//===---------------------------------------------------------------------===//
// Addition/Subtraction Patterns
//===---------------------------------------------------------------------===//
Expand Down
Loading

0 comments on commit 9df5c7f

Please sign in to comment.