Skip to content

Commit

Permalink
[llvm-ar][libObject] Fix relative paths when nesting thin archives.
Browse files Browse the repository at this point in the history
Summary:
When adding one thin archive to another, we currently chop off the relative path to the flattened members. For instance, when adding `foo/child.a` (which contains `x.txt`) to `parent.a`, when flattening it we should add it as `foo/x.txt` (which exists) instead of `x.txt` (which does not exist).

As a note, this also undoes the `IsNew` parameter of handling relative paths in r288280. The unit test there still passes.

This was reported as part of testing the kernel build with llvm-ar: https://patchwork.kernel.org/patch/10767545/ (see the second point).

Reviewers: mstorsjo, pcc, ruiu, davide, david2050, inglorion

Reviewed By: ruiu

Subscribers: void, jdoerfert, tpimh, mgorny, hans, nickdesaulniers, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D57842

llvm-svn: 353995
  • Loading branch information
rupprecht committed Feb 13, 2019
1 parent 1113940 commit 451c2ef
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 72 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/Object/ArchiveWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ struct NewArchiveMember {
sys::TimePoint<std::chrono::seconds> ModTime;
unsigned UID = 0, GID = 0, Perms = 0644;

bool IsNew = false;
NewArchiveMember() = default;
NewArchiveMember(MemoryBufferRef BufRef);

Expand All @@ -37,6 +36,8 @@ struct NewArchiveMember {
bool Deterministic);
};

std::string computeArchiveRelativePath(StringRef From, StringRef To);

Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
Expand Down
112 changes: 48 additions & 64 deletions llvm/lib/Object/ArchiveWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
return BufOrErr.takeError();

NewArchiveMember M;
assert(M.IsNew == false);
M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
Expand Down Expand Up @@ -98,7 +97,6 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
return errorCodeToError(std::error_code(errno, std::generic_category()));

NewArchiveMember M;
M.IsNew = true;
M.Buf = std::move(*MemberBufferOrErr);
M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
Expand Down Expand Up @@ -191,35 +189,6 @@ static bool useStringTable(bool Thin, StringRef Name) {
return Thin || Name.size() >= 16 || Name.contains('/');
}

// Compute the relative path from From to To.
static std::string computeRelativePath(StringRef From, StringRef To) {
if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
return To;

StringRef DirFrom = sys::path::parent_path(From);
auto FromI = sys::path::begin(DirFrom);
auto ToI = sys::path::begin(To);
while (*FromI == *ToI) {
++FromI;
++ToI;
}

SmallString<128> Relative;
for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
sys::path::append(Relative, "..");

for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
sys::path::append(Relative, *ToI);

#ifdef _WIN32
// Replace backslashes with slashes so that the path is portable between *nix
// and Windows.
std::replace(Relative.begin(), Relative.end(), '\\', '/');
#endif

return Relative.str();
}

static bool is64BitKind(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
Expand All @@ -234,27 +203,11 @@ static bool is64BitKind(object::Archive::Kind Kind) {
llvm_unreachable("not supported for writting");
}

static void addToStringTable(raw_ostream &Out, StringRef ArcName,
const NewArchiveMember &M, bool Thin) {
StringRef ID = M.Buf->getBufferIdentifier();
if (Thin) {
if (M.IsNew)
Out << computeRelativePath(ArcName, ID);
else
Out << ID;
} else
Out << M.MemberName;
Out << "/\n";
}

static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
raw_ostream &StringTable,
StringMap<uint64_t> &MemberNames,
object::Archive::Kind Kind, bool Thin,
StringRef ArcName, const NewArchiveMember &M,
sys::TimePoint<std::chrono::seconds> ModTime,
unsigned Size) {

static void
printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind,
bool Thin, const NewArchiveMember &M,
sys::TimePoint<std::chrono::seconds> ModTime, unsigned Size) {
if (isBSDLike(Kind))
return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
Expand All @@ -265,12 +218,12 @@ static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
uint64_t NamePos;
if (Thin) {
NamePos = StringTable.tell();
addToStringTable(StringTable, ArcName, M, Thin);
StringTable << M.MemberName << "/\n";
} else {
auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
if (Insertion.second) {
Insertion.first->second = StringTable.tell();
addToStringTable(StringTable, ArcName, M, Thin);
StringTable << M.MemberName << "/\n";
}
NamePos = Insertion.first->second;
}
Expand Down Expand Up @@ -432,8 +385,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {

static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, StringRef ArcName,
bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
object::Archive::Kind Kind, bool Thin, bool Deterministic,
ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};

// This ignores the symbol table, but we only need the value mod 8 and the
Expand Down Expand Up @@ -520,8 +473,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
else
ModTime = M.ModTime;
printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
M, ModTime, Buf.getBufferSize() + MemberPadding);
printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
ModTime, Buf.getBufferSize() + MemberPadding);
Out.flush();

Expected<std::vector<unsigned>> Symbols =
Expand All @@ -540,11 +493,40 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
return Ret;
}

Error llvm::writeArchive(StringRef ArcName,
ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
namespace llvm {
// Compute the relative path from From to To.
std::string computeArchiveRelativePath(StringRef From, StringRef To) {
if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
return To;

StringRef DirFrom = sys::path::parent_path(From);
auto FromI = sys::path::begin(DirFrom);
auto ToI = sys::path::begin(To);
while (*FromI == *ToI) {
++FromI;
++ToI;
}

SmallString<128> Relative;
for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
sys::path::append(Relative, "..");

for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
sys::path::append(Relative, *ToI);

#ifdef _WIN32
// Replace backslashes with slashes so that the path is portable between *nix
// and Windows.
std::replace(Relative.begin(), Relative.end(), '\\', '/');
#endif

return Relative.str();
}

Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");

SmallString<0> SymNamesBuf;
Expand All @@ -553,7 +535,7 @@ Error llvm::writeArchive(StringRef ArcName,
raw_svector_ostream StringTable(StringTableBuf);

Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
StringTable, SymNames, Kind, Thin, Deterministic, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
Expand Down Expand Up @@ -630,3 +612,5 @@ Error llvm::writeArchive(StringRef ArcName,

return Temp->keep(ArcName);
}

} // namespace llvm
7 changes: 7 additions & 0 deletions llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
set(LLVM_LINK_COMPONENTS
BinaryFormat
Object
Option
Support
)

set(LLVM_TARGET_DEFINITIONS Options.td)
tablegen(LLVM Options.inc -gen-opt-parser-defs)
add_public_tablegen_target(LibOptionsTableGen)
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {

// Create an archive file.
std::string OutputPath = getOutputPath(&Args, Members[0]);
// llvm-lib uses relative paths for both regular and thin archives, unlike
// standard GNU ar, which only uses relative paths for thin archives and
// basenames for regular archives.

This comment has been minimized.

Copy link
@mstorsjo

mstorsjo Jan 16, 2025

Member

This is a bit of code archeology here, but, does anyone (@rupprecht, @zmodem) happen to know why it was chosen to use different logic here (to always use paths relative to the archive) for llvm-lib, while it says that this only is done for thin archives, when using llvm-ar? I tried to read the code review at https://reviews.llvm.org/D57842 but I don't see it mentioned why it was decided to go this way.

I'm running into cases where the always-relative path logic is problematic.

This comment has been minimized.

Copy link
@mstorsjo

mstorsjo Jan 16, 2025

Member

It's possible that this was only due to convenience, as we didn't have a direct variable telling us whether we're making a thin archive or not - although the info was not very far away (Args.hasArg(OPT_llvmlibthin) in method arguments a couple lines below). This was made into a bool Thin variable separately, later in 4fcbf38.

This comment has been minimized.

Copy link
@zmodem

zmodem Jan 16, 2025

Collaborator

I haven't digested the details yet, but this is a re-land of an earlier version (bf990ab) which didn't have this logic, and was reverted due to https://crbug.com/41440160 so that bug probably provides some context.

This comment has been minimized.

Copy link
@mstorsjo

mstorsjo Jan 16, 2025

Member

Thanks, that shows some clues, but that also is just concerned about the thin archive output case... Also, all of check-llvm and check-lld pass with this bit of code wrapped in if (Thin) { ... } in current git.

I guess I'll just write up a patch with that, plus some new test cases for the interactions that I'm concerned about. For most regular libraries, the individual names of members don't matter much. For thin libraries they do matter, and you need the logic here... But for Windows import libraries, where each member is named e.g. foo.dll, the names also matter, so if we e.g. merge two libraries they shouldn't end up getting members named ../../foo.dll.

for (NewArchiveMember &Member : Members)
Member.MemberName =
Saver.save(computeArchiveRelativePath(OutputPath, Member.MemberName));

if (Error E =
writeArchive(OutputPath, Members,
/*WriteSymtab=*/true, object::Archive::K_GNU,
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/tools/llvm-ar/flatten-thin-archive-directories.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This test creates a thin archive in a directory and adds it to a thin archive
# in the parent directory. The relative path should be included when flattening
# the archive.

RUN: mkdir -p %t/foo
RUN: touch %t/foo/a.txt
RUN: rm -f %t/archive.a %t/foo/archive.a

# These tests must be run in the same directory as %t/archive.a. cd %t is
# included on each line to make debugging this test case easier.
RUN: cd %t && llvm-ar rcST foo/archive.a foo/a.txt
RUN: cd %t && llvm-ar rcST archive.a foo/archive.a
RUN: cd %t && llvm-ar t archive.a | FileCheck %s --match-full-lines

CHECK: foo/a.txt
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-ar/flatten-thin-archive.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# flattened members appearing together.

RUN: touch %t-a.txt %t-b.txt %t-c.txt %t-d.txt %t-e.txt
RUN: rm -f %t-a-plus-b.a %t.a
RUN: rm -f %t-a-plus-b.a %t-d-plus-e.a %t.a
RUN: llvm-ar rcsT %t-a-plus-b.a %t-a.txt %t-b.txt
RUN: llvm-ar rcs %t-d-plus-e.a %t-d.txt %t-e.txt
RUN: llvm-ar rcsT %t.a %t-a-plus-b.a %t-c.txt %t-d-plus-e.a
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-lib/thin-relative.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
RUN: rm -rf %t
RUN: mkdir -p %t/foo
RUN: cd %t

RUN: llvm-mc -triple=x86_64-pc-windows-msvc -filetype=obj -o foo/obj.o %S/Inputs/a.s

RUN: llvm-lib -out:archive.a -llvmlibthin foo/obj.o
RUN: llvm-ar t archive.a | FileCheck %s --check-prefix=PARENT-DIR --match-full-lines
PARENT-DIR: foo/obj.o

RUN: llvm-lib -out:foo/archive.a -llvmlibthin foo/obj.o
RUN: llvm-ar t foo/archive.a | FileCheck %s --check-prefix=SAME-DIR --match-full-lines
SAME-DIR: foo/obj.o
28 changes: 22 additions & 6 deletions llvm/tools/llvm-ar/llvm-ar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ static std::string ArchiveName;
// on the command line.
static std::vector<StringRef> Members;

// Static buffer to hold StringRefs.
static BumpPtrAllocator Alloc;

// Extract the member filename from the command line for the [relpos] argument
// associated with a, b, and i modifiers
static void getRelPos() {
Expand Down Expand Up @@ -545,6 +548,15 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getOldMember(M, Deterministic);
failIfError(NMOrErr.takeError());
// If the child member we're trying to add is thin, use the path relative to
// the archive it's in, so the file resolves correctly.
if (Thin && FlattenArchive) {
StringSaver Saver(Alloc);
Expected<std::string> FileNameOrErr = M.getFullName();
failIfError(FileNameOrErr.takeError());
NMOrErr->MemberName =
Saver.save(computeArchiveRelativePath(ArchiveName, *FileNameOrErr));
}
if (FlattenArchive &&
identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
Expected<std::string> FileNameOrErr = M.getFullName();
Expand All @@ -568,6 +580,13 @@ static void addMember(std::vector<NewArchiveMember> &Members,
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getFile(FileName, Deterministic);
failIfError(NMOrErr.takeError(), FileName);
StringSaver Saver(Alloc);
// For regular archives, use the basename of the object path for the member
// name. For thin archives, use the full relative paths so the file resolves
// correctly.
NMOrErr->MemberName =
Thin ? Saver.save(computeArchiveRelativePath(ArchiveName, FileName))
: sys::path::filename(NMOrErr->MemberName);
if (FlattenArchive &&
identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
object::Archive &Lib = readLibrary(FileName);
Expand All @@ -581,8 +600,6 @@ static void addMember(std::vector<NewArchiveMember> &Members,
return;
}
}
// Use the basename of the object path for the member name.
NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
Members.push_back(std::move(*NMOrErr));
}

Expand Down Expand Up @@ -672,15 +689,15 @@ computeNewArchiveMembers(ArchiveOperation Operation,
computeInsertAction(Operation, Child, Name, MemberI);
switch (Action) {
case IA_AddOldMember:
addChildMember(Ret, Child);
addChildMember(Ret, Child, /*FlattenArchive=*/Thin);
break;
case IA_AddNewMember:
addMember(Ret, *MemberI);
break;
case IA_Delete:
break;
case IA_MoveOldMember:
addChildMember(Moved, Child);
addChildMember(Moved, Child, /*FlattenArchive=*/Thin);
break;
case IA_MoveNewMember:
addMember(Moved, *MemberI);
Expand Down Expand Up @@ -899,7 +916,7 @@ static void runMRIScript() {
{
Error Err = Error::success();
for (auto &Member : Lib.children(Err))
addChildMember(NewMembers, Member);
addChildMember(NewMembers, Member, /*FlattenArchive=*/Thin);
failIfError(std::move(Err));
}
break;
Expand Down Expand Up @@ -951,7 +968,6 @@ static bool handleGenericOption(StringRef arg) {

static int ar_main(int argc, char **argv) {
SmallVector<const char *, 0> Argv(argv, argv + argc);
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
for (size_t i = 1; i < Argv.size(); ++i) {
Expand Down

0 comments on commit 451c2ef

Please sign in to comment.