From 86adef58f4fd67744de960aea08d8bc02fbdf778 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Sun, 29 Dec 2024 22:05:22 -0800 Subject: [PATCH 1/8] Use PosixFileStream for files on POSIX --- Src/IronPython.Modules/mmap.cs | 130 +++++++--- Src/IronPython.Modules/nt.cs | 96 +++++-- Src/IronPython/Modules/_fileio.cs | 104 ++++---- Src/IronPython/Runtime/PosixFileStream.cs | 267 ++++++++++++++++++++ Src/IronPython/Runtime/PythonFileManager.cs | 16 +- Tests/test_file.py | 2 +- Tests/test_io_stdlib.py | 6 +- 7 files changed, 525 insertions(+), 96 deletions(-) create mode 100644 Src/IronPython/Runtime/PosixFileStream.cs diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index c512385c8..8c84bce0c 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -15,6 +15,8 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Serialization; +using System.Runtime.Versioning; using System.Text; using System.Threading; @@ -24,6 +26,7 @@ using IronPython.Runtime.Types; using Microsoft.Scripting.Utils; +using Microsoft.Win32.SafeHandles; [assembly: PythonModule("mmap", typeof(IronPython.Modules.MmapModule))] namespace IronPython.Modules { @@ -92,6 +95,7 @@ public class MmapDefault : IWeakReferenceable { private readonly long _offset; private readonly string _mapName; private readonly MemoryMappedFileAccess _fileAccess; + private readonly SafeFileHandle _handle; // only used on some POSIX platforms, null otherwise private volatile bool _isClosed; private int _refCount = 1; @@ -148,46 +152,65 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag PythonContext pContext = context.LanguageContext; if (pContext.FileManager.TryGetStreams(fileno, out StreamBox streams)) { - if ((_sourceStream = streams.ReadStream as FileStream) == null) { - throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); + Stream stream = streams.ReadStream; + if (stream is FileStream fs) { + _sourceStream = fs; + } else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) { + // use file descriptor +#if NET8_0_OR_GREATER + CheckFileAccess(_fileAccess, stream); + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, length, _fileAccess, HandleInheritability.None, leaveOpen: true); +#else + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + FileAccess fileAccess = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; + // This may or may not work on Mono, but on Mono streams.ReadStream is FileStream (unless dupped in some cases) + _sourceStream = new FileStream(_handle, fileAccess); +#endif } + // otherwise leaves _file as null and _sourceStream as null } else { throw PythonOps.OSError(PythonExceptions._OSError.ERROR_INVALID_BLOCK, "Bad file descriptor"); } - if (_fileAccess == MemoryMappedFileAccess.ReadWrite && !_sourceStream.CanWrite) { - throw WindowsError(PythonExceptions._OSError.ERROR_ACCESS_DENIED); - } + if (_file is null) { + // create _file form _sourceStream + if (_sourceStream is null) { + throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); + } + + CheckFileAccess(_fileAccess, _sourceStream); - if (length == 0) { - length = _sourceStream.Length; if (length == 0) { - throw PythonOps.ValueError("cannot mmap an empty file"); - } - if (_offset >= length) { - throw PythonOps.ValueError("mmap offset is greater than file size"); + length = _sourceStream.Length; + if (length == 0) { + throw PythonOps.ValueError("cannot mmap an empty file"); + } + if (_offset >= length) { + throw PythonOps.ValueError("mmap offset is greater than file size"); + } + length -= _offset; } - length -= _offset; - } - long capacity = checked(_offset + length); + long capacity = checked(_offset + length); - // Enlarge the file as needed. - if (capacity > _sourceStream.Length) { - if (_sourceStream.CanWrite) { - _sourceStream.SetLength(capacity); - } else { - throw WindowsError(PythonExceptions._OSError.ERROR_NOT_ENOUGH_MEMORY); + // Enlarge the file as needed. + if (capacity > _sourceStream.Length) { + if (_sourceStream.CanWrite) { + _sourceStream.SetLength(capacity); + } else { + throw WindowsError(PythonExceptions._OSError.ERROR_NOT_ENOUGH_MEMORY); + } } - } - _file = CreateFromFile( - _sourceStream, - _mapName, - _sourceStream.Length, - _fileAccess, - HandleInheritability.None, - true); + _file = CreateFromFile( + _sourceStream, + _mapName, + _sourceStream.Length, + _fileAccess, + HandleInheritability.None, + true); + } } try { @@ -198,7 +221,24 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag throw; } _position = 0L; - } + + void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { + bool isValid = mmapAccess switch { + MemoryMappedFileAccess.Read => stream.CanRead, + MemoryMappedFileAccess.ReadWrite => stream.CanRead && stream.CanWrite, + MemoryMappedFileAccess.CopyOnWrite => stream.CanRead, + _ => false + }; + + if (!isValid) { + if (_handle is not null && _sourceStream is not null) { + _sourceStream.Dispose(); + } + throw PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode"); + } + } + } // end of constructor + public object __len__() { using (new MmapLocker(this)) { @@ -325,6 +365,11 @@ private void CloseWorker() { _view.Flush(); _view.Dispose(); _file.Dispose(); + if (_handle is not null) { + // mmap owns _sourceStream too in this case + _sourceStream?.Dispose(); + _handle.Dispose(); + } _sourceStream = null; _view = null; _file = null; @@ -557,6 +602,11 @@ public void resize(long newsize) { } if (_sourceStream == null) { + if (_handle is not null && !_handle.IsInvalid + && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { + // resize on Posix platforms + PythonNT.ftruncateUnix(unchecked((int)_handle.DangerousGetHandle()), newsize); + } // resizing is not supported without an underlying file throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_PARAMETER); } @@ -716,6 +766,9 @@ public void seek(long pos, int whence = SEEK_SET) { public object size() { using (new MmapLocker(this)) { + if (_handle is not null && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { + return GetFileSizeUnix(_handle); + } if (_sourceStream == null) return ReturnLong(_view.Capacity); return ReturnLong(new FileInfo(_sourceStream.Name).Length); } @@ -830,6 +883,25 @@ internal Bytes GetSearchString() { } } + [SupportedOSPlatform("linux"), SupportedOSPlatform("macos")] + private static long GetFileSizeUnix(SafeFileHandle handle) { + long size; + if (handle.IsInvalid) { + throw PythonOps.OSError(PythonExceptions._OSError.ERROR_INVALID_HANDLE, "Invalid file handle"); + } + + if (Mono.Unix.Native.Syscall.fstat((int)handle.DangerousGetHandle(), out Mono.Unix.Native.Stat status) == 0) { + size = status.st_size; + } else { + Mono.Unix.Native.Errno errno = Mono.Unix.Native.Stdlib.GetLastError(); + string msg = Mono.Unix.UnixMarshal.GetErrorDescription(errno); + int error = Mono.Unix.Native.NativeConvert.FromErrno(errno); + throw PythonOps.OSError(error, msg); + } + + return size; + } + #endregion #region Synchronization diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index 5811c9cf7..4622a62a5 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -431,22 +431,41 @@ public static int dup2(CodeContext/*!*/ context, int fd, int fd2) { } + [SupportedOSPlatform("linux"), SupportedOSPlatform("osx")] private static int UnixDup(int fd, int fd2, out Stream? stream) { int res = fd2 < 0 ? Mono.Unix.Native.Syscall.dup(fd) : Mono.Unix.Native.Syscall.dup2(fd, fd2); if (res < 0) throw GetLastUnixError(); if (ClrModule.IsMono) { - // This does not work on .NET, probably because .NET FileStream is not aware of Mono.Unix.UnixStream - stream = new Mono.Unix.UnixStream(res, ownsHandle: true); + // Elaborate workaround on Mono to avoid UnixStream as out + stream = new Mono.Unix.UnixStream(res, ownsHandle: false); + FileAccess fileAccess = stream.CanRead ? stream.CanWrite ? FileAccess.ReadWrite : FileAccess.Read : FileAccess.Write; + stream.Dispose(); + try { + // FileStream on Mono created with a file descriptor might not work: /~https://github.com/mono/mono/issues/12783 + // Test if it does, without closing the handle if it doesn't + var sfh = new SafeFileHandle((IntPtr)res, ownsHandle: false); + stream = new FileStream(sfh, fileAccess); + // No exception? Great! We can use FileStream. + stream.Dispose(); + sfh.Dispose(); + stream = null; // Create outside of try block + } catch (IOException) { + // Fall back to UnixStream + stream = new Mono.Unix.UnixStream(res, ownsHandle: true); + } + if (stream is null) { + // FileStream is safe + var sfh = new SafeFileHandle((IntPtr)res, ownsHandle: true); + stream = new FileStream(sfh, fileAccess); + } } else { - // This does not work 100% correctly on .NET, probably because each FileStream has its own read/write cursor - // (it should be shared between dupped descriptors) - //stream = new FileStream(new SafeFileHandle((IntPtr)res, ownsHandle: true), FileAccess.ReadWrite); - // Accidentaly, this would also not work on Mono: /~https://github.com/mono/mono/issues/12783 - stream = null; // Handle stream sharing in PythonFileManager + // normal case + stream = new PosixFileStream(res); } return res; } + #if FEATURE_PROCESS /// /// single instance of environment dictionary is shared between multiple runtimes because the environment @@ -470,6 +489,9 @@ public static object fstat(CodeContext/*!*/ context, int fd) { PythonFileManager fileManager = context.LanguageContext.FileManager; if (fileManager.TryGetStreams(fd, out StreamBox? streams)) { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + return fstatUnix(fd); + } if (streams.IsConsoleStream()) return new stat_result(0x2000); if (streams.IsStandardIOStream()) return new stat_result(0x1000); if (StatStream(streams.ReadStream) is not null and var res) return res; @@ -483,15 +505,9 @@ public static object fstat(CodeContext/*!*/ context, int fd) { #endif if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { if (ReferenceEquals(stream, Stream.Null)) return new stat_result(0x2000); - } else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - if (IsUnixStream(stream)) return new stat_result(0x1000); } return null; } - - static bool IsUnixStream(Stream stream) { - return stream is Mono.Unix.UnixStream; - } } public static void fsync(CodeContext context, int fd) { @@ -869,7 +885,16 @@ public static void mkdir(CodeContext context, object? path, [ParamDictionary, No private const int DefaultBufferSize = 4096; - [Documentation("open(path, flags, mode=511, *, dir_fd=None)")] + [Documentation(""" + open(path, flags, mode=511, *, dir_fd=None) + + Open a file for low level IO. Returns a file descriptor (integer). + + If dir_fd is not None, it should be a file descriptor open to a directory, + and path should be relative; path will then be relative to that directory. + dir_fd may not be implemented on your platform. + If it is unavailable, using it will raise a NotImplementedError. + """)] public static object open(CodeContext/*!*/ context, [NotNone] string path, int flags, [ParamDictionary, NotNone] IDictionary kwargs, [NotNone] params object[] args) { var numArgs = args.Length; CheckOptionalArgsCount(numRegParms: 2, numOptPosParms: 1, numKwParms: 1, numArgs, kwargs.Count); @@ -889,12 +914,23 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f } } + if ((RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) && !ClrModule.IsMono) { + // Use PosixFileStream to operate on fd directly + // On Mono, we must use FileStream due to limitations in MemoryMappedFile + Stream s = PosixFileStream.Open(path, flags, unchecked((uint)mode), out int fd); + //Stream s = PythonIOModule.FileIO.OpenFilePosix(path, flags, mode, out int fd); + if ((flags & O_APPEND) != 0) { + s.Seek(0L, SeekOrigin.End); + } + return context.LanguageContext.FileManager.Add(fd, new(s)); + } + try { FileMode fileMode = FileModeFromFlags(flags); FileAccess access = FileAccessFromFlags(flags); FileOptions options = FileOptionsFromFlags(flags); Stream s; // the stream opened to acces the file - FileStream? fs; // downcast of s if s is FileStream (this is always the case on POSIX) + FileStream? fs; // downcast of s if s is FileStream Stream? rs = null; // secondary read stream if needed, otherwise same as s if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && IsNulFile(path)) { fs = null; @@ -1436,6 +1472,13 @@ private static object statUnix(string path) { return LightExceptions.Throw(GetLastUnixError(path)); } + private static object fstatUnix(int fd) { + if (Mono.Unix.Native.Syscall.fstat(fd, out Mono.Unix.Native.Stat buf) == 0) { + return new stat_result(buf); + } + return LightExceptions.Throw(GetLastUnixError()); + } + private const int OPEN_EXISTING = 3; private const int FILE_ATTRIBUTE_NORMAL = 0x00000080; private const int FILE_READ_ATTRIBUTES = 0x0080; @@ -1669,8 +1712,27 @@ public static void truncate(CodeContext context, object? path, BigInteger length public static void truncate(CodeContext context, int fd, BigInteger length) => ftruncate(context, fd, length); - public static void ftruncate(CodeContext context, int fd, BigInteger length) - => context.LanguageContext.FileManager.GetStreams(fd).Truncate((long)length); + public static void ftruncate(CodeContext context, int fd, BigInteger length) { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + ftruncateUnix(fd, (long)length); + } else { + context.LanguageContext.FileManager.GetStreams(fd).Truncate((long)length); + } + } + + + [SupportedOSPlatform("linux"), SupportedOSPlatform("osx")] + internal static void ftruncateUnix(int fd, long length) { + int result; + Mono.Unix.Native.Errno errno; + do { + result = Mono.Unix.Native.Syscall.ftruncate(fd, length); + } while (Mono.Unix.UnixMarshal.ShouldRetrySyscall(result, out errno)); + + if (errno != 0) + throw GetOsError(Mono.Unix.Native.NativeConvert.FromErrno(errno)); + } + #if FEATURE_FILESYSTEM public static object times() { diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index 28982c56e..5f299b655 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -105,55 +105,67 @@ public FileIO(CodeContext/*!*/ context, [NotNone] string name, [NotNone] string this.mode = NormalizeMode(mode, out int flags); if (opener is null) { - switch (this.mode) { - case "rb": - _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)); - break; - case "wb": - _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)); - break; - case "xb": - _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.Write, FileShare.ReadWrite)); - break; - case "ab": - _streams = new(OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)); - _streams.ReadStream.Seek(0L, SeekOrigin.End); - break; - case "rb+": - _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "wb+": - _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "xb+": - _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.ReadWrite)); - break; - case "ab+": - // Opening writeStream before readStream will create the file if it does not exist - var writeStream = OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); - var readStream = OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); - readStream.Seek(0L, SeekOrigin.End); - writeStream.Seek(0L, SeekOrigin.End); - _streams = new(readStream, writeStream); - break; - default: - throw new InvalidOperationException(); - } - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - // On POSIX, register the file descriptor with the file manager right after file opening - _context.FileManager.GetOrAssignId(_streams); - // according to [documentation](https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.safefilehandle?view=net-9.0#remarks) - // accessing SafeFileHandle sets the current stream position to 0 - // in practice it doesn't seem to be the case, but better to be sure - if (this.mode.StartsWith("ab", StringComparison.InvariantCulture)) { - _streams.WriteStream.Seek(0L, SeekOrigin.End); + if ((RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) && !ClrModule.IsMono) { + // Use PosixFileStream to operate on fd directly + // On Mono, we must use FileStream due to limitations in MemoryMappedFile + var stream = PosixFileStream.Open(name, flags, 0b_110_110_110, out int fd); // mode: rw-rw-rw- + //var stream = OpenFilePosix(name, flags, 0b_110_110_110, out int fd); + if ((flags & O_APPEND) != 0) { + stream.Seek(0L, SeekOrigin.End); } - if (!_streams.IsSingleStream) { - _streams.ReadStream.Seek(_streams.WriteStream.Position, SeekOrigin.Begin); + _streams = new(stream); + _context.FileManager.Add(fd, _streams); + } else { + switch (this.mode) { + case "rb": + _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)); + break; + case "wb": + _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)); + break; + case "xb": + _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.Write, FileShare.ReadWrite)); + break; + case "ab": + _streams = new(OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)); + _streams.WriteStream.Seek(0L, SeekOrigin.End); + break; + case "rb+": + _streams = new(OpenFile(context, pal, name, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "wb+": + _streams = new(OpenFile(context, pal, name, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "xb+": + _streams = new(OpenFile(context, pal, name, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.ReadWrite)); + break; + case "ab+": + // Opening writeStream before readStream will create the file if it does not exist + var writeStream = OpenFile(context, pal, name, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); + var readStream = OpenFile(context, pal, name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + readStream.Seek(0L, SeekOrigin.End); + writeStream.Seek(0L, SeekOrigin.End); + _streams = new(readStream, writeStream); + break; + default: + throw new InvalidOperationException(); + } + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { + // On POSIX, register the file descriptor with the file manager right after file opening + // Because the .NET case is already handled above before `switch`, this branch runs only on Mono + _context.FileManager.GetOrAssignId(_streams); + // according to [documentation](https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.safefilehandle?view=net-9.0#remarks) + // accessing SafeFileHandle sets the current stream position to 0 + // in practice it doesn't seem to be the case, but better to be sure + if (this.mode[0] == 'a') { + _streams.WriteStream.Seek(0L, SeekOrigin.End); + } + if (!_streams.IsSingleStream) { + _streams.ReadStream.Seek(_streams.WriteStream.Position, SeekOrigin.Begin); + } } } - } - else { + } else { // opener is not null object? fdobj = PythonOps.CallWithContext(context, opener, name, flags); if (fdobj is int fd) { if (fd < 0) { diff --git a/Src/IronPython/Runtime/PosixFileStream.cs b/Src/IronPython/Runtime/PosixFileStream.cs new file mode 100644 index 000000000..36a380b0a --- /dev/null +++ b/Src/IronPython/Runtime/PosixFileStream.cs @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information. + +#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP2_1_OR_GREATER +#define SPAN_OVERRIDE // Stream has Span-based virtual methods +#endif + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +using Mono.Unix; +using Mono.Unix.Native; + +using IronPython.Runtime.Operations; +using System.Diagnostics; +using IronPython.Runtime.Exceptions; + +#nullable enable + +namespace IronPython.Runtime; + +[SupportedOSPlatform("linux")] +[SupportedOSPlatform("macos")] +internal class PosixFileStream : Stream +{ + private readonly int _fd; + private readonly bool _canSeek; + private readonly bool _canRead; + private readonly bool _canWrite; + + private bool _disposed; + + + public PosixFileStream(int fileDescriptor) { + if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux) && !RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + throw new PlatformNotSupportedException("This stream only works on POSIX systems"); + + if (fileDescriptor < 0) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + _fd = fileDescriptor; + + _canSeek = Syscall.lseek(fileDescriptor, 0, SeekFlags.SEEK_CUR) >= 0; + _canRead = Syscall.read(fileDescriptor, IntPtr.Zero, 0) == 0; + _canWrite = Syscall.write(fileDescriptor, IntPtr.Zero, 0) == 0; + } + + + public static Stream Open(string name, int flags, uint mode, out int fd) { + OpenFlags openFlags = NativeConvert.ToOpenFlags(flags); + FilePermissions permissions = NativeConvert.ToFilePermissions(mode); + Errno errno; + + do { + fd = Syscall.open(name, openFlags, permissions); + } while (UnixMarshal.ShouldRetrySyscall(fd, out errno)); + + if (fd < 0) { + Debug.Assert(errno != 0); + throw CreateExceptionForLastError(errno, name); + } + return new PosixFileStream(fd); + } + + + public int Handle => _fd; + + public override bool CanSeek => _canSeek; + public override bool CanRead => _canRead; + public override bool CanWrite => _canWrite; + + + public override long Length { + get { + ThrowIfDisposed(); + int res = Syscall.fstat(_fd, out Stat stat); + ThrowIfError(res); + + return stat.st_size; + } + } + + + public override long Position { + get => Seek(0, SeekOrigin.Current); + set => Seek(value, SeekOrigin.Begin); + } + + + public override long Seek(long offset, SeekOrigin origin) { + ThrowIfDisposed(); + SeekFlags whence = origin switch + { + SeekOrigin.Begin => SeekFlags.SEEK_SET, + SeekOrigin.Current => SeekFlags.SEEK_CUR, + SeekOrigin.End => SeekFlags.SEEK_END, + _ => throw PythonOps.OSError(PythonFileManager.EINVAL, "Invalid argument") + }; + + long result = Syscall.lseek(_fd, offset, whence); + ThrowIfError(result); + + return result; + } + + + public override void SetLength(long value) { + ThrowIfDisposed(); + int result; + Errno errno; + do { + result = Syscall.ftruncate(_fd, value); + } while (UnixMarshal.ShouldRetrySyscall(result, out errno)); + ThrowIfError(errno); + } + + +#if SPAN_OVERRIDE +#pragma warning disable IDE0036 // Modifiers are not ordered + override +#pragma warning restore IDE0036 +#endif + public int Read(Span buffer) { + ThrowIfDisposed(); + if (!CanRead) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + + if (buffer.Length == 0) + return 0; + + int bytesRead; + Errno errno; + unsafe { + fixed (byte* buf = buffer) { + do { + bytesRead = (int)Syscall.read(_fd, buf, (ulong)buffer.Length); + } while (UnixMarshal.ShouldRetrySyscall(bytesRead, out errno)); + } + } + ThrowIfError(errno); + + return bytesRead; + } + + + // If offset == 0 and count == 0, buffer is allowed to be null. + public override int Read(byte[] buffer, int offset, int count) { + ThrowIfDisposed(); + return Read(buffer.AsSpan(offset, count)); + } + + + public override int ReadByte() { + Span buffer = stackalloc byte[1]; + int bytesRead = Read(buffer); + return bytesRead == 0 ? -1 : buffer[0]; + } + + +#if SPAN_OVERRIDE +#pragma warning disable IDE0036 // Modifiers are not ordered + override +#pragma warning restore IDE0036 +#endif + public void Write(ReadOnlySpan buffer) { + ThrowIfDisposed(); + if (!CanWrite) + throw PythonOps.OSError(PythonFileManager.EBADF, "Bad file descriptor"); + + if (buffer.Length == 0) + return; + + int bytesWritten; + Errno errno; + unsafe { + fixed (byte* buf = buffer) { + do { + bytesWritten = (int)Syscall.write(_fd, buf, (ulong)buffer.Length); + } while (UnixMarshal.ShouldRetrySyscall(bytesWritten, out errno)); + } + } + ThrowIfError(errno); + } + + // If offset == 0 and count == 0, buffer is allowed to be null. + public override void Write(byte[] buffer, int offset, int count) { + ThrowIfDisposed(); + Write(buffer.AsSpan(offset, count)); + } + + + public override void WriteByte(byte value) { + Span buffer = stackalloc byte[] { value }; + Write(buffer); + } + + + public override void Flush() { + ThrowIfDisposed(); + int result; + Errno errno; + do { + result = Syscall.fsync(_fd); + } while (UnixMarshal.ShouldRetrySyscall(result, out errno)); + ThrowIfError(errno); + } + + protected override void Dispose(bool disposing) { + if (!_disposed) { + int result = Syscall.close(_fd); + WarnIfError(result, "Error closing file descriptor {0}: {1}: {2}"); + _disposed = true; + } + base.Dispose(disposing); + } + + + #region Private Methods + + private void ThrowIfDisposed() { + if (_disposed) + throw new ObjectDisposedException(GetType().Name); + } + + + private static void ThrowIfError(long result) { + if (result < 0) + throw CreateExceptionForLastError(); + } + + + private static void ThrowIfError(Errno errno) { + if (errno != 0) + throw CreateExceptionForLastError(errno); + } + + + private static Exception CreateExceptionForLastError(string? filename = null) { + Errno errno = Stdlib.GetLastError(); + return CreateExceptionForLastError(errno, filename); + } + + + private static Exception CreateExceptionForLastError(Errno errno, string? filename = null) { + if (errno == 0) return new InvalidOperationException("Unknown error"); + + string msg = UnixMarshal.GetErrorDescription(errno); + int error = NativeConvert.FromErrno(errno); + return PythonOps.OSError(error, msg, filename); + } + + private void WarnIfError(int result, string msgTmpl) { + if (result < 0) { + Errno errno = Stdlib.GetLastError(); + int error = NativeConvert.FromErrno(errno); + PythonOps.Warn(DefaultContext.Default, + PythonExceptions.RuntimeWarning, + msgTmpl, _fd, error, UnixMarshal.GetErrorDescription(errno)); + } + } + + #endregion +} diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index ca4dbef44..574ba0eb3 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -8,6 +8,7 @@ using System.Buffers; using System.Collections.Concurrent; using System.Collections.Generic; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.InteropServices; @@ -15,9 +16,9 @@ using Microsoft.Scripting.Runtime; using Microsoft.Scripting.Utils; +using Mono.Unix; using IronPython.Runtime.Operations; -using System.Diagnostics; namespace IronPython.Runtime { @@ -28,10 +29,19 @@ internal sealed class StreamBox { private int _id = -1; private Stream _readStream; private Stream _writeStream; + private readonly bool _writeNeedsFlush; public StreamBox(Stream readStream, Stream writeStream) { _readStream = readStream; _writeStream = writeStream; + + // a hack but it does improve perfomance a lot if flushing is not needed + _writeNeedsFlush = writeStream switch { + FileStream => true, // FileStream is buffered by default, used on Windows and mostly Mono + PosixFileStream => false, // PosixFileStream is unbuffered, used on .NET/Posix + UnixStream => false, // UnixStream is unbuffered, used sometimes on Mono + _ => true // if in doubt, flush + }; } public StreamBox(Stream stream) : this(stream, stream) { } @@ -152,7 +162,9 @@ public int Write(IPythonBuffer buffer) { count = buffer.NumBytes(); _writeStream.Write(bytes, 0, count); #endif - _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + if (_writeNeedsFlush) { + _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + } if (!IsSingleStream) { _readStream.Seek(_writeStream.Position, SeekOrigin.Begin); } diff --git a/Tests/test_file.py b/Tests/test_file.py index 1fb7fa3d9..c2a64b166 100644 --- a/Tests/test_file.py +++ b/Tests/test_file.py @@ -702,7 +702,7 @@ def test_errors(self): with self.assertRaises(OSError) as cm: open('path_too_long' * 100) - self.assertEqual(cm.exception.errno, (errno.ENAMETOOLONG if is_posix else errno.EINVAL) if is_netcoreapp and not is_posix or sys.version_info >= (3,6) else errno.ENOENT) + self.assertEqual(cm.exception.errno, (errno.ENAMETOOLONG if is_posix else errno.EINVAL) if is_netcoreapp or sys.version_info >= (3,6) else errno.ENOENT) def test_write_bytes(self): fname = self.temp_file diff --git a/Tests/test_io_stdlib.py b/Tests/test_io_stdlib.py index 8a71c5e75..9c24dd4dd 100644 --- a/Tests/test_io_stdlib.py +++ b/Tests/test_io_stdlib.py @@ -85,7 +85,6 @@ def load_tests(loader, standard_tests, pattern): test.test_io.CTextIOWrapperTest('test_uninitialized'), # AssertionError: Exception not raised by repr test.test_io.CTextIOWrapperTest('test_unseekable'), # OSError: underlying stream is not seekable test.test_io.PyTextIOWrapperTest('test_nonnormalized_close_error_on_close'), # AssertionError: None is not an instance of - test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. test.test_io.CMiscIOTest('test_io_after_close'), # AttributeError: 'TextIOWrapper' object has no attribute 'read1' test.test_io.CMiscIOTest('test_nonblock_pipe_write_bigbuf'), # AttributeError: 'module' object has no attribute 'fcntl' test.test_io.CMiscIOTest('test_nonblock_pipe_write_smallbuf'), # AttributeError: 'module' object has no attribute 'fcntl' @@ -101,6 +100,11 @@ def load_tests(loader, standard_tests, pattern): test.test_io.PyMiscIOTest('test_warn_on_dealloc_fd'), # AssertionError: ResourceWarning not triggered ] + if is_mono: + failing_tests += [ + test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. + ] + skip_tests = [ test.test_io.CBufferedWriterTest('test_override_destructor'), # StackOverflowException test.test_io.CBufferedRandomTest('test_override_destructor'), # StackOverflowException From 64d0d4ae307b68ee71ad27b9501f395fcea201e8 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Tue, 31 Dec 2024 13:07:13 -0800 Subject: [PATCH 2/8] Improve non-buffering strategy for FileStream --- Src/IronPython.Modules/nt.cs | 16 ++++++++++------ Src/IronPython/Runtime/PythonFileManager.cs | 13 ++----------- Tests/test_io_stdlib.py | 4 ++-- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index 4622a62a5..ca843c658 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -883,8 +883,6 @@ public static void mkdir(CodeContext context, [NotNone] Bytes path, [ParamDictio public static void mkdir(CodeContext context, object? path, [ParamDictionary, NotNone] IDictionary kwargs, [NotNone] params object[] args) => mkdir(ConvertToFsString(context, path, nameof(path)), kwargs, args); - private const int DefaultBufferSize = 4096; - [Documentation(""" open(path, flags, mode=511, *, dir_fd=None) @@ -926,6 +924,12 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f } try { + // FileStream buffer size must be >= 0 on .NET, and >= 1 on .NET Framework and Mono. + // On .NET, buffer size 0 or 1 disables buffering. + // On .NET Framework, buffer size 1 disables buffering. + // On Mono, buffer size 1 makes writes of length >= 2 bypass the buffer. + const int NoBuffering = 1; + FileMode fileMode = FileModeFromFlags(flags); FileAccess access = FileAccessFromFlags(flags); FileOptions options = FileOptionsFromFlags(flags); @@ -940,15 +944,15 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f // open it again w/ just read access. fs = new FileStream(path, fileMode, FileAccess.Write, FileShare.None); fs.Close(); - s = fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, NoBuffering, options); } else if (access == FileAccess.ReadWrite && fileMode == FileMode.Append) { // .NET doesn't allow Append w/ access != Write, so open the file w/ Write // and a secondary stream w/ Read, then seek to the end. - s = fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, DefaultBufferSize, options); - rs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, NoBuffering, options); + rs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, NoBuffering, options); rs.Seek(0L, SeekOrigin.End); } else { - s = fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, NoBuffering, options); } rs ??= s; diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index 574ba0eb3..f1efa378f 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -29,19 +29,10 @@ internal sealed class StreamBox { private int _id = -1; private Stream _readStream; private Stream _writeStream; - private readonly bool _writeNeedsFlush; public StreamBox(Stream readStream, Stream writeStream) { _readStream = readStream; _writeStream = writeStream; - - // a hack but it does improve perfomance a lot if flushing is not needed - _writeNeedsFlush = writeStream switch { - FileStream => true, // FileStream is buffered by default, used on Windows and mostly Mono - PosixFileStream => false, // PosixFileStream is unbuffered, used on .NET/Posix - UnixStream => false, // UnixStream is unbuffered, used sometimes on Mono - _ => true // if in doubt, flush - }; } public StreamBox(Stream stream) : this(stream, stream) { } @@ -162,8 +153,8 @@ public int Write(IPythonBuffer buffer) { count = buffer.NumBytes(); _writeStream.Write(bytes, 0, count); #endif - if (_writeNeedsFlush) { - _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush. + if (ClrModule.IsMono && count == 1) { + _writeStream.Flush(); // IO at this level is not supposed to buffer so we need to call Flush (only needed on Mono) } if (!IsSingleStream) { _readStream.Seek(_writeStream.Position, SeekOrigin.Begin); diff --git a/Tests/test_io_stdlib.py b/Tests/test_io_stdlib.py index 9c24dd4dd..344eef760 100644 --- a/Tests/test_io_stdlib.py +++ b/Tests/test_io_stdlib.py @@ -6,7 +6,7 @@ ## Run selected tests from test_io from StdLib ## -from iptest import is_ironpython, is_mono, generate_suite, run_test +from iptest import is_ironpython, is_mono, is_windows, generate_suite, run_test import test.test_io @@ -100,7 +100,7 @@ def load_tests(loader, standard_tests, pattern): test.test_io.PyMiscIOTest('test_warn_on_dealloc_fd'), # AssertionError: ResourceWarning not triggered ] - if is_mono: + if is_mono or is_windows: failing_tests += [ test.test_io.PyTextIOWrapperTest('test_seek_append_bom'), # OSError: [Errno -2146232800] Unable seek backward to overwrite data that previously existed in a file opened in Append mode. ] From 1cf9ac646b7b0abf3f488db7ebc89eb4b843d836 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Wed, 1 Jan 2025 11:56:06 -0800 Subject: [PATCH 3/8] Ignore OSException along with IOException --- Src/IronPython.Modules/_warnings.cs | 2 +- Src/IronPython/Modules/_fileio.cs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Src/IronPython.Modules/_warnings.cs b/Src/IronPython.Modules/_warnings.cs index ae37101c8..e343cff9e 100644 --- a/Src/IronPython.Modules/_warnings.cs +++ b/Src/IronPython.Modules/_warnings.cs @@ -260,7 +260,7 @@ internal static void showwarning(CodeContext context, object message, PythonType ((TextWriter)file).Write(text); } // unrecognized file type - warning is lost } - } catch (IOException) { + } catch (Exception ex) when (ex is IOException or OSException) { // invalid file - warning is lost } } diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index 5f299b655..e67592af3 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -305,10 +305,10 @@ public override void close(CodeContext/*!*/ context) { try { flush(context); - } catch (IOException) { - // flushing can fail, esp. if the other half of a pipe is closed - // ignore it because we're closing anyway - } + } catch (IOException) { /* ignore */ } catch (OSException) { /* ignore */ } + // flushing can fail, esp. if the other half of a pipe is closed + // ignore it because we're closing anyway + _closed = true; if (_closefd) { From 215237cc7b1ce0a919894fd45ad8009ed4f32149 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Sun, 5 Jan 2025 22:07:14 -0800 Subject: [PATCH 4/8] Fix file size checks in constructor --- Src/IronPython.Modules/mmap.cs | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index 8236c2331..bfce0811c 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -190,9 +190,9 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) { // use file descriptor #if NET8_0_OR_GREATER - CheckFileAccess(_fileAccess, stream); + CheckFileAccessAndSize(stream); _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); - _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, length, _fileAccess, HandleInheritability.None, leaveOpen: true); + _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, stream.Length, _fileAccess, HandleInheritability.None, leaveOpen: true); #else _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); FileAccess fa = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; @@ -211,19 +211,12 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_HANDLE); } - CheckFileAccess(_fileAccess, _sourceStream); - if (length == 0) { - length = _sourceStream.Length; - if (length == 0) { - throw PythonOps.ValueError("cannot mmap an empty file"); - } - if (_offset >= length) { - throw PythonOps.ValueError("mmap offset is greater than file size"); - } - length -= _offset; + length = _sourceStream.Length - _offset; } + CheckFileAccessAndSize(_sourceStream); + long capacity = checked(_offset + length); // Enlarge the file as needed. @@ -254,8 +247,8 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } _position = 0L; - void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { - bool isValid = mmapAccess switch { + void CheckFileAccessAndSize(Stream stream) { + bool isValid = _fileAccess switch { MemoryMappedFileAccess.Read => stream.CanRead, MemoryMappedFileAccess.ReadWrite => stream.CanRead && stream.CanWrite, MemoryMappedFileAccess.CopyOnWrite => stream.CanRead, @@ -265,20 +258,27 @@ void CheckFileAccess(MemoryMappedFileAccess mmapAccess, Stream stream) { }; if (!isValid) { - if (_handle is not null && _sourceStream is not null) { - _sourceStream.Dispose(); - } - throw PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode"); + ThrowException(PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode")); } if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { // Unix map does not support increasing size on open - if (_offset + length > stream.Length) { - if (_handle is not null && _sourceStream is not null) { - _sourceStream.Dispose(); - } - throw PythonOps.ValueError("mmap length is greater than file size"); + if (length != 0 && _offset + length > stream.Length) { + ThrowException(PythonOps.ValueError("mmap length is greater than file size")); + } + } + if (length == 0 && stream.Length == 0) { + ThrowException(PythonOps.ValueError("cannot mmap an empty file")); + } + if (_offset >= stream.Length) { + ThrowException(PythonOps.ValueError("mmap offset is greater than file size")); + } + + void ThrowException(Exception ex) { + if (_handle is not null && _sourceStream is not null) { // .NET 6.0 on POSIX only + _sourceStream.Dispose(); } + throw ex; } } } // end of constructor From f4cce8a47f272eb0761595677ccd2ad84da0e65a Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Tue, 7 Jan 2025 13:12:12 -0800 Subject: [PATCH 5/8] Implement proper lifetime management of handles --- Src/IronPython.Modules/mmap.cs | 95 ++++++++++++++++++++++------------ Src/IronPython.Modules/nt.cs | 2 +- 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index bfce0811c..2f2aa2ad2 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -5,7 +5,6 @@ #if FEATURE_MMAP using System; -using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; using System.Globalization; @@ -15,7 +14,6 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Serialization; using System.Runtime.Versioning; using System.Text; using System.Threading; @@ -190,14 +188,21 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) { // use file descriptor #if NET8_0_OR_GREATER + // On .NET 8.0+ we can create a MemoryMappedFile directly from a file descriptor + stream.Flush(); CheckFileAccessAndSize(stream); - _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + fileno = Dup(fileno); + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: true); _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, stream.Length, _fileAccess, HandleInheritability.None, leaveOpen: true); #else - _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: false); + // On .NET 6.0 on POSIX we need to create a FileStream from the file descriptor + fileno = Dup(fileno); + _handle = new SafeFileHandle((IntPtr)fileno, ownsHandle: true); FileAccess fa = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; - // This may or may not work on Mono, but on Mono streams.ReadStream is FileStream (unless dupped in some cases) - _sourceStream = new FileStream(_handle, fa); + // This FileStream constructor may or may not work on Mono, but on Mono streams.ReadStream is FileStream + // (unless dupped in some cases, which are unsupported anyway) + // so Mono should not be in this else-branch + _sourceStream = new FileStream(new SafeFileHandle((IntPtr)fileno, ownsHandle: false), access: fa); #endif } // otherwise leaves _file as null and _sourceStream as null @@ -234,7 +239,7 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag _sourceStream.Length, _fileAccess, HandleInheritability.None, - true); + leaveOpen: true); } } @@ -243,6 +248,7 @@ public MmapDefault(CodeContext/*!*/ context, int fileno, long length, string tag } catch { _file.Dispose(); _file = null; + CloseFileHandle(); throw; } _position = 0L; @@ -257,33 +263,54 @@ void CheckFileAccessAndSize(Stream stream) { _ => false }; - if (!isValid) { - ThrowException(PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode")); - } - - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - // Unix map does not support increasing size on open - if (length != 0 && _offset + length > stream.Length) { - ThrowException(PythonOps.ValueError("mmap length is greater than file size")); + try { + if (!isValid) { + throw PythonOps.OSError(PythonExceptions._OSError.ERROR_ACCESS_DENIED, "Invalid access mode"); } - } - if (length == 0 && stream.Length == 0) { - ThrowException(PythonOps.ValueError("cannot mmap an empty file")); - } - if (_offset >= stream.Length) { - ThrowException(PythonOps.ValueError("mmap offset is greater than file size")); - } - void ThrowException(Exception ex) { - if (_handle is not null && _sourceStream is not null) { // .NET 6.0 on POSIX only - _sourceStream.Dispose(); + if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + // Unix map does not support increasing size on open + if (length != 0 && _offset + length > stream.Length) { + throw PythonOps.ValueError("mmap length is greater than file size"); + } + } + if (length == 0 && stream.Length == 0) { + throw PythonOps.ValueError("cannot mmap an empty file"); + } + if (_offset >= stream.Length) { + throw PythonOps.ValueError("mmap offset is greater than file size"); } - throw ex; + } catch { + CloseFileHandle(); + throw; } } } // end of constructor + // TODO: Move to PythonNT - POSIX + private static int Dup(int fd) { + int fd2 = Mono.Unix.Native.Syscall.dup(fd); + if (fd2 == -1) throw PythonNT.GetLastUnixError(); + + try { + // set close-on-exec flag + int flags = Mono.Unix.Native.Syscall.fcntl(fd2, Mono.Unix.Native.FcntlCommand.F_GETFD); + if (flags == -1) throw PythonNT.GetLastUnixError(); + + const int FD_CLOEXEC = 1; // TODO: Move to module fcntl + flags |= FD_CLOEXEC; + flags = Mono.Unix.Native.Syscall.fcntl(fd2, Mono.Unix.Native.FcntlCommand.F_SETFD, flags); + if (flags == -1) throw PythonNT.GetLastUnixError(); + } catch { + Mono.Unix.Native.Syscall.close(fd2); + throw; + } + + return fd2; + } + + public object __len__() { using (new MmapLocker(this)) { return ReturnLong(_view.Capacity); @@ -409,17 +436,21 @@ private void CloseWorker() { _view.Flush(); _view.Dispose(); _file.Dispose(); - if (_handle is not null) { - // mmap owns _sourceStream too in this case - _sourceStream?.Dispose(); - _handle.Dispose(); - } + CloseFileHandle(); _sourceStream = null; _view = null; _file = null; } } + private void CloseFileHandle() { + if (_handle is not null) { + // mmap owns _sourceStream too (if any) in this case + _sourceStream?.Dispose(); + _handle.Dispose(); + } + } + public object find([NotNone] IBufferProtocol s) { using (new MmapLocker(this)) { return FindWorker(s, Position, _view.Capacity); @@ -646,7 +677,7 @@ public void resize(long newsize) { } if (_sourceStream == null) { - if (_handle is not null && !_handle.IsInvalid + if (_handle is not null && !_handle.IsInvalid && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { // resize on Posix platforms PythonNT.ftruncateUnix(unchecked((int)_handle.DangerousGetHandle()), newsize); diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index ca843c658..2db96b695 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -2417,7 +2417,7 @@ private static Exception DirectoryExistsError(string? filename) { #if FEATURE_NATIVE - private static Exception GetLastUnixError(string? filename = null, string? filename2 = null) + internal static Exception GetLastUnixError(string? filename = null, string? filename2 = null) => GetOsError(Mono.Unix.Native.NativeConvert.FromErrno(Mono.Unix.Native.Syscall.GetLastError()), filename, filename2); #endif From a858fcd7c5a7b538091e22e0ee9555681fd647df Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Tue, 7 Jan 2025 14:19:02 -0800 Subject: [PATCH 6/8] Describe Rules of Engagement --- Src/IronPython.Modules/mmap.cs | 153 ++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 2 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index 2f2aa2ad2..c3f8c2d06 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information. @@ -26,6 +26,155 @@ using Microsoft.Scripting.Utils; using Microsoft.Win32.SafeHandles; +/* +MemoryMappedFile — Rules of Engagement on .NET +============================================== + +In .NET, there are the following fields of `MemoryMappedFile` related to the lifetime management of +resources. +* `private readonly SafeMemoryMappedFileHandle _handle;` created in the constructor; necessary to + operate on the mmap, always disposed. +* `private readonly bool _leaveOpen;` initialized to a constructor parameter value; it pertains to + `_fileHandle` not `_handle` +* `private readonly SafeFileHandle? _fileHandle;` may be provided to the constructor, created by the + constructor, or null. + +Note that there is no field that captures `FileStream`. If a `FileStream` instance is provided to +the factory method, it will only be used once to get its file handle, which fate is controlled by +`_leaveOpen`. The `FileStream` instance itself is not disposed by `MemoryMappedFile`. A bit strange, +since `FileStream` has a destructor and may be lingering around. However, when its `Dispose` is +called from within the finalizer, it will not try to dispose the file handle, which is the whole +point. + +`MemoryMappedFile` itself is `IDisposable` and its `Dispose` does: +* dispose `_handle`, unless `_handle.IsClosed` already. +* if not `_leaveOpen` and `_fileHandle` is not null, dispose `_fileHandle`. + +There are several factory/constructor groups of `MemoryMappedFile`: + +## Factory Method Group #1 (Windows only) + +Opens an existing named memory mapped file by name. In this case, only `_handle` is initialized; +there is no underlying `_fileHandle`. It delegates opening to `OpenCore(mapName, inheritability, +desiredAccessRights, false);` **This group functions only on Windows.** + +## Factory Method Group #2 + +Creates a new memory mapped file where the content is taken from an existing file on disk. + +If the factory method is given a file path, it creates its own `FileStream`, stores its handle in +`_fileHandle`, and ensures that the file handle gets closed on dispose (`_leaveOpen` is false). + +If the factory method is given a file handle, it is stored in `_fileHandle` and its lifetime is +controlled by parameter `leaveOpen` given to the same method. If `leaveOpen` is true, the caller is +responsible of disposing the file handle. + +If the factory method is given a `fileStream`, it is used to get the file length, flush the stream, +and to extract the file handle into `_fileHandle`. Whether the extracted file handle is disposed +depend on parameter `leaveOpen`. `FileStream` itself is never disposed. + +It delegates the opening to `CreateCore(fileHandle, mapName, HandleInheritability.None, access, +MemoryMappedFileOptions.None, capacity, fileSize);` (see below for mode details on POSIX). + +**On POSIX, mapName must be null.** + +## Factory Method Group #3 (not POSIX) + +Creates a new empty memory mapped file. It only accepts a map name, and never creates/uses an +existing file from the file system. It delegates the creation to `CreateCore(fileHandle: null, +mapName, inheritability, access, options, capacity, -1);` + +**On POSIX, mapName must be null so practically this group cannot be used on POSIX.** + +## Factory Method Group #4 (Windows only) + +Creates a new empty memory mapped file or opens an existing memory mapped file if one exists with +the same name. In this factory method/constructor, there is no file stream or file handle involved; +If the map of the requested name exists, it is like opening from group #1, if it doesn't, it is like +group #3 **This group functions only on Windows.** + +## Behaviour on POSIX + +Only Group #2 can be used so it means that the factory/constructor must be given one of: +* file path (`string`) +* file handle (`SafeFileHandle`), may be null for an anonymous empty map +* file stream (`FileStream`) + +The actual work is done by `CreateCore` (POSIX-specific). If given a null file handle (from factory +method `CreateNew`), `CreateCore` may create its own file stream if needed. This file stream/handle +is not saved in a field `_fileHandle` of the map itself, but when the handle is passed to the +constructor of `SafeMemoryMappedFileHandle`, it is also marked as `ownsFileStream`, so disposing the +map will dispose the file handle. In all normal cases, i.e. `filehandle` is not null but passed by +the factory method, the lifetime of the file handle is controlled by `_leaveOpen` of +`MemoryMappedFile` and `SafeMemoryMappedFileHandle` is created with the argument `ownsFileStream` +set to false. The constructor to `SafeMemoryMappedFileHandle` does `DangerousAddRef` to the given +file stream handle (if any), so that when the original file is disposed, the handle is still valid. +On POSIX, the mmap handle value will be set to the same value as file handle value, which is the +file descriptor. For mmaps without the underlying file stream, the handle is set originally to +`IntPtr.MaxValue` so that it is valid but does not collide with any existing file descriptor. When +the mmap handle is released, it also does `DangerousRelease` of the underlying file stream handle +(if any), plus `Dispose` of it if it owned the file stream. + +`SafeFileHandle` closes the underlying file descriptor on dispose and sets it to invalid. It is OK +to close the handle several times, or even if it add-reffed and in use somewhere else. The +descriptor will be closed ass soon as the refcount is released, and in the meantime, it will prevent +future addrefs. + +MmapDefault - Rules of Engagement +================================= + +`MmapDefault` is the workhorse for Python's `mmap`. It contains all the code necessary to run on all +supported platforms. The two subclasses `MmapWindows` and `MmapPosix` only contain platform specific +constructors, to adhere to Python API. + +The relevant lifetime-sensitive (disposable) fields are: +* `MemoryMappedFile _file;` created in the constructor, may be recreated on resize +* `MemoryMappedViewAccessor _view;` created in the constructor, may be recreated on resize +* `FileStream _sourceStream;` the underlying file object, may be null +* `SafeFileHandle _handle;` the handle of the underlying file object, only used on some POSIX + platforms, null otherwise + +## .NET 8.0+/POSIX + +When the constructor is given a file descriptor, it is duplicated, saved in `_handle` and used to +create the memory-mapped `_file`. The duplication of the file descriptor is CPython's behaviour; +since Python 3.13 the constructor accepts a keyword-only argument `trackfd` prevents the duplication +but it is not implemented here. `_handle` always owns the (duplicated) file descriptor, so it has to +be disposed appropriately, if created. `_file` is created instructed to leave the file handle open, +so it is possible to dispose it and recreate again on `resize`. + +## .NET 6.0/POSIX + +The factory method to create a memory-mapped file from a file descriptor is not available. The +descriptor is still duplicated and saved in `_handle` like on .NET 8.0 but it is used to create a +`FileStream` which is then used to create the memory-mapped file. The created `FileStream` is saved +in `_fileStream` since it is useful to perform various file operations. However, it does not own the +file descriptor, so the rules of engagement for `_handle` from .NET 8.0 still apply. Because of +that, it is not essential to dispose `_sourceStream` in this case, but a good practice since it will +suppress its finalizer. Also the memory-mapped `_file` is created instructed to leave the file +handle open to prevent the closure of the file descriptor when the memory-mapped file is re-created. + +## Windows, all frameworks + +On Windows, the file descriptor is emulated by `PythonFileManager`, the file handle is not +duplicated and field `_handle` is always null. The associated file stream is retrieved from +`PythonFileManager` and used to create the memory-mapped file. The file stream is saved in +`_sourceStream`, but since it comes from somewhere else, it must not be disposed here. Therefore the +memory-mapped file is created instructed to leave the file handle open and there is no +`_sourceStream.Dispose` call on disposing `MmapDefault`. The `MemoryMappedFile` constructor will +addref the actual file handle internally, so it is safe to keep using `mmap` even if the original +file stream is closed prematurely. Of course, it is still important to dispose the `mmap` object to +release the reference to the file handle. + +## Mono + +Mono uses genuine file descriptors, however due to bugs and limitations, it cannot use the +.NET/POSIX mechanics. Therefore, to prevent regressions, it follows the Windows way (to the extent +that it is feasible), but more advanced scenarios will not behave correctly. + +*/ + + [assembly: PythonModule("mmap", typeof(IronPython.Modules.MmapModule))] namespace IronPython.Modules { public static class MmapModule { @@ -137,7 +286,7 @@ public class MmapDefault : IWeakReferenceable { private readonly long _offset; private readonly string _mapName; private readonly MemoryMappedFileAccess _fileAccess; - private readonly SafeFileHandle _handle; // only used on some POSIX platforms, null otherwise + private readonly SafeFileHandle _handle; private volatile bool _isClosed; private int _refCount = 1; From 3bce846352a3c4fcb7044f344e86749410cf3eda Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Tue, 7 Jan 2025 14:38:43 -0800 Subject: [PATCH 7/8] Implement mmap.resize on POSIX --- Src/IronPython.Modules/mmap.cs | 37 ++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index c3f8c2d06..0e6da7277 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information. @@ -825,12 +825,37 @@ public void resize(long newsize) { throw PythonOps.TypeError("mmap can't resize a readonly or copy-on-write memory map."); } - if (_sourceStream == null) { - if (_handle is not null && !_handle.IsInvalid - && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { - // resize on Posix platforms - PythonNT.ftruncateUnix(unchecked((int)_handle.DangerousGetHandle()), newsize); + if (_handle is not null + && (RuntimeInformation.IsOSPlatform(OSPlatform.OSX) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux))) { + // resize on Posix platforms + try { + if (_handle.IsInvalid) { + throw PythonOps.OSError(PythonErrorNumber.EBADF, "Bad file descriptor"); + } + _view.Flush(); + _view.Dispose(); + _file.Dispose(); + + // Resize the underlying file as needed. + int fd = unchecked((int)_handle.DangerousGetHandle()); + PythonNT.ftruncateUnix(fd, newsize); + + #if NET8_0_OR_GREATER + _file = MemoryMappedFile.CreateFromFile(_handle, _mapName, newsize, _fileAccess, HandleInheritability.None, leaveOpen: true); + #else + _sourceStream?.Dispose(); + _sourceStream = new FileStream(new SafeFileHandle((IntPtr)fd, ownsHandle: false), FileAccess.ReadWrite); + _file = CreateFromFile(_sourceStream, _mapName, newsize, _fileAccess, HandleInheritability.None, leaveOpen: true); + #endif + _view = _file.CreateViewAccessor(_offset, newsize, _fileAccess); + return; + } catch { + close(); + throw; } + } + + if (_sourceStream == null) { // resizing is not supported without an underlying file throw WindowsError(PythonExceptions._OSError.ERROR_INVALID_PARAMETER); } From 91fc9f45ffde8281afaf28c0075b1c0350e07a97 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Wed, 8 Jan 2025 19:37:08 -0800 Subject: [PATCH 8/8] Update after review --- Src/IronPython.Modules/mmap.cs | 4 ++-- Src/IronPython.Modules/nt.cs | 3 +-- Src/IronPython/Modules/_fileio.cs | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index 0e6da7277..434ab8adc 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -116,8 +116,8 @@ file stream handle (if any), so that when the original file is disposed, the han (if any), plus `Dispose` of it if it owned the file stream. `SafeFileHandle` closes the underlying file descriptor on dispose and sets it to invalid. It is OK -to close the handle several times, or even if it add-reffed and in use somewhere else. The -descriptor will be closed ass soon as the refcount is released, and in the meantime, it will prevent +to close the handle several times, or even if it is add-reffed and in use somewhere else. The +descriptor will be closed as soon as the refcount is released, and in the meantime, it will prevent future addrefs. MmapDefault - Rules of Engagement diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index 2db96b695..3412cd868 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -438,7 +438,7 @@ private static int UnixDup(int fd, int fd2, out Stream? stream) { if (ClrModule.IsMono) { // Elaborate workaround on Mono to avoid UnixStream as out stream = new Mono.Unix.UnixStream(res, ownsHandle: false); - FileAccess fileAccess = stream.CanRead ? stream.CanWrite ? FileAccess.ReadWrite : FileAccess.Read : FileAccess.Write; + FileAccess fileAccess = stream.CanWrite ? stream.CanRead ? FileAccess.ReadWrite : FileAccess.Write : FileAccess.Read; stream.Dispose(); try { // FileStream on Mono created with a file descriptor might not work: /~https://github.com/mono/mono/issues/12783 @@ -916,7 +916,6 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f // Use PosixFileStream to operate on fd directly // On Mono, we must use FileStream due to limitations in MemoryMappedFile Stream s = PosixFileStream.Open(path, flags, unchecked((uint)mode), out int fd); - //Stream s = PythonIOModule.FileIO.OpenFilePosix(path, flags, mode, out int fd); if ((flags & O_APPEND) != 0) { s.Seek(0L, SeekOrigin.End); } diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index e67592af3..61dbcc600 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -109,7 +109,6 @@ public FileIO(CodeContext/*!*/ context, [NotNone] string name, [NotNone] string // Use PosixFileStream to operate on fd directly // On Mono, we must use FileStream due to limitations in MemoryMappedFile var stream = PosixFileStream.Open(name, flags, 0b_110_110_110, out int fd); // mode: rw-rw-rw- - //var stream = OpenFilePosix(name, flags, 0b_110_110_110, out int fd); if ((flags & O_APPEND) != 0) { stream.Seek(0L, SeekOrigin.End); } @@ -152,7 +151,7 @@ public FileIO(CodeContext/*!*/ context, [NotNone] string name, [NotNone] string } if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { // On POSIX, register the file descriptor with the file manager right after file opening - // Because the .NET case is already handled above before `switch`, this branch runs only on Mono + // This branch is needed for Mono, the .NET case is already handled above before `switch` _context.FileManager.GetOrAssignId(_streams); // according to [documentation](https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.safefilehandle?view=net-9.0#remarks) // accessing SafeFileHandle sets the current stream position to 0