Files
2022/src/Util.cs
Parnic 483f1f2502 Strip more BOMs
The 01a sample text file I created in VS2022 has a big-endian utf-16 byte order mark in front. When read with File.Read* methods it was fine, but when piped in from the command line it was causing the integer parsing to fail due to the extra bytes on the front. This is the cleanest way I can find to strip any of a set of BOMs from a string. Maybe there's an easier way somewhere, but the main issue is that using something like line[0..1].SequenceEqual() may or may not use wide chars (0xFEFF as opposed to 0xFE, 0xFF, for example) so I can't just build a list of preamble arrays and check if the byte version of the string starts with them.

The "StripPreamble" character list is still a mystery. I found it while working on my MacBook for aoc2021 and can't seem to find any search results indicating what these bytes represent, so... I'm just going to leave it there.
2022-12-01 11:59:43 -06:00

119 lines
3.5 KiB
C#

using System.Diagnostics;
using System.Reflection;
using System.Text;
namespace aoc2022;
internal static class Util
{
// Characters dropped from the front of the first line of redirected input when that line
// starts with exactly this sequence: U+2229 (∩), U+2557 (╗), U+2510 (┐).
// These are what the UTF-8 byte order mark bytes EF BB BF turn into when decoded with OEM
// code page 437 -- presumably how a BOM-prefixed UTF-8 file arrives when it is piped in
// through a console that uses that code page.
private static readonly char[] StripPreamble = { '\u2229', '\u2557', '\u2510' };

// Encodings whose byte order mark is looked for at the start of the first line of
// redirected input when the StripPreamble sequence above is not present.
private static readonly Encoding[] StripBOMsFromEncodings =
{
    Encoding.UTF8,
    Encoding.Unicode,
    Encoding.BigEndianUnicode,
};
/// <summary>
/// Feeds every line of the named input to <paramref name="processor"/>, in order.
/// </summary>
/// <remarks>
/// Sources are tried in this order and the first usable one wins:
/// redirected standard input (used only if it contains at least one non-empty line),
/// the file <c>inputs/{inputName}.txt</c>, and finally the embedded resource
/// <c>{namespace of Util}.inputs.{inputName}.txt</c> in the executing assembly.
/// </remarks>
/// <param name="inputName">Input name without directory or the <c>.txt</c> extension.</param>
/// <param name="processor">Callback invoked once for each line of the chosen source.</param>
private static void ReadData(string inputName, Action<string> processor)
{
    if (Console.IsInputRedirected)
    {
        // Buffer stdin before dispatching anything: if it turns out to hold nothing but
        // blank lines we fall back to the file/resource, and the processor must not have
        // been handed those blank lines already.
        List<string> pipedLines = new();
        bool sawContent = false;
        while (Console.In.ReadLine() is { } pipedLine)
        {
            if (pipedLines.Count == 0)
            {
                // Only the very first line can carry a byte order mark.
                pipedLine = StripLeadingByteOrderMark(pipedLine);
            }

            pipedLines.Add(pipedLine);
            if (!string.IsNullOrEmpty(pipedLine))
            {
                sawContent = true;
            }
        }

        if (sawContent)
        {
            foreach (var bufferedLine in pipedLines)
            {
                processor(bufferedLine);
            }

            return;
        }
    }

    var filename = $"inputs/{inputName}.txt";
    if (File.Exists(filename))
    {
        foreach (var fileLine in File.ReadLines(filename))
        {
            processor(fileLine);
        }

        return;
    }

    // typeof(Util) is not technically correct since what we need is the "default namespace,"
    // but "default namespace" is a Project File concept, not a C#/.NET concept, so it's not
    // accessible at runtime. instead, we assume Util is also part of the "default namespace"
    var resourceName = $"{typeof(Util).Namespace}.inputs.{inputName}.txt";

    // GetManifestResourceStream returns null when no such resource exists; in that case the
    // StreamReader constructor below throws ArgumentNullException.
    using var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);
    using StreamReader reader = new(stream!);
    while (reader.ReadLine() is { } readLine)
    {
        processor(readLine);
    }
}

/// <summary>
/// Returns <paramref name="line"/> without a leading byte order mark, if it has one.
/// </summary>
/// <remarks>
/// Checks the <c>StripPreamble</c> character sequence first, then the decoded preamble of
/// each encoding in <c>StripBOMsFromEncodings</c>. At most one marker is removed. Lines
/// shorter than a marker are returned unchanged rather than causing a range error.
/// </remarks>
private static string StripLeadingByteOrderMark(string line)
{
    var rawPreamble = new string(StripPreamble);
    if (line.StartsWith(rawPreamble, StringComparison.Ordinal))
    {
        return line[rawPreamble.Length..];
    }

    foreach (var encoding in StripBOMsFromEncodings)
    {
        // GetPreamble() yields bytes, but the line is already decoded text. Decode the
        // preamble and strip by its length in chars, not by the byte count.
        var decodedBom = encoding.GetString(encoding.GetPreamble());
        if (decodedBom.Length > 0 && line.StartsWith(decodedBom, StringComparison.Ordinal))
        {
            return line[decodedBom.Length..];
        }
    }

    return line;
}
/// <summary>
/// Reads the whole named input and returns it as a single string.
/// </summary>
/// <param name="filename">Input name, passed straight through to <c>ReadData</c>.</param>
/// <returns>
/// All lines of the input joined with <c>"\n"</c>, or an empty string when the input has
/// no lines. A single-line input is returned as that line, unchanged.
/// </returns>
internal static string ReadAllText(string filename)
{
    // Collect every line; assigning each line to one variable would keep only the last.
    List<string> lines = new();
    ReadData(filename, lines.Add);
    return string.Join("\n", lines);
}
/// <summary>
/// Reads the named input and returns its lines in order.
/// </summary>
/// <param name="filename">Input name, passed straight through to <c>ReadData</c>.</param>
/// <returns>A fully populated list containing one entry per input line.</returns>
internal static IEnumerable<string> ReadAllLines(string filename)
{
    var collected = new List<string>();
    ReadData(filename, collected.Add);
    return collected;
}
/// <summary>
/// Reads the named input and parses every line as a 64-bit integer.
/// </summary>
/// <param name="filename">Input name, passed straight through to <c>ReadAllLines</c>.</param>
/// <returns>
/// A lazily evaluated sequence of the parsed values. Parsing happens as the sequence is
/// enumerated, so a line that is not a valid integer throws at that point.
/// </returns>
internal static IEnumerable<long> ReadAllLinesAsInts(string filename)
{
    // The input is machine-readable data, so parse it with the invariant culture rather
    // than whatever culture the current thread happens to run under.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    return ReadAllLines(filename).Select(line => long.Parse(line, invariant));
}
/// <summary>
/// Logs an underlined "test: {name}" heading for a set of tests.
/// </summary>
/// <param name="name">Name of the test set, inserted into the heading.</param>
internal static void StartTestSet(string name)
    => Logger.Log($"<underline>test: {name}<r>");
/// <summary>
/// Logs the label of an individual test, wrapped in the magenta markup tag.
/// </summary>
/// <param name="label">Text to log.</param>
internal static void StartTest(string label)
    => Logger.Log($"<magenta>{label}<r>");
/// <summary>
/// Evaluates a test predicate once and reports the outcome.
/// </summary>
/// <param name="a">Predicate to evaluate; <c>true</c> means the test passed.</param>
/// <param name="printResult">
/// When <c>true</c>, logs a green check mark on success or a red "x" on failure.
/// </param>
internal static void TestCondition(Func<bool> a, bool printResult = true)
{
    bool passed = a.Invoke();

    if (!passed)
    {
        // The assertion fires before the failure is logged.
        Debug.Assert(false);
    }

    if (printResult)
    {
        Logger.Log(passed ? "<green>✓<r>" : "<red>x<r>");
    }
}
}