// Source file: PoyoLang/PoyoLang.Translator.SourceGenerator/PoyoLangTranslatorGenerator.cs (C#, 225 lines, 6.3 KiB)

using System.Text;
using System.Text.Json;
using Microsoft.CodeAnalysis;
namespace PoyoLang.Translator.SourceGenerator;
/// <summary>
/// Incremental source generator that reads an additional file whose path ends with
/// <c>dictionary.json</c>, builds a prefix tree from its entries and emits
/// <c>PoyoLangTranslator.g.cs</c>, a partial <c>PoyoLangTranslator</c> class containing a
/// generated <c>NextLetter</c> method made of nested <c>switch</c> statements.
/// </summary>
/// <remarks>
/// All casing, ordering and prefix checks use invariant/ordinal semantics so that the generated
/// source does not depend on the culture of the machine running the build.
/// </remarks>
[Generator]
public class PoyoLangTranslatorGenerator : IIncrementalGenerator
{
    private const char IndentChar = '\t';

    /// <summary>
    /// Wires the generator pipeline: additional text -> JSON dictionary -> inverted dictionary
    /// -> prefix tree -> generated source.
    /// </summary>
    /// <param name="context">The initialization context supplied by the compiler.</param>
    public void Initialize(IncrementalGeneratorInitializationContext context)
    {
        var texts = context.AdditionalTextsProvider;

        // There will be only one of those but incremental generators work as pipelines
        var dictionaries = texts
            .Where(static text => text.Path.EndsWith("dictionary.json", StringComparison.Ordinal))
            .Select(static (text, cancellationToken) => text.GetText(cancellationToken)?.ToString())
            // GetText may return null when the file cannot be read; skip it instead of crashing.
            .Where(static json => json is not null);

        var parsedDictionaries = dictionaries
            .Select(static (json, _) => JsonSerializer.Deserialize<Dictionary<string, string>>(json!))
            // A JSON document containing only "null" deserializes to null; skip it as well.
            .Where(static dictionary => dictionary is not null);

        var formattedDictionaries = parsedDictionaries
            .Select(static (dictionary, _) =>
            {
                // Invert the dictionary (value -> key) so that the letters/ngrams become the keys.
                // Ordinal ordering keeps the emitted case order identical on every machine.
                return dictionary!
                    .OrderBy(static p => p.Value, StringComparer.Ordinal)
                    .ToDictionary(static p => p.Value, static p => p.Key);
            });

        var prefixTrees = formattedDictionaries
            .Select(static (formattedDictionary, _) => BuildPrefixTree(formattedDictionary));

        context.RegisterSourceOutput(prefixTrees, static (sourceProductionContext, prefixTree) =>
        {
            sourceProductionContext.AddSource("PoyoLangTranslator.g.cs", GenerateSource(prefixTree));
        });
    }

    /// <summary>
    /// Builds a prefix tree from the dictionary keys. Single-character keys become root nodes;
    /// every key that extends an existing node's prefix by exactly one character becomes a child
    /// of that node.
    /// </summary>
    /// <param name="dictionary">Mapping from source letters/ngrams to their target text.</param>
    /// <returns>The root nodes of the prefix tree.</returns>
    /// <remarks>
    /// A key is only reachable if every shorter prefix of it is also a key; keys without such a
    /// chain are not added to the tree.
    /// </remarks>
    private static List<Node> BuildPrefixTree(Dictionary<string, string> dictionary)
    {
        var rootNodes = new List<Node>();

        var firstNodes = dictionary.Where(p => p.Key.Length is 1);

        foreach (var firstNode in firstNodes)
        {
            var letter = firstNode.Key[0];
            var target = firstNode.Value;

            var node = new Node(letter, target);
            rootNodes.Add(node);

            // Add sub-nodes
            ParseNodes(node, letter.ToString());
        }

        return rootNodes;

        void ParseNodes(Node node, string prefix)
        {
            // Find the keys that extend the current prefix by exactly one character.
            // The cheap length check runs first; the prefix check is ordinal (culture-independent).
            var subNodes = dictionary
                .Where(p => p.Key.Length == prefix.Length + 1
                            && p.Key.StartsWith(prefix, StringComparison.Ordinal));

            foreach (var subNode in subNodes)
            {
                var letter = subNode.Key[prefix.Length];
                var target = subNode.Value;

                var newPrefix = $"{prefix}{letter}";

                var newNode = new Node(letter, target);
                node.Nodes.Add(newNode);

                // Recursively add sub-nodes
                ParseNodes(newNode, newPrefix);
            }
        }
    }

    /// <summary>
    /// Produces the full text of the generated file: usings, namespace, the partial
    /// <c>PoyoLangTranslator</c> class and its <c>NextLetter</c> method.
    /// </summary>
    /// <param name="rootNodes">Root nodes of the prefix tree to translate into switch cases.</param>
    /// <returns>The generated C# source.</returns>
    private static string GenerateSource(List<Node> rootNodes)
    {
        var source = new StringBuilder();

        // Usings and namespace
        source.Append(
            """
            using System;
            using System.Text;
            namespace PoyoLang.Translator;
            """
        );

        // Partial class definition
        source.Append(
            """
            public partial class PoyoLangTranslator
            {
            """
        );

        // Next letter method definition
        source.Append(
            """
            public void NextLetter(ref ReadOnlySpan<char> text, StringBuilder output)
            {
            """
        );

        // 0 length case and caps
        source.Append(
            """
            if (text.Length < 1)
            {
            return;
            }
            var isCaps = char.IsUpper(text[0]);
            """
        );

        GenerateSwitchCases(rootNodes, depth: 0, source: source);

        // Next letter method end.
        // The unmatched character must be appended BEFORE the span is advanced; slicing first
        // would emit the following character and index out of range on the last one.
        source.Append(
            """
            // Punctuation/Unknown characters case
            output.Append(text[0]);
            text = text[1..];
            }
            """
        );

        // Partial class end
        source.Append(
            """
            }
            """
        );

        return source.ToString();
    }

    /// <summary>
    /// Appends a <c>switch</c> over <c>text[depth]</c> with one case per node. Each case first
    /// tries the node's children (when enough input remains) and otherwise consumes
    /// <c>depth + 1</c> characters and appends the node's target.
    /// </summary>
    /// <param name="nodes">Nodes to emit at this level.</param>
    /// <param name="depth">Zero-based index of the character being switched on.</param>
    /// <param name="source">Builder receiving the generated code.</param>
    /// <remarks>
    /// Letters and targets are written verbatim into char/string literals; entries containing
    /// quotes or backslashes would therefore produce invalid generated source.
    /// </remarks>
    private static void GenerateSwitchCases(List<Node> nodes, int depth, StringBuilder source)
    {
        var indent = Indent(depth * 3);

        // Switch-case start
        source.Append(
            $$"""
            {{indent}}switch (text[{{depth}}])
            {{indent}}{
            """
        );

        foreach (var node in nodes)
        {
            var targetLower = node.Target;
            var targetUpper = ToTitleCase(targetLower);

            // Invariant casing: the emitted pattern must not vary with the build machine's culture.
            var upperLetter = char.ToUpperInvariant(node.Letter);

            // Case start
            source.Append(
                $$"""
                {{indent}} case '{{node.Letter}}' or '{{upperLetter}}':
                {{indent}}
                """
            );

            // Sub nodes handling
            if (node.Nodes.Count > 0)
            {
                source.Append(
                    $$"""
                    {{indent}} if (text.Length > {{depth + 1}})
                    {{indent}} {
                    """
                );

                // Sub nodes
                GenerateSwitchCases(node.Nodes, depth + 1, source);

                source.Append(
                    $$"""
                    {{indent}} }
                    """
                );
            }

            // Current node handling fallback (reached when no child case returned)
            source.Append(
                $$"""
                {{indent}}
                {{indent}} text = text[{{depth + 1}}..];
                {{indent}}
                {{indent}} output.Append(isCaps ? "{{targetUpper}}" : "{{targetLower}}");
                {{indent}}
                {{indent}} return;
                """
            );
        }

        // Switch-case end
        source.Append(
            $$"""
            {{indent}}}
            """
        );
    }

    /// <summary>
    /// Returns <paramref name="text"/> with its first character upper-cased using the invariant
    /// culture. Empty input is returned unchanged.
    /// </summary>
    /// <param name="text">The text to capitalize.</param>
    /// <returns>The capitalized text.</returns>
    private static string ToTitleCase(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return $"{char.ToUpperInvariant(text[0])}{text[1..]}";
    }

    /// <summary>Returns a string made of <paramref name="depth"/> indentation characters.</summary>
    private static string Indent(int depth) => new(IndentChar, depth);
}