Files
PoyoLang/PoyoLang.Translator.SourceGenerator/PoyoLangTranslatorGenerator.cs
2025-05-16 14:07:02 +02:00

377 lines
9.9 KiB
C#

using System.Text;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
namespace PoyoLang.Translator.SourceGenerator;
[Generator]
public class PoyoLangTranslatorGenerator : IIncrementalGenerator
{
private const char IndentChar = '\t';
/// <summary>
/// Registers the incremental pipeline that turns the <c>dictionary.txt</c> additional file
/// into the generated <c>PoyoLangTranslator.g.cs</c> source.
/// </summary>
/// <param name="context">Initialization context used to build and register the pipeline.</param>
public void Initialize(IncrementalGeneratorInitializationContext context)
{
    var texts = context.AdditionalTextsProvider;

    // There will be only one of those but incremental generators work as pipelines
    var dictionaries = texts
        // Ordinal comparison: the file-name match must not vary with the build machine's culture
        .Where(static text => text.Path.EndsWith("dictionary.txt", System.StringComparison.Ordinal))
        // Forward the pipeline's cancellation token to the file read
        .Select(static (text, cancellationToken) => text.GetText(cancellationToken))
        // GetText can return null; drop such entries instead of dereferencing null downstream
        .Where(static sourceText => sourceText is not null);

    var parsedDictionaries = dictionaries
        .Select(static (dictionary, _) => ReadCustomDictionary(dictionary!));

    var formattedDictionaries = parsedDictionaries
        .Select(static (dictionary, _) =>
        {
            // Return normal and reverse dictionary order to have ngrams first
            return (
                Normal: dictionary,
                Reversed: dictionary.OrderBy(p => p.Value).ToDictionary(p => p.Value, p => p.Key)
            );
        });

    // The prefix tree is built from the reversed (value -> key) dictionary,
    // the normal one is carried along for the reverse translation method
    var prefixTrees = formattedDictionaries
        .Select(static (dictionaries, _) => (
            Dictionary: dictionaries.Normal,
            PrefixTree: BuildPrefixTree(dictionaries.Reversed)
        ));

    context.RegisterSourceOutput(prefixTrees, static (sourceProductionContext, data) =>
    {
        sourceProductionContext.AddSource("PoyoLangTranslator.g.cs", GenerateSource(data.Dictionary, data.PrefixTree));
    });
}
/// <summary>
/// Parses the dictionary text, one <c>key=value</c> entry per line, splitting each line
/// on its first <c>=</c>. A later entry with the same key overwrites an earlier one.
/// </summary>
/// <param name="text">Content of the dictionary file.</param>
/// <returns>The entries found in <paramref name="text"/>; malformed lines are skipped.</returns>
private static Dictionary<string, string> ReadCustomDictionary(SourceText text)
{
    var dictionary = new Dictionary<string, string>();

    foreach (var line in text.ToString().Split('\n'))
    {
        // Tolerate CRLF line endings
        var span = line.TrimEnd('\r').AsSpan();

        // Blank lines (including the one produced by a trailing newline) carry no entry.
        // Skip rather than stop, so a blank line in the middle of the file does not
        // silently drop every entry after it.
        if (span.IsEmpty)
        {
            continue;
        }

        var splitIndex = span.IndexOf('=');

        // Skip malformed lines: without a separator the slice below would throw, and an
        // empty key or value cannot be title-cased by the code generation that follows.
        if (splitIndex <= 0 || splitIndex == span.Length - 1)
        {
            continue;
        }

        dictionary[span[..splitIndex].ToString()] = span[(splitIndex + 1)..].ToString();
    }

    return dictionary;
}
/// <summary>
/// Builds a prefix tree from the dictionary keys. Root nodes are the one-character keys;
/// each child extends its parent's prefix by exactly one character.
/// </summary>
/// <remarks>
/// A key is only attached when every shorter prefix of it is itself a key of
/// <paramref name="dictionary"/>; other keys do not appear in the tree.
/// </remarks>
/// <param name="dictionary">Maps each source text (key) to its translation target (value).</param>
/// <returns>The root nodes of the tree.</returns>
private static List<Node> BuildPrefixTree(Dictionary<string, string> dictionary)
{
    var rootNodes = new List<Node>();

    var firstNodes = dictionary.Where(p => p.Key.Length is 1);

    foreach (var firstNode in firstNodes)
    {
        var letter = firstNode.Key[0];
        var target = firstNode.Value;

        var node = new Node(letter, target);
        rootNodes.Add(node);

        // Add sub-nodes
        ParseNodes(node, letter.ToString());
    }

    return rootNodes;

    void ParseNodes(Node node, string prefix)
    {
        // Find the keys that extend the current prefix by one character.
        // Ordinal comparison: the default StartsWith(string) is culture-sensitive, which
        // would let the tree shape depend on the culture of the machine running the build.
        var subNodes = dictionary
            .Where(p => p.Key.Length == prefix.Length + 1
                && p.Key.StartsWith(prefix, System.StringComparison.Ordinal));

        foreach (var subNode in subNodes)
        {
            var letter = subNode.Key[prefix.Length];
            var target = subNode.Value;

            var newPrefix = $"{prefix}{letter}";

            var newNode = new Node(letter, target);
            node.Nodes.Add(newNode);

            // Recursively add sub-nodes
            ParseNodes(newNode, newPrefix);
        }
    }
}
/// <summary>
/// Produces the full text of the generated file: usings and namespace, the partial
/// <c>PoyoLangTranslator</c> class, and its two generated methods.
/// </summary>
/// <param name="dictionary">Dictionary passed on to the <c>FromPoyo</c> method generation.</param>
/// <param name="rootNodes">Prefix tree roots passed on to the <c>NextLetter</c> method generation.</param>
/// <returns>The generated source text.</returns>
private static string GenerateSource(Dictionary<string, string> dictionary, List<Node> rootNodes)
{
    var builder = new StringBuilder();

    // File header (usings, namespace) followed by the opening of the partial class
    builder
        .Append(
            """
            using System;
            using System.Text;
            namespace PoyoLang.Translator;
            """
        )
        .Append(
            """
            public partial class PoyoLangTranslator
            {
            """
        );

    // Class members
    GenerateNextLetterMethod(rootNodes, builder);
    GenerateFromPoyoMethod(dictionary, builder);

    // Closing brace of the partial class
    builder.Append(
        """
        }
        """
    );

    return builder.ToString();
}
/// <summary>
/// Appends the generated <c>NextLetter</c> method to <paramref name="source"/>.
/// </summary>
/// <remarks>
/// The generated method consumes characters from the start of <c>text</c> through nested
/// switch statements that mirror the prefix tree: each tree level switches on the next
/// character, tries its sub-nodes first, and falls back to the current node's target
/// when no sub-node matches. Characters matching no root node are copied to the output as-is.
/// </remarks>
/// <param name="rootNodes">Root nodes of the prefix tree.</param>
/// <param name="source">Builder receiving the generated code.</param>
private static void GenerateNextLetterMethod(List<Node> rootNodes, StringBuilder source)
{
    // Next letter method definition
    source.Append(
        """
        private void NextLetter(ref ReadOnlySpan<char> text, StringBuilder output)
        {
        """
    );

    // 0 length case and caps
    source.Append(
        """
        if (text.Length < 1)
        {
        return;
        }
        var isCaps = char.IsUpper(text[0]);
        """
    );

    GenerateSwitchCases(rootNodes, depth: 0);

    // Next letter method end
    source.Append(
        """
        // Punctuation/Unknown characters case
        output.Append(text[0]);
        text = text[1..];
        }
        """
    );

    return;

    // Emits one switch over text[depth], with one case per node of the given tree level
    void GenerateSwitchCases(List<Node> nodes, int depth)
    {
        var indent = Indent(depth * 3);

        // Switch-case start
        source.Append(
            $$"""
            {{indent}}switch (text[{{depth}}])
            {{indent}}{
            """
        );

        foreach (var node in nodes)
        {
            var targetLower = node.Target;
            var targetUpper = ToTitleCase(targetLower);

            // Case start.
            // Invariant upper-casing: char.ToUpper follows the current culture, so the case
            // label baked into the generated code would depend on the build machine
            // (e.g. 'i' upper-cases to U+0130 under a Turkish culture).
            source.Append(
                $$"""
                {{indent}} case '{{node.Letter}}' or '{{char.ToUpperInvariant(node.Letter)}}':
                """
            );

            // Sub nodes handling
            if (node.Nodes.Count > 0)
            {
                // Only look at the next character when the text is long enough
                source.Append(
                    $$"""
                    {{indent}} if (text.Length > {{depth + 1}})
                    {{indent}} {
                    """
                );

                // Sub nodes
                GenerateSwitchCases(node.Nodes, depth + 1);

                source.Append(
                    $$"""
                    {{indent}} }
                    """
                );
            }

            // Current node handling fallback: consume depth + 1 characters and emit the target
            source.Append(
                $$"""
                {{indent}}
                {{indent}} text = text[{{depth + 1}}..];
                {{indent}}
                {{indent}} output.Append(isCaps ? "{{targetUpper}}" : "{{targetLower}}");
                {{indent}}
                {{indent}} return;
                """
            );
        }

        // Switch-case end
        source.Append(
            $$"""
            {{indent}}}
            """
        );
    }
}
/// <summary>
/// Appends the generated <c>FromPoyo</c> method to <paramref name="source"/>.
/// </summary>
/// <remarks>
/// The generated method reads the first 4 characters of <c>text</c> and switches on them:
/// a dictionary key (or its capitalized form) appends the matching value and advances by 4;
/// anything else copies a single character and advances by 1. Text shorter than 4 characters
/// is copied to the output unchanged.
/// </remarks>
/// <param name="dictionary">Maps each 4-character key to the text it translates back to.</param>
/// <param name="source">Builder receiving the generated code.</param>
private static void GenerateFromPoyoMethod(Dictionary<string, string> dictionary, StringBuilder source)
{
    // From Poyo method definition
    source.Append(
        """
        private void FromPoyo(ref ReadOnlySpan<char> text, StringBuilder output)
        {
        """
    );

    // Initial cases
    source.Append(
        """
        if (text.Length < 1)
        {
        return;
        }
        // This happens if the end of the text is not a poyo letter (punctuation for ex)
        if (text.Length < 4)
        {
        output.Append(text);
        text = text[^0..];
        return;
        }
        var letter = text[..4];
        """
    );

    GenerateReverseSwitchCases();

    // From Poyo method end
    source.Append(
        """
        // Advance in text
        text = text[4..];
        }
        """
    );

    return;

    // Emits the switch over the 4-character slice, two cases (plain and capitalized) per entry
    void GenerateReverseSwitchCases()
    {
        // Switch start
        source.Append(
            """
            switch (letter)
            {
            """
        );

        foreach (var pair in dictionary)
        {
            // Non-caps case
            source.Append(
                $$"""
                case "{{pair.Key}}":
                output.Append("{{pair.Value}}");
                break;
                """
            );

            // A key with no distinct capitalized form (caseless or already upper-case first
            // character) would repeat the label above, and the generated switch would not compile
            var capsKey = ToTitleCase(pair.Key);
            if (capsKey == pair.Key)
            {
                continue;
            }

            // Caps case
            source.Append(
                $$"""
                case "{{capsKey}}":
                output.Append("{{ToTitleCase(pair.Value)}}");
                break;
                """
            );
        }

        // Switch end
        source.Append(
            """
            default:
            // Not a poyo letter, only read 1 character (could be punctuation for ex)
            output.Append(text[0]);
            text = text[1..];
            return;
            }
            """
        );
    }
}
/// <summary>
/// Upper-cases the first character of <paramref name="text"/> and leaves the rest untouched.
/// </summary>
/// <param name="text">Text to capitalize.</param>
/// <returns>
/// <paramref name="text"/> with its first character upper-cased; an empty string is returned as-is.
/// </returns>
private static string ToTitleCase(string text)
{
    // Nothing to capitalize; indexing text[0] would throw
    if (text.Length == 0)
    {
        return text;
    }

    // Invariant casing keeps the generated code independent of the build machine's culture
    return $"{char.ToUpperInvariant(text[0])}{text[1..]}";
}
/// <summary>
/// Builds an indentation prefix made of <paramref name="depth"/> repetitions of <c>IndentChar</c>.
/// </summary>
/// <param name="depth">Number of indentation characters to produce.</param>
/// <returns>The indentation string.</returns>
private static string Indent(int depth)
{
    return new string(IndentChar, depth);
}
}