Skip to content

Commit

Permalink
use encoded-word for Unicode parameter values
Browse files Browse the repository at this point in the history
  • Loading branch information
IS4Code committed May 3, 2023
1 parent 6012ac8 commit 527137b
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 3 deletions.
19 changes: 19 additions & 0 deletions SFI.Tests/UnitTests/TextToolsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ public void FormatComponentNameTests(string name, string expected)
Assert.AreEqual(expected, result);
}

/// <summary>
/// Verifies the output of <see cref="FormatMimeParameter(string)"/> for
/// plain tokens (returned unchanged), ASCII values that have to be quoted
/// and escaped, and values that are emitted as a quoted encoded-word
/// (non-ASCII text, or text that itself looks like an encoded-word).
/// </summary>
[TestMethod]
[DataRow("token", "token")]
[DataRow("with space", "\"with space\"")]
[DataRow("with\nnewline", "\"with\nnewline\"")]
[DataRow("with\"quote", "\"with\\\"quote\"")]
[DataRow("with\rreturn", "\"with\\\rreturn\"")]
[DataRow("with\u00B7unicode", "\"=?utf-8?Q?with=C2=B7unicode?=\"")]
[DataRow("with\u00B7unicode and space", "\"=?utf-8?Q?with=C2=B7unicode_and_space?=\"")]
[DataRow("=?.?.?.?=", "\"=?utf-8?B?PT8uPy4/Lj89?=\"")]
[DataRow("=?.?.?very long text to warrant Q?=", "\"=?utf-8?Q?=3D=3F.=3F.=3Fvery_long_text_to_warrant_Q=3F=3D?=\"")]
public void FormatMimeParameterTests(string value, string expected)
    => Assert.AreEqual(expected, FormatMimeParameter(value));

/// <summary>
/// The tests for <see cref="GetUserFriendlyName{T}(T)"/>.
/// </summary>
Expand Down
52 changes: 49 additions & 3 deletions SFI/TextTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,12 @@ public static string GetImpliedMediaTypeFromInterpreter(string interpreter)
return "application/prs.implied-executable;interpreter=" + FormatMimeParameter(interpreter);
}

// Matches any character that prevents a value from being written as a bare
// token: the separator characters listed in the class, control characters
// and space (U+0000-U+0020), and everything from DEL (U+007F) upwards,
// which includes all non-ASCII characters.
static readonly Regex invalidTokenCharacters = new(@"[][()<>@,;:\\""/?=\x00-\x20\x7F-\uFFFF]", RegexOptions.Compiled);

// Matches a value that as a whole has the shape of an encoded-word,
// i.e. "=?" charset "?" encoding "?" text "?=".
static readonly Regex encodedWord = new(@"^=\?[^?]*\?[^?]*\?[^?]*\?=$", RegexOptions.Compiled);

// Matches the characters that are prefixed with a backslash when a value is
// written as a quoted-string: double quote, backslash and carriage return.
static readonly Regex escapedQuotedCharacters = new(@"[""\\\r]", RegexOptions.Compiled);

// The encoding used to convert a value to bytes when an encoded-word is produced.
static readonly Encoding mimeParamEncoding = Encoding.UTF8;

/// <summary>
/// Formats the value of a MIME parameter.
/// </summary>
Expand All @@ -253,9 +255,53 @@ public static string FormatMimeParameter(string value)
{
    // Three output forms are possible, tried from the simplest:
    //   1. the value itself, when it is a valid token;
    //   2. a quoted-string, when the value is ASCII-only and cannot be
    //      mistaken for an encoded-word;
    //   3. a quoted encoded-word (Q or B encoding, whichever is shorter).
    if(!invalidTokenCharacters.IsMatch(value))
    {
        // The value is okay as a standalone token.
        return value;
    }
    if(!value.Any(c => c > '\x7F') && !encodedWord.IsMatch(value))
    {
        // The value does not need encoding; quote it and put a single
        // backslash in front of each character that requires escaping.
        return "\"" + escapedQuotedCharacters.Replace(value, @"\$0") + "\"";
    }
    // Produce encoded-word from the value (might not be according to RFC 2047,
    // but corresponds to what the ContentType class does).
    // TODO: Support for RFC 2231?
    var bytes = mimeParamEncoding.GetBytes(value);
    string base64Encoded = Convert.ToBase64String(bytes);
    var encodingName = mimeParamEncoding.WebName.ToLowerInvariant();
    var qEncoded = new StringBuilder();
    qEncoded.Append("\"=?");
    qEncoded.Append(encodingName);
    qEncoded.Append("?Q?");
    // Everything appended after this point is the Q-encoded text itself,
    // which is what gets compared against the length of the base64 form.
    var prefixLength = qEncoded.Length;
    foreach(var b in bytes)
    {
        switch(b)
        {
            case (byte)' ':
                // Q encoding represents a space as an underscore.
                qEncoded.Append('_');
                break;
            case (byte)'\\':
            case (byte)'"':
                // The encoded-word is wrapped in a quoted-string, so these
                // two are escaped at the quoted-string level.
                qEncoded.Append('\\');
                qEncoded.Append((char)b);
                break;
            case < 0x20 or >= 0x7F or (byte)'=' or (byte)'?' or (byte)'_':
                // Hex-encode control characters, DEL, non-ASCII bytes and the
                // characters with a special meaning in an encoded-word. A
                // literal underscore must be encoded as well, otherwise it
                // would be decoded as a space.
                qEncoded.Append('=');
                qEncoded.Append(b.ToString("X2"));
                break;
            default:
                qEncoded.Append((char)b);
                break;
        }
        if(qEncoded.Length - prefixLength > base64Encoded.Length)
        {
            // Q-encoded is longer, use B
            return $"\"=?{encodingName}?B?{base64Encoded}?=\"";
        }
    }
    qEncoded.Append("?=\"");
    return qEncoded.ToString();
}

static readonly Regex mimeNameRegex = new(@"^[^/;]+/(?:vnd\.|prs\.|x-|)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
Expand Down

0 comments on commit 527137b

Please sign in to comment.