normalize method
Validates and normalizes the base64 encoded data in source.
Only acts on the substring from start to end, with end
defaulting to the end of the string.
Normalization will:
- Unescape any `%`-escapes.
- Only allow valid characters (`A`-`Z`, `a`-`z`, `0`-`9`, `/` and `+`).
- Normalize a `_` or `-` character to `/` or `+`.
- Validate that existing padding (trailing `=` characters) is correct.
- If no padding exists, add correct padding if necessary and possible.
- Validate that the length is correct (a multiple of four).
Implementation
/// Validates and normalizes the base64 encoded data in [source].
///
/// Only the substring from [start] to [end] is examined; [end] is resolved
/// through `RangeError.checkValidRange`, so it may be omitted to mean the end
/// of the string.
///
/// While scanning, `%`-escapes are decoded, characters that the inverse
/// alphabet maps to a different canonical character are replaced by that
/// canonical character, and existing `=` padding is recorded. If padding was
/// present it is handed to `_checkPadding` for validation; otherwise the
/// missing `=` characters are appended when the length allows it.
///
/// Returns [source] itself when nothing had to be rewritten or appended,
/// otherwise a new string with the range replaced by its normalized form.
///
/// Throws a [FormatException] when an invalid character or escape is found,
/// or when unpadded data has a length of 1 modulo 4.
String normalize(String source, [int start = 0, int end]) {
  end = RangeError.checkValidRange(start, end, source.length);
  const int percent = 0x25; // '%'
  const int equals = 0x3d; // '='
  var alphabet = _Base64Encoder._base64Alphabet;
  var inverseAlphabet = _Base64Decoder._inverseAlphabet;

  // Allocated lazily: stays null for as long as the source text can be
  // reused verbatim.
  StringBuffer rewritten;
  // Start of the source text that has been accepted but not yet copied
  // into [rewritten].
  int pendingStart = start;
  // Position of the first padding character in the normalized output and in
  // the source, respectively; -1 until padding is seen.
  int firstPadding = -1;
  int firstPaddingSourceIndex = -1;
  int paddingCount = 0;

  int cursor = start;
  while (cursor < end) {
    final int unitStart = cursor;
    final int rawUnit = source.codeUnitAt(cursor);
    cursor += 1;

    // `unit` is the decoded/normalized code unit; `rawUnit` is kept so we
    // can tell whether the source already holds the normalized form.
    int unit = rawUnit;
    if (rawUnit == percent) {
      if (cursor + 2 > end) {
        // Escape is too short to be valid; force the throw below.
        unit = -1;
      } else {
        unit = parseHexByte(source, cursor); // May be negative.
        cursor += 2;
        // An escaped percent (%25) is never valid, but the table treats
        // percent as a potential padding start, so reject it here.
        if (unit == percent) unit = -1;
      }
    }

    // A negative unit at this point means hex-decoding failed in some way.
    if (unit >= 0 && unit <= 127) {
      final int value = inverseAlphabet[unit];
      bool matchesSource = false;
      if (value >= 0) {
        // Map to the canonical alphabet character.
        unit = alphabet.codeUnitAt(value);
        matchesSource = unit == rawUnit;
      } else if (value == _Base64Decoder._padding) {
        // Percent has been ruled out above, so this is '='.
        if (firstPadding < 0) {
          // Remember where padding starts in the normalized output.
          final int alreadyWritten = rewritten == null ? 0 : rewritten.length;
          firstPadding = alreadyWritten + (unitStart - pendingStart);
          firstPaddingSourceIndex = unitStart;
        }
        paddingCount += 1;
        // A literal '=' needs no rewrite; an escaped one (%3D) does.
        matchesSource = rawUnit == equals;
      }
      if (matchesSource) continue;

      if (value != _Base64Decoder._invalid) {
        // The source differs from the normalized form: flush the pending
        // verbatim slice, then emit the normalized character.
        if (rewritten == null) rewritten = new StringBuffer();
        rewritten
          ..write(source.substring(pendingStart, unitStart))
          ..writeCharCode(unit);
        pendingStart = cursor;
        continue;
      }
    }
    throw new FormatException("Invalid base64 data", source, unitStart);
  }

  if (rewritten == null) {
    // The range was already in normalized form; only padding is left to
    // validate or add.
    final int length = end - start;
    if (firstPadding >= 0) {
      _checkPadding(source, firstPaddingSourceIndex, end, firstPadding,
          paddingCount, length);
      return source;
    }
    final int remainder = length % 4;
    if (remainder == 1) {
      // The data must have length 0, 2 or 3 modulo 4.
      throw new FormatException(
          "Invalid base64 encoding length ", source, end);
    }
    // String has no "insertAt"; replacing the empty range at `end` inserts.
    if (remainder == 2) return source.replaceRange(end, end, "==");
    if (remainder == 3) return source.replaceRange(end, end, "=");
    return source;
  }

  // Some characters were rewritten: copy the remaining verbatim tail.
  rewritten.write(source.substring(pendingStart, end));
  if (firstPadding >= 0) {
    // The source contained padding; let _checkPadding validate it against
    // the normalized length.
    _checkPadding(source, firstPaddingSourceIndex, end, firstPadding,
        paddingCount, rewritten.length);
  } else {
    // Length of the last chunk (1-4 chars) in the encoding.
    final int lastChunkLength = ((rewritten.length - 1) % 4) + 1;
    if (lastChunkLength == 1) {
      // The data must have length 0, 2 or 3 modulo 4.
      throw new FormatException(
          "Invalid base64 encoding length ", source, end);
    }
    // Pad the last chunk up to four characters.
    for (int missing = 4 - lastChunkLength; missing > 0; missing--) {
      rewritten.write("=");
    }
  }
  return source.replaceRange(start, end, rewritten.toString());
}