IdCoderHeader fixed and added decoding headers to unicode
16.04.2011Download fixed idcoderheader.pas
unit IdCoderHeader; interface uses IdEMailAddress; type TTransfer = (bit7, bit8, iso2022jp); CSET = set of AnsiChar; // Procs function EncodeAddressItem(EmailAddr: TIdEmailAddressItem; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string; function EncodeHeader(const Header: string; specials: CSET; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string): string; function Encode2022JP(const S: ansistring): string; function EncodeAddress(EmailAddr: TIdEMailAddressList; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string; function DecodeHeader(Header: string): string; function WideDecodeHeader(Header: widestring): Widestring; function Decode2022JP(const S: string): string; procedure DecodeAddress(EMailAddr: TIdEmailAddressItem); procedure DecodeAddresses(AEMails: string; EMailAddr: TIdEmailAddressList); implementation uses IdGlobal, IdGlobalProtocols, SysUtils, Charsetconvertors; const csSPECIALS: CSET = ['(', ')', '[', ']', '<', '>', ':', ';', '.', ',', '@', '\', '"']; {Do not Localize} kana_tbl: array[#$A1..#$DF] of Word = ( $2123, $2156, $2157, $2122, $2126, $2572, $2521, $2523, $2525, $2527, $2529, $2563, $2565, $2567, $2543, $213C, $2522, $2524, $2526, $2528, $252A, $252B, $252D, $252F, $2531, $2533, $2535, $2537, $2539, $253B, $253D, $253F, $2541, $2544, $2546, $2548, $254A, $254B, $254C, $254D, $254E, $254F, $2552, $2555, $2558, $255B, $255E, $255F, $2560, $2561, $2562, $2564, $2566, $2568, $2569, $256A, $256B, $256C, $256D, $256F, $2573, $212B, $212C); vkana_tbl: array[#$A1..#$DF] of Word = ( $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $2574, $0000, $0000, $252C, $252E, $2530, $2532, $2534, $2536, $2538, $253A, $253C, $253E, $2540, $2542, $2545, $2547, $2549, $0000, $0000, $0000, $0000, $0000, $2550, $2553, $2556, $2559, $255C, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000); sj1_tbl: array[#128..#255] of Byte = ( $00, $21, $23, $25, $27, $29, $2B, $2D, $2F, $31, $33, $35, $37, $39, $3B, $3D, $3F, $41, $43, $45, $47, $49, $4B, $4D, $4F, $51, $53, $55, $57, $59, $5B, $5D, $00, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $01, $5F, $61, $63, $65, $67, $69, $6B, $6D, $6F, $71, $73, $75, $77, $79, $7B, $7D, $02, $02, $02, $02, $02, $02, $02, $02, $02, $02, $02, $02, $02, $00, $00, $00); sj2_tbl: array[AnsiChar] of Word = ( $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0000, $0021, $0022, $0023, $0024, $0025, $0026, $0027, $0028, $0029, $002A, $002B, $002C, $002D, $002E, $002F, $0030, $0031, $0032, $0033, $0034, $0035, $0036, $0037, $0038, $0039, $003A, $003B, $003C, $003D, $003E, $003F, $0040, $0041, $0042, $0043, $0044, $0045, $0046, $0047, $0048, $0049, $004A, $004B, $004C, $004D, $004E, $004F, $0050, $0051, $0052, $0053, $0054, $0055, $0056, $0057, $0058, $0059, $005A, $005B, $005C, $005D, $005E, $005F, $0000, $0060, $0061, $0062, $0063, $0064, $0065, $0066, $0067, $0068, $0069, $006A, $006B, $006C, $006D, $006E, $006F, $0070, $0071, $0072, $0073, $0074, $0075, $0076, $0077, $0078, $0079, $007A, $007B, $007C, $007D, $007E, $0121, $0122, $0123, $0124, $0125, $0126, $0127, $0128, $0129, $012A, $012B, $012C, $012D, $012E, $012F, $0130, $0131, $0132, $0133, $0134, $0135, $0136, $0137, $0138, $0139, $013A, $013B, $013C, $013D, $013E, $013F, $0140, $0141, $0142, $0143, $0144, $0145, $0146, $0147, $0148, $0149, $014A, $014B, $014C, $014D, $014E, $014F, $0150, $0151, $0152, $0153, $0154, $0155, $0156, $0157, $0158, $0159, $015A, $015B, $015C, $015D, $015E, $015F, $0160, $0161, $0162, $0163, $0164, $0165, $0166, $0167, $0168, $0169, $016A, $016B, $016C, $016D, $016E, $016F, $0170, $0171, $0172, $0173, $0174, $0175, $0176, $0177, $0178, $0179, $017A, $017B, $017C, $017D, $017E, $0000, $0000, $0000); base64_tbl: array[0..63] of Char = ( 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', {Do not Localize} 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', {Do not Localize} 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', {Do not Localize} 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', {Do not Localize} 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', {Do not Localize} 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', {Do not Localize} 'w', 'x', 'y', 'z', '0', '1', '2', '3', {Do not Localize} '4', '5', '6', '7', '8', '9', '+', '/'); {Do not Localize} function EncodeAddressItem(EmailAddr: TIdEmailAddressItem; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string; var S: string; I: Integer; NeedEncode: Boolean; begin if ((AUseAddressForNameIfNameMissing = True) and (EmailAddr.Name = '')) then begin {CC: Use Address as Name...} EmailAddr.Name := EmailAddr.Address; end; if EmailAddr.Name <> '' then {Do not Localize} begin NeedEncode := False; for I := 1 to Length(EmailAddr.Name) do begin if (EmailAddr.Name[I] < #32) or (EmailAddr.Name[I] >= #127) then begin NeedEncode := True; Break; end; end; if NeedEncode then S := EncodeHeader(EmailAddr.Name, csSPECIALS, HeaderEncoding, TransferHeader, MimeCharSet) else begin { quoted string } S := '"'; {Do not Localize} for I := 1 to Length(EmailAddr.Name) do begin { quote special characters } if (EmailAddr.Name[I] = '\') or (EmailAddr.Name[I] = '"') then S := S + '\'; {Do not Localize} S := S + EmailAddr.Name[I]; end; S := S + '"'; {Do not Localize} end; Result := Format('%s <%s>', [S, EmailAddr.Address]) {Do not Localize} end else Result := Format('%s', [EmailAddr.Address]); {Do not Localize} end; function B64(AChar: Char): Byte; //TODO: Make this use the more efficient MIME Coder var i: Integer; begin for i := Low(base64_tbl) to High(base64_tbl) do begin if AChar = base64_tbl[i] then begin Result := i; exit; end; end; Result := 0; end; { convert Shift_JIS to ISO-2022-JP (RFC 1468) } function Decode2022JP(const S: string): string; var T: string; I, L: integer; isK: Boolean; K1, K2: byte; K3: byte; begin T := ''; {Do not Localize} isK := False; L := length(S); I := 1; while I <= L do begin if S[I] = #27 then begin Inc(I); if I + 1 <= L then begin if Copy(S, I, 2) = '$B' then {Do not Localize} begin isK := True; end else begin if Copy(S, I, 2) = '(B' then {Do not Localize} begin isK := False; end; end; Inc(I, 2); { TODO -oTArisawa : Check RFC 1468} end; end else begin if isK then begin if I + 1 <= L then begin K1 := byte(S[I]); K2 := byte(S[I + 1]); K3 := (K1 - 1) shr 1; if K1 < 95 then K3 := K3 + 113 else K3 := K3 + 177; if (K1 mod 2) = 1 then begin if K2 < 96 then K2 := K2 + 31 else K2 := K2 + 32 end else K2 := K2 + 126; T := T + char(K3) + char(k2); Inc(I, 2); end else Inc(I); { invalid DBCS } end else begin T := T + S[I]; Inc(I); end; end; end; Result := T; end; procedure DecodeAddress(EMailAddr: TIdEmailAddressItem); begin EMailAddr.Name := DecodeHeader(EMailAddr.Name); end; procedure DecodeAddresses(AEMails: string; EMailAddr: TIdEmailAddressList); var idx: Integer; begin idx := 0; EMailAddr.EMailAddresses := AEMails; while idx < EMailAddr.Count do begin DecodeAddress(EMailAddr[idx]); inc(idx); end; end; function EncodeAddress(EmailAddr: TIdEMailAddressList; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string; AUseAddressForNameIfNameMissing: Boolean = False): string; var idx: Integer; begin Result := ''; {Do not Localize} idx := 0; while (idx < EmailAddr.Count) do begin Result := Result + ', ' + EncodeAddressItem(EMailAddr[idx], HeaderEncoding, TransferHeader, MimeCharSet, AUseAddressForNameIfNameMissing); {Do not Localize} Inc(idx); end; // while ( idx < EncodeAddress.Count ) do {Remove the first comma and the following space ', ' }{Do not Localize} IdDelete(Result, 1, 2); end; { convert Shift_JIS to ISO-2022-JP (RFC 1468) } function Encode2022JP(const S: ansistring): string; const desig_asc = #27'(B'; {Do not Localize} desig_jis = #27'$B'; {Do not Localize} var T: string; I, L: Integer; isK: Boolean; K1: Byte; K2, K3: Word; begin T := ''; {Do not Localize} isK := False; L := Length(S); I := 1; while I <= L do begin if Ord(S[I]) < 128 then {Do not Localize} begin if isK then begin T := T + desig_asc; isK := False; end; T := T + S[I]; INC(I); end else begin K1 := sj1_tbl[S[I]]; case K1 of 0: INC(I); { invalid SBCS } 2: INC(I, 2); { invalid DBCS } 1: begin { halfwidth katakana } if not isK then begin T := T + desig_jis; isK := True; end; { simple SBCS -> DBCS conversion } K2 := kana_tbl[S[I]]; if (I < L) and (Ord(S[I + 1]) and $FE = $DE) then begin { convert kana + voiced mark to voiced kana } K3 := vkana_tbl[S[I]]; // This is an if and not a case because of a D8 bug, return to // case when d8 patch is released if S[I + 1] = #$DE then begin { voiced } if K3 <> 0 then begin K2 := K3; INC(I); end; end else if S[I + 1] = #$DF then begin { semivoiced } if (K3 >= $2550) and (K3 <= $255C) then begin K2 := K3 + 1; INC(I); end; end; end; T := T + Chr(K2 shr 8) + Chr(K2 and $FF); INC(I); end; else { DBCS } if (I < L) then begin K2 := sj2_tbl[S[I + 1]]; if K2 <> 0 then begin if not isK then begin T := T + desig_jis; isK := True; end; T := T + Chr(K1 + K2 shr 8) + Chr(K2 and $FF); end; end; INC(I, 2); end; end; end; if isK then T := T + desig_asc; Result := T; end; { encode a header field if non-ASCII characters are used } function EncodeHeader(const Header: string; specials: CSET; const HeaderEncoding: Char; TransferHeader: TTransfer; MimeCharSet: string): string; const SPACES: set of AnsiChar = [' ', #9, #10, #13]; {Do not Localize} var S, T: string; L, P, Q, R: Integer; B0, B1, B2: Integer; InEncode: Integer; NeedEncode: Boolean; csNeedEncode, csReqQuote: CSET; BeginEncode, EndEncode: string; procedure EncodeWord(P: Integer); const MaxEncLen = 75; var Q: Integer; EncLen: Integer; Enc1: string; begin T := T + BeginEncode; if L < P then P := L + 1; Q := InEncode; InEncode := 0; EncLen := Length(BeginEncode) + 2; if TextIsSame(HeaderEncoding, 'Q') then { quoted-printable } {Do not Localize} begin while Q < P do begin if not (CharIsInSet(S, Q, csReqQuote)) then begin Enc1 := S[Q] end else begin if S[Q] = ' ' then {Do not Localize} Enc1 := '_' {Do not Localize} else Enc1 := '=' + IntToHex(Ord(S[Q]), 2); {Do not Localize} end; if EncLen + Length(Enc1) > MaxEncLen then begin //T := T + EndEncode + #13#10#9 + BeginEncode; //CC: The #13#10#9 above caused the subsequent call to FoldWrapText to //insert an extra #13#10 which, being a blank line in the headers, //was interpreted by email clients, etc., as the end of the headers //and the start of the message body. FoldWrapText seems to look for //and treat correctly the sequence #13#10 + ' ' however... T := T + EndEncode + #13#10 + ' ' + BeginEncode; EncLen := Length(BeginEncode) + 2; end; T := T + Enc1; INC(EncLen, Length(Enc1)); INC(Q); end; end else begin { base64 } while Q < P do begin if EncLen + 4 > MaxEncLen then begin //T := T + EndEncode + #13#10#9 + BeginEncode; //CC: The #13#10#9 above caused the subsequent call to FoldWrapText to //insert an extra #13#10 which, being a blank line in the headers, //was interpreted by email clients, etc., as the end of the headers //and the start of the message body. FoldWrapText seems to look for //and treat correctly the sequence #13#10 + ' ' however... T := T + EndEncode + #13#10 + ' ' + BeginEncode; EncLen := Length(BeginEncode) + 2; end; B0 := Ord(S[Q]); case P - Q of 1: T := T + base64_tbl[B0 shr 2] + base64_tbl[B0 and $03 shl 4] + '=='; {Do not Localize} 2: begin B1 := Ord(S[Q + 1]); T := T + base64_tbl[B0 shr 2] + base64_tbl[B0 and $03 shl 4 + B1 shr 4] + base64_tbl[B1 and $0F shl 2] + '='; {Do not Localize} end; else B1 := Ord(S[Q + 1]); B2 := Ord(S[Q + 2]); T := T + base64_tbl[B0 shr 2] + base64_tbl[B0 and $03 shl 4 + B1 shr 4] + base64_tbl[B1 and $0F shl 2 + B2 shr 6] + base64_tbl[B2 and $3F]; end; INC(EncLen, 4); INC(Q, 3); end; end; T := T + EndEncode; end; begin case TransferHeader of iso2022jp: S := Encode2022JP(Header); else S := Header; end; {Suggested by Andrew P.Rybin for easy 8bit support} if HeaderEncoding = '8' then begin //UpCase('8')='8' {Do not Localize} Result := S; EXIT; end; //if csNeedEncode := [#0..#31, #127..#255] + specials; csReqQuote := csNeedEncode + ['?', '=', '_']; {Do not Localize} BeginEncode := '=?' + MimeCharSet + '?' + HeaderEncoding + '?'; {Do not Localize} EndEncode := '?='; {Do not Localize} // JMBERG: We want to encode stuff that the user typed // as if it already is encoded!! if DecodeHeader(Header) <> Header then begin csNeedEncode := csNeedEncode + ['=']; end; L := Length(S); P := 1; T := ''; {Do not Localize} InEncode := 0; while P <= L do begin Q := P; while (P <= L) and (CharIsInSet(S, P, SPACES)) do INC(P); R := P; NeedEncode := False; while (P <= L) and not (CharIsInSet(S, P, SPACES)) do begin if CharIsInSet(S, P, csNeedEncode) then begin NeedEncode := True; end; INC(P); end; if NeedEncode then begin if InEncode = 0 then begin T := T + Copy(S, Q, R - Q); InEncode := R; end; end else begin if InEncode <> 0 then begin EncodeWord(Q); end; T := T + Copy(S, Q, P - Q); end; end; if InEncode <> 0 then begin EncodeWord(P); end; Result := T; end; function DecodeHeader(Header: string): string; const WhiteSpace = [LF, CR, CHAR32, TAB]; var i, l: Integer; HeaderEncoding, HeaderCharSet, s: string; a3: array[1..3] of byte; a4: array[1..4] of byte; LEncodingStartPos, encodingendpos: Integer; LPreviousEncodingStartPos: integer; substring: string; EncodingFound: Boolean; OnlyWhitespace: boolean; EncodingBeforeEnd: integer; // мое добавление для ускорения UpHeader := UpperCase(Header) UpHeader: string; begin UpHeader := UpperCase(Header); S := ''; // Get the Charset part. EncodingBeforeEnd := -1; // мой риск кодировок 850 в 10 версии инди, задолбаешься все их перечислять // поэтому иду на риск определение начала кодировки как =? без названия кодировки LEncodingStartPos := PosIdx('=?', UpHeader, 1); {do not localize} (* закавычил три кодировки LEncodingStartPos := PosIdx('=?ISO', UpHeader, 1); {do not localize} if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?WINDOWS', UpHeader, 1); {do not localize} end; if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?KOI8', UpHeader, 1); {do not localize} end; *) while LEncodingStartPos > 0 do begin // Assume we will find the encoding EncodingFound := True; //we need 3 more question marks first and after that a '?=' {Do not Localize} //to find the end of the substring, we can't just search for '?=', {Do not Localize} //example: '=?ISO-8859-1?Q?=E4?=' {Do not Localize} encodingendpos := PosIdx('?', UpHeader, LEncodingStartPos + 5); {Do not Localize} if encodingendpos = 0 then begin EncodingFound := False; end else begin // valid encoded words can not contain spaces // if the user types something *almost* like an encoded word, // and its sent as-is, we need to find this!! for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; end; if EncodingFound then begin encodingendpos := PosIdx('?', UpHeader, encodingendpos + 1); {Do not Localize} if encodingendpos = 0 then begin EncodingFound := false; end else begin for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; end; end; if EncodingFound then begin encodingendpos := PosIdx('?=', UpHeader, encodingendpos + 1); {Do not Localize} if encodingendpos > 0 then begin for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; if EncodingFound then begin substring := Copy(Header, LEncodingStartPos, encodingendpos - LEncodingStartPos + 2); //now decode the substring for i := 1 to 3 do begin l := Pos('?', substring); {Do not Localize} substring := Copy(substring, l + 1, Length(substring) - l + 1); if i = 1 then begin HeaderCharSet := Copy(substring, 1, Pos('?', substring) - 1) {Do not Localize} end else if i = 2 then begin HeaderEncoding := copy(substring, 1, 1); end; end; //now Substring needs to end with '?=' otherwise give up! {Do not Localize} if Copy(substring, Length(substring) - 1, 2) <> '?=' then {Do not Localize} begin EncodingFound := false; end; end; if (EncodingBeforeEnd >= 0) and EncodingFound and (LEncodingStartPos > 0) then begin OnlyWhitespace := true; for i := EncodingBeforeEnd to LEncodingStartPos - 1 do begin if not (CharIsInSet(Header, i, WhiteSpace)) then begin OnlyWhitespace := false; break; end; end; if OnlyWhitespace then begin Delete(Header, EncodingBeforeEnd, LEncodingStartPos - EncodingBeforeEnd); encodingendpos := encodingendpos - (LEncodingStartPos - encodingbeforeend); LEncodingStartPos := EncodingBeforeEnd; end; end; // Get the HeaderEncoding if TextIsSame(HeaderEncoding, 'Q') {Do not Localize} and EncodingFound then begin i := 1; s := ''; {Do not Localize} repeat // substring can be accessed by index here, because we know that it ends with '?=' {Do not Localize} if substring[i] = '_' then {Do not Localize} begin s := s + ' '; {Do not Localize} end else if (substring[i] = '=') and (Length(substring) >= i + 2 + 2) then //make sure we can access i+2 and '?=' is still beyond {Do not Localize} begin s := s + chr(StrToInt('$' + substring[i + 1] + substring[i + 2])); {Do not Localize} inc(i, 2); end else begin s := s + substring[i]; end; inc(i); until (substring[i] = '?') and (substring[i + 1] = '=') {Do not Localize} end else if EncodingFound then begin while Length(substring) >= 4 do begin a4[1] := b64(substring[1]); a4[2] := b64(substring[2]); a4[3] := b64(substring[3]); a4[4] := b64(substring[4]); a3[1] := Byte((a4[1] shl 2) or (a4[2] shr 4)); a3[2] := Byte((a4[2] shl 4) or (a4[3] shr 2)); a3[3] := Byte((a4[3] shl 6) or (a4[4] shr 0)); substring := Copy(substring, 5, Length(substring)); s := s + CHR(a3[1]) + CHR(a3[2]) + CHR(a3[3]); end; end; if EncodingFound then begin if TextIsSame(HeaderCharSet, 'ISO-2022-JP') then {Do not Localize} begin substring := Decode2022JP(s); end else begin substring := s; end; //replace old substring in header with decoded one: header := Copy(header, 1, LEncodingStartPos - 1) + substring + Copy(header, encodingendpos + 2, Length(Header)); encodingendpos := length(substring); substring := ''; {Do not Localize} S := ''; UpHeader := UpperCase(Header); end; end; end; encodingendpos := LEncodingStartPos + encodingendpos; {CC: Bug fix - changed LEncodingStartPos to LPreviousEncodingStartPos because LEncodingStartPos gets overwritten by return value from PosIdx.} LPreviousEncodingStartPos := LEncodingStartPos; LEncodingStartPos := PosIdx('=?', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} (* LEncodingStartPos := PosIdx('=?ISO', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?WINDOWS', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} end; if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?KOI8', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} end; *) // delete whitespace between adjacent encoded words, but only // if we had an encoding before if EncodingFound then begin EncodingBeforeEnd := encodingendpos; end else begin EncodingBeforeEnd := -1; end; end; //There might be #0's in header when this it b64 encoded, e.g with: //decodeheader('"Fernando Corti=?ISO-8859-1?B?8Q==?=a" <fernando@nowhere.com>'); while Pos(#0, header) > 0 do begin Delete(header, Pos(#0, header), 1); end; Result := Header; end; function WideDecodeHeader(Header: widestring): Widestring; const WhiteSpace = [LF, CR, CHAR32, TAB]; var i, l: Integer; HeaderEncoding, HeaderCharSet, s: string; a3: array[1..3] of byte; a4: array[1..4] of byte; LEncodingStartPos, encodingendpos: Integer; LPreviousEncodingStartPos: integer; substring: string; EncodingFound: Boolean; OnlyWhitespace: boolean; EncodingBeforeEnd: integer; // мое добавление для ускорения UpHeader := UpperCase(Header) UpHeader: string; begin UpHeader := UpperCase(Header); S := ''; // Get the Charset part. EncodingBeforeEnd := -1; // мой риск кодировок 850 в 10 версии инди, задолбаешься все их перечислять // поэтому иду на риск определение начала кодировки как =? без названия кодировки LEncodingStartPos := PosIdx('=?', UpHeader, 1); {do not localize} (* закавычил три кодировки LEncodingStartPos := PosIdx('=?ISO', UpHeader, 1); {do not localize} if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?WINDOWS', UpHeader, 1); {do not localize} end; if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?KOI8', UpHeader, 1); {do not localize} end; *) while LEncodingStartPos > 0 do begin // Assume we will find the encoding EncodingFound := True; //we need 3 more question marks first and after that a '?=' {Do not Localize} //to find the end of the substring, we can't just search for '?=', {Do not Localize} //example: '=?ISO-8859-1?Q?=E4?=' {Do not Localize} encodingendpos := PosIdx('?', UpHeader, LEncodingStartPos + 5); {Do not Localize} if encodingendpos = 0 then begin EncodingFound := False; end else begin // valid encoded words can not contain spaces // if the user types something *almost* like an encoded word, // and its sent as-is, we need to find this!! for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; end; if EncodingFound then begin encodingendpos := PosIdx('?', UpHeader, encodingendpos + 1); {Do not Localize} if encodingendpos = 0 then begin EncodingFound := false; end else begin for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; end; end; if EncodingFound then begin encodingendpos := PosIdx('?=', UpHeader, encodingendpos + 1); {Do not Localize} if encodingendpos > 0 then begin for i := LEncodingStartPos to encodingendpos - 1 do begin if CharIsInSet(Header, i, Whitespace) then begin EncodingFound := false; break; end; end; if EncodingFound then begin substring := Copy(Header, LEncodingStartPos, encodingendpos - LEncodingStartPos + 2); //now decode the substring for i := 1 to 3 do begin l := Pos('?', substring); {Do not Localize} substring := Copy(substring, l + 1, Length(substring) - l + 1); if i = 1 then begin HeaderCharSet := Copy(substring, 1, Pos('?', substring) - 1) {Do not Localize} end else if i = 2 then begin HeaderEncoding := copy(substring, 1, 1); end; end; //now Substring needs to end with '?=' otherwise give up! {Do not Localize} if Copy(substring, Length(substring) - 1, 2) <> '?=' then {Do not Localize} begin EncodingFound := false; end; end; if (EncodingBeforeEnd >= 0) and EncodingFound and (LEncodingStartPos > 0) then begin OnlyWhitespace := true; for i := EncodingBeforeEnd to LEncodingStartPos - 1 do begin if not (CharIsInSet(Header, i, WhiteSpace)) then begin OnlyWhitespace := false; break; end; end; if OnlyWhitespace then begin Delete(Header, EncodingBeforeEnd, LEncodingStartPos - EncodingBeforeEnd); encodingendpos := encodingendpos - (LEncodingStartPos - encodingbeforeend); LEncodingStartPos := EncodingBeforeEnd; end; end; // Get the HeaderEncoding if TextIsSame(HeaderEncoding, 'Q') {Do not Localize} and EncodingFound then begin i := 1; s := ''; {Do not Localize} repeat // substring can be accessed by index here, because we know that it ends with '?=' {Do not Localize} if substring[i] = '_' then {Do not Localize} begin s := s + ' '; {Do not Localize} end else if (substring[i] = '=') and (Length(substring) >= i + 2 + 2) then //make sure we can access i+2 and '?=' is still beyond {Do not Localize} begin s := s + chr(StrToInt('$' + substring[i + 1] + substring[i + 2])); {Do not Localize} inc(i, 2); end else begin s := s + substring[i]; end; inc(i); until (substring[i] = '?') and (substring[i + 1] = '=') {Do not Localize} end else if EncodingFound then begin while Length(substring) >= 4 do begin a4[1] := b64(substring[1]); a4[2] := b64(substring[2]); a4[3] := b64(substring[3]); a4[4] := b64(substring[4]); a3[1] := Byte((a4[1] shl 2) or (a4[2] shr 4)); a3[2] := Byte((a4[2] shl 4) or (a4[3] shr 2)); a3[3] := Byte((a4[3] shl 6) or (a4[4] shr 0)); substring := Copy(substring, 5, Length(substring)); s := s + CHR(a3[1]) + CHR(a3[2]) + CHR(a3[3]); end; end; if EncodingFound then begin if TextIsSame(HeaderCharSet, 'ISO-2022-JP') then {Do not Localize} begin substring := Decode2022JP(s); end else begin substring := ConvertToWide(s, HeaderCharSet); end; //replace old substring in header with decoded one: header := Copy(header, 1, LEncodingStartPos - 1) + substring + Copy(header, encodingendpos + 2, Length(Header)); encodingendpos := length(substring); substring := ''; {Do not Localize} S := ''; UpHeader := UpperCase(Header); end; end; end; encodingendpos := LEncodingStartPos + encodingendpos; {CC: Bug fix - changed LEncodingStartPos to LPreviousEncodingStartPos because LEncodingStartPos gets overwritten by return value from PosIdx.} LPreviousEncodingStartPos := LEncodingStartPos; LEncodingStartPos := PosIdx('=?', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} (* LEncodingStartPos := PosIdx('=?ISO', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?WINDOWS', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} end; if LEncodingStartPos = 0 then begin LEncodingStartPos := PosIdx('=?KOI8', UpHeader, LPreviousEncodingStartPos + 1); {do not localize} end; *) // delete whitespace between adjacent encoded words, but only // if we had an encoding before if EncodingFound then begin EncodingBeforeEnd := encodingendpos; end else begin EncodingBeforeEnd := -1; end; end; //There might be #0's in header when this it b64 encoded, e.g with: //decodeheader('"Fernando Corti=?ISO-8859-1?B?8Q==?=a" <fernando@nowhere.com>'); while Pos(#0, header) > 0 do begin Delete(header, Pos(#0, header), 1); end; Result := Header; end; end.
Categories:INDY (Internet Direct) changes