libkmime

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     This file is part of KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     KMime is free software; you can redistribute it and/or modify it
00008     under the terms of the GNU General Public License, version 2, as
00009     published by the Free Software Foundation.
00010 
00011     KMime is distributed in the hope that it will be useful, but
00012     WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this library; if not, write to the Free Software
00018     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00019 
00020     In addition, as a special exception, the copyright holders give
00021     permission to link the code of this library with any edition of
00022     the Qt library by Trolltech AS, Norway (or with modified versions
00023     of Qt that use the same license as Qt), and distribute linked
00024     combinations including the two.  You must obey the GNU General
00025     Public License in all respects for all of the code used other than
00026     Qt.  If you modify this file, you may extend this exception to
00027     your version of the file, but you are not obligated to do so.  If
00028     you do not wish to do so, delete this exception statement from
00029     your version.
00030 */
00031 
00032 #include <config.h>
00033 #include "kmime_header_parsing.h"
00034 
00035 #include "kmime_codecs.h"
00036 #include "kmime_util.h"
00037 #include "kmime_warning.h"
00038 
00039 #include <kglobal.h>
00040 #include <kcharsets.h>
00041 
00042 #include <qtextcodec.h>
00043 #include <qmap.h>
00044 #include <qcstring.h>
00045 #include <qstringlist.h>
00046 
00047 #include <ctype.h> // for isdigit
00048 #include <cassert>
00049 
00050 using namespace KMime;
00051 using namespace KMime::Types;
00052 
00053 namespace KMime {
00054 
00055 namespace Types {
00056 
00057   QString AddrSpec::asString() const {
00058     bool needsQuotes = false;
00059     QString result;
00060     result.reserve( localPart.length() + domain.length() + 1 );
00061     for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) {
00062       const char ch = localPart[i].latin1();
00063       if ( ch == '.' || isAText( ch ) )
00064     result += ch;
00065       else {
00066     needsQuotes = true;
00067     if ( ch == '\\' || ch == '"' )
00068       result += '\\';
00069     result += ch;
00070       }
00071     }
00072     if ( needsQuotes )
00073       return '"' + result + "\"@" + domain;
00074     else
00075       return result + '@' + domain;
00076   }
00077 
00078 }
00079 
00080 namespace HeaderParsing {
00081 
00082 // parse the encoded-word (scursor points to after the initial '=')
00083 bool parseEncodedWord( const char* & scursor, const char * const send,
00084                QString & result, QCString & language ) {
00085 
00086   // make sure the caller already did a bit of the work.
00087   assert( *(scursor-1) == '=' );
00088 
00089   //
00090   // STEP 1:
00091   // scan for the charset/language portion of the encoded-word
00092   //
00093 
00094   char ch = *scursor++;
00095 
00096   if ( ch != '?' ) {
00097     kdDebug() << "first" << endl;
00098     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00099     return false;
00100   }
00101 
00102   // remember start of charset (ie. just after the initial "=?") and
00103   // language (just after the first '*') fields:
00104   const char * charsetStart = scursor;
00105   const char * languageStart = 0;
00106 
00107   // find delimiting '?' (and the '*' separating charset and language
00108   // tags, if any):
00109   for ( ; scursor != send ; scursor++ )
00110     if ( *scursor == '?')
00111       break;
00112     else if ( *scursor == '*' && !languageStart )
00113       languageStart = scursor + 1;
00114 
00115   // not found? can't be an encoded-word!
00116   if ( scursor == send || *scursor != '?' ) {
00117     kdDebug() << "second" << endl;
00118     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00119     return false;
00120   }
00121 
00122   // extract the language information, if any (if languageStart is 0,
00123   // language will be null, too):
00124   QCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/);
00125   // extract charset information (keep in mind: the size given to the
00126   // ctor is one off due to the \0 terminator):
00127   QCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
00128 
00129   //
00130   // STEP 2:
00131   // scan for the encoding portion of the encoded-word
00132   //
00133 
00134 
00135   // remember start of encoding (just _after_ the second '?'):
00136   scursor++;
00137   const char * encodingStart = scursor;
00138 
00139   // find next '?' (ending the encoding tag):
00140   for ( ; scursor != send ; scursor++ )
00141     if ( *scursor == '?' ) break;
00142 
00143   // not found? Can't be an encoded-word!
00144   if ( scursor == send || *scursor != '?' ) {
00145     kdDebug() << "third" << endl;
00146     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00147     return false;
00148   }
00149 
00150   // extract the encoding information:
00151   QCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
00152 
00153 
00154   kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
00155         << "\"; language == \"" << maybeLanguage
00156         << "\"; encoding == \"" << maybeEncoding << "\"" << endl;
00157 
00158   //
00159   // STEP 3:
00160   // scan for encoded-text portion of encoded-word
00161   //
00162 
00163 
00164   // remember start of encoded-text (just after the third '?'):
00165   scursor++;
00166   const char * encodedTextStart = scursor;
00167 
00168   // find next '?' (ending the encoded-text):
00169   for ( ; scursor != send ; scursor++ )
00170     if ( *scursor == '?' ) break;
00171 
00172   // not found? Can't be an encoded-word!
00173   // ### maybe evaluate it nonetheless if the rest is OK?
00174   if ( scursor == send || *scursor != '?' ) {
00175     kdDebug() << "fourth" << endl;
00176     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00177     return false;
00178   }
00179   scursor++;
00180   // check for trailing '=':
00181   if ( scursor == send || *scursor != '=' ) {
00182     kdDebug() << "fifth" << endl;
00183     KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00184     return false;
00185   }
00186   scursor++;
00187 
00188   // set end sentinel for encoded-text:
00189   const char * const encodedTextEnd = scursor - 2;
00190 
00191   //
00192   // STEP 4:
00193   // setup decoders for the transfer encoding and the charset
00194   //
00195 
00196 
00197   // try if there's a codec for the encoding found:
00198   Codec * codec = Codec::codecForName( maybeEncoding );
00199   if ( !codec ) {
00200     KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
00201     return false;
00202   }
00203 
00204   // get an instance of a corresponding decoder:
00205   Decoder * dec = codec->makeDecoder();
00206   assert( dec );
00207 
00208   // try if there's a (text)codec for the charset found:
00209   bool matchOK = false;
00210   QTextCodec
00211     *textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00212 
00213   if ( !matchOK || !textCodec ) {
00214     KMIME_WARN_UNKNOWN(Charset,maybeCharset);
00215     delete dec;
00216     return false;
00217   };
00218 
00219   kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
00220 
00221   // allocate a temporary buffer to store the 8bit text:
00222   int encodedTextLength = encodedTextEnd - encodedTextStart;
00223   QByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
00224   QByteArray::Iterator bit = buffer.begin();
00225   QByteArray::ConstIterator bend = buffer.end();
00226 
00227   //
00228   // STEP 5:
00229   // do the actual decoding
00230   //
00231 
00232   if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
00233     KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
00234            << encodedTextLength << " )\nresult may be truncated" << endl;
00235 
00236   result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
00237 
00238   kdDebug() << "result now: \"" << result << "\"" << endl;
00239   // cleanup:
00240   delete dec;
00241   language = maybeLanguage;
00242 
00243   return true;
00244 }
00245 
// Moves scursor forward over any run of SP, HTAB, CR and LF characters.
// Stops at the first other character, or at send, whichever comes first.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      continue; // still whitespace: keep going
    default:
      return;   // first non-whitespace character: done
    }
  }
}
00252 
00253 bool parseAtom( const char * & scursor, const char * const send,
00254         QString & result, bool allow8Bit )
00255 {
00256   QPair<const char*,int> maybeResult;
00257 
00258   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00259     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00260     return true;
00261   }
00262 
00263   return false;
00264 }
00265 
00266 bool parseAtom( const char * & scursor, const char * const send,
00267         QPair<const char*,int> & result, bool allow8Bit ) {
00268   bool success = false;
00269   const char * start = scursor;
00270 
00271   while ( scursor != send ) {
00272     signed char ch = *scursor++;
00273     if ( ch > 0 && isAText(ch) ) {
00274       // AText: OK
00275       success = true;
00276     } else if ( allow8Bit && ch < 0 ) {
00277       // 8bit char: not OK, but be tolerant.
00278       KMIME_WARN_8BIT(ch);
00279       success = true;
00280     } else {
00281       // CTL or special - marking the end of the atom:
00282       // re-set sursor to point to the offending
00283       // char and return:
00284       scursor--;
00285       break;
00286     }
00287   }
00288   result.first = start;
00289   result.second = scursor - start;
00290   return success;
00291 }
00292 
00293 bool parseToken( const char * & scursor, const char * const send,
00294          QString & result, bool allow8Bit )
00295 {
00296   QPair<const char*,int> maybeResult;
00297 
00298   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00299     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00300     return true;
00301   }
00302 
00303   return false;
00304 }
00305 
00306 bool parseToken( const char * & scursor, const char * const send,
00307          QPair<const char*,int> & result, bool allow8Bit )
00308 {
00309   bool success = false;
00310   const char * start = scursor;
00311 
00312   while ( scursor != send ) {
00313     signed char ch = *scursor++;
00314     if ( ch > 0 && isTText(ch) ) {
00315       // TText: OK
00316       success = true;
00317     } else if ( allow8Bit && ch < 0 ) {
00318       // 8bit char: not OK, but be tolerant.
00319       KMIME_WARN_8BIT(ch);
00320       success = true;
00321     } else {
00322       // CTL or tspecial - marking the end of the atom:
00323       // re-set sursor to point to the offending
00324       // char and return:
00325       scursor--;
00326       break;
00327     }
00328   }
00329   result.first = start;
00330   result.second = scursor - start;
00331   return success;
00332 }
00333 
// Reads the next input character into the local variable "ch" and advances
// "scursor". If the input is already exhausted ("scursor == send"), emits a
// premature-end warning and makes the *enclosing function* return false.
// Requires "ch", "scursor" and "send" to be in scope where it is used.
//
// The body is wrapped in do { } while ( 0 ) so that "READ_ch_OR_FAIL;"
// always forms exactly one statement, also when used as the unbraced
// branch of an if/else.
#define READ_ch_OR_FAIL do { \
                          if ( scursor == send ) { \
                            KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
                            return false; \
                          } \
                          ch = *scursor++; \
                        } while ( 0 )
00340 
00341 // known issues:
00342 //
00343 // - doesn't handle quoted CRLF
00344 
00345 bool parseGenericQuotedString( const char* & scursor, const char * const send,
00346                    QString & result, bool isCRLF,
00347                    const char openChar, const char closeChar )
00348 {
00349   char ch;
00350   // We are in a quoted-string or domain-literal or comment and the
00351   // cursor points to the first char after the openChar.
00352   // We will apply unfolding and quoted-pair removal.
00353   // We return when we either encounter the end or unescaped openChar
00354   // or closeChar.
00355 
00356   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00357 
00358   while ( scursor != send ) {
00359     ch = *scursor++;
00360 
00361     if ( ch == closeChar || ch == openChar ) {
00362       // end of quoted-string or another opening char:
00363       // let caller decide what to do.
00364       return true;
00365     }
00366 
00367     switch( ch ) {
00368     case '\\':      // quoted-pair
00369       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
00370       READ_ch_OR_FAIL;
00371       KMIME_WARN_IF_8BIT(ch);
00372       result += QChar(ch);
00373       break;
00374     case '\r':
00375       // ###
00376       // The case of lonely '\r' is easy to solve, as they're
00377       // not part of Unix Line-ending conventions.
00378       // But I see a problem if we are given Unix-native
00379       // line-ending-mails, where we cannot determine anymore
00380       // whether a given '\n' was part of a CRLF or was occurring
00381       // on it's own.
00382       READ_ch_OR_FAIL;
00383       if ( ch != '\n' ) {
00384     // CR on it's own...
00385     KMIME_WARN_LONE(CR);
00386     result += QChar('\r');
00387     scursor--; // points to after the '\r' again
00388       } else {
00389     // CRLF encountered.
00390     // lookahead: check for folding
00391     READ_ch_OR_FAIL;
00392     if ( ch == ' ' || ch == '\t' ) {
00393       // correct folding;
00394       // position cursor behind the CRLF WSP (unfolding)
00395       // and add the WSP to the result
00396       result += QChar(ch);
00397     } else {
00398       // this is the "shouldn't happen"-case. There is a CRLF
00399       // inside a quoted-string without it being part of FWS.
00400       // We take it verbatim.
00401       KMIME_WARN_NON_FOLDING(CRLF);
00402       result += "\r\n";
00403       // the cursor is decremented again, so's we need not
00404       // duplicate the whole switch here. "ch" could've been
00405       // everything (incl. openChar or closeChar).
00406       scursor--;
00407     }
00408       }
00409       break;
00410     case '\n':
00411       // Note: CRLF has been handled above already!
00412       // ### LF needs special treatment, depending on whether isCRLF
00413       // is true (we can be sure a lonely '\n' was meant this way) or
00414       // false ('\n' alone could have meant LF or CRLF in the original
00415       // message. This parser assumes CRLF iff the LF is followed by
00416       // either WSP (folding) or NULL (premature end of quoted-string;
00417       // Should be fixed, since NULL is allowed as per rfc822).
00418       READ_ch_OR_FAIL;
00419       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00420     // folding
00421     // correct folding
00422     result += QChar(ch);
00423       } else {
00424     // non-folding
00425     KMIME_WARN_LONE(LF);
00426     result += QChar('\n');
00427     // pos is decremented, so's we need not duplicate the whole
00428     // switch here. ch could've been everything (incl. <">, "\").
00429     scursor--;
00430       }
00431       break;
00432     default:
00433       KMIME_WARN_IF_8BIT(ch);
00434       result += QChar(ch);
00435     }
00436   }
00437 
00438   return false;
00439 }
00440 
00441 // known issues:
00442 //
00443 // - doesn't handle encoded-word inside comments.
00444 
00445 bool parseComment( const char* & scursor, const char * const send,
00446            QString & result, bool isCRLF, bool reallySave )
00447 {
00448   int commentNestingDepth = 1;
00449   const char * afterLastClosingParenPos = 0;
00450   QString maybeCmnt;
00451   const char * oldscursor = scursor;
00452 
00453   assert( *(scursor-1) == '(' );
00454 
00455   while ( commentNestingDepth ) {
00456     QString cmntPart;
00457     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00458       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00459       // see the kdoc for above function for the possible conditions
00460       // we have to check:
00461       switch ( *(scursor-1) ) {
00462       case ')':
00463     if ( reallySave ) {
00464       // add the chunk that's now surely inside the comment.
00465       result += maybeCmnt;
00466       result += cmntPart;
00467       if ( commentNestingDepth > 1 ) // don't add the outermost ')'...
00468         result += QChar(')');
00469       maybeCmnt = QString::null;
00470     }
00471     afterLastClosingParenPos = scursor;
00472     --commentNestingDepth;
00473     break;
00474       case '(':
00475     if ( reallySave ) {
00476       // don't add to "result" yet, because we might find that we
00477       // are already outside the (broken) comment...
00478       maybeCmnt += cmntPart;
00479       maybeCmnt += QChar('(');
00480     }
00481     ++commentNestingDepth;
00482     break;
00483       default: assert( 0 );
00484       } // switch
00485     } else {
00486       // !parseGenericQuotedString, ie. premature end
00487       if ( afterLastClosingParenPos )
00488     scursor = afterLastClosingParenPos;
00489       else
00490     scursor = oldscursor;
00491       return false;
00492     }
00493   } // while
00494 
00495   return true;
00496 }
00497 
00498 
00499 // known issues: none.
00500 
00501 bool parsePhrase( const char* & scursor, const char * const send,
00502           QString & result, bool isCRLF )
00503 {
00504   enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
00505   QString tmp;
00506   QCString lang;
00507   const char * successfullyParsed = 0;
00508   // only used by the encoded-word branch
00509   const char * oldscursor;
00510   // used to suppress whitespace between adjacent encoded-words
00511   // (rfc2047, 6.2):
00512   bool lastWasEncodedWord = false;
00513 
00514   while ( scursor != send ) {
00515     char ch = *scursor++;
00516     switch ( ch ) {
00517     case '.': // broken, but allow for intorop's sake
00518       if ( found == None ) {
00519     --scursor;
00520     return false;
00521       } else {
00522     if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
00523       result += ". ";
00524     else
00525       result += '.';
00526     successfullyParsed = scursor;
00527       }
00528       break;
00529     case '"': // quoted-string
00530       tmp = QString::null;
00531       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00532     successfullyParsed = scursor;
00533     assert( *(scursor-1) == '"' );
00534     switch ( found ) {
00535     case None:
00536       found = QuotedString;
00537       break;
00538     case Phrase:
00539     case Atom:
00540     case EncodedWord:
00541     case QuotedString:
00542       found = Phrase;
00543       result += QChar(' '); // rfc822, 3.4.4
00544       break;
00545     default:
00546       assert( 0 );
00547     }
00548     lastWasEncodedWord = false;
00549     result += tmp;
00550       } else {
00551     // premature end of quoted string.
00552     // What to do? Return leading '"' as special? Return as quoted-string?
00553     // We do the latter if we already found something, else signal failure.
00554     if ( found == None ) {
00555       return false;
00556     } else {
00557       result += QChar(' '); // rfc822, 3.4.4
00558       result += tmp;
00559       return true;
00560     }
00561       }
00562       break;
00563     case '(': // comment
00564       // parse it, but ignore content:
00565       tmp = QString::null;
00566       if ( parseComment( scursor, send, tmp, isCRLF,
00567              false /*don't bother with the content*/ ) ) {
00568     successfullyParsed = scursor;
00569     lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00570       } else {
00571     if ( found == None )
00572       return false;
00573     else {
00574       scursor = successfullyParsed;
00575       return true;
00576     }
00577       }
00578       break;
00579     case '=': // encoded-word
00580       tmp = QString::null;
00581       oldscursor = scursor;
00582       lang = 0;
00583       if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
00584     successfullyParsed = scursor;
00585     switch ( found ) {
00586     case None:
00587       found = EncodedWord;
00588       break;
00589     case Phrase:
00590     case EncodedWord:
00591     case Atom:
00592     case QuotedString:
00593       if ( !lastWasEncodedWord )
00594         result += QChar(' '); // rfc822, 3.4.4
00595       found = Phrase;
00596       break;
00597     default: assert( 0 );
00598     }
00599     lastWasEncodedWord = true;
00600     result += tmp;
00601     break;
00602       } else
00603     // parse as atom:
00604     scursor = oldscursor;
00605       // fall though...
00606 
00607     default: //atom
00608       tmp = QString::null;
00609       scursor--;
00610       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00611     successfullyParsed = scursor;
00612     switch ( found ) {
00613     case None:
00614       found = Atom;
00615       break;
00616     case Phrase:
00617     case Atom:
00618     case EncodedWord:
00619     case QuotedString:
00620       found = Phrase;
00621       result += QChar(' '); // rfc822, 3.4.4
00622       break;
00623     default:
00624       assert( 0 );
00625     }
00626     lastWasEncodedWord = false;
00627     result += tmp;
00628       } else {
00629     if ( found == None )
00630       return false;
00631     else {
00632       scursor = successfullyParsed;
00633       return true;
00634     }
00635       }
00636     }
00637     eatWhiteSpace( scursor, send );
00638   }
00639 
00640   return ( found != None );
00641 }
00642 
00643 
00644 bool parseDotAtom( const char* & scursor, const char * const send,
00645            QString & result, bool isCRLF )
00646 {
00647   // always points to just after the last atom parsed:
00648   const char * successfullyParsed;
00649 
00650   QString tmp;
00651   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
00652     return false;
00653   result += tmp;
00654   successfullyParsed = scursor;
00655 
00656   while ( scursor != send ) {
00657     eatCFWS( scursor, send, isCRLF );
00658 
00659     // end of header or no '.' -> return
00660     if ( scursor == send || *scursor != '.' ) return true;
00661     scursor++; // eat '.'
00662 
00663     eatCFWS( scursor, send, isCRLF );
00664 
00665     if ( scursor == send || !isAText( *scursor ) ) {
00666       // end of header or no AText, but this time following a '.'!:
00667       // reset cursor to just after last successfully parsed char and
00668       // return:
00669       scursor = successfullyParsed;
00670       return true;
00671     }
00672 
00673     // try to parse the next atom:
00674     QString maybeAtom;
00675     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00676       scursor = successfullyParsed;
00677       return true;
00678     }
00679 
00680     result += QChar('.');
00681     result += maybeAtom;
00682     successfullyParsed = scursor;
00683   }
00684 
00685   scursor = successfullyParsed;
00686   return true;
00687 }
00688 
00689 
00690 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
00691   QString dummy;
00692 
00693   while ( scursor != send ) {
00694     const char * oldscursor = scursor;
00695 
00696     char ch = *scursor++;
00697 
00698     switch( ch ) {
00699     case ' ':
00700     case '\t': // whitespace
00701     case '\r':
00702     case '\n': // folding
00703       continue;
00704 
00705     case '(': // comment
00706       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) )
00707     continue;
00708       scursor = oldscursor;
00709       return;
00710 
00711     default:
00712       scursor = oldscursor;
00713       return;
00714     }
00715 
00716   }
00717 }
00718 
00719 bool parseDomain( const char* & scursor, const char * const send,
00720           QString & result, bool isCRLF ) {
00721   eatCFWS( scursor, send, isCRLF );
00722   if ( scursor == send ) return false;
00723 
00724   // domain := dot-atom / domain-literal / atom *("." atom)
00725   //
00726   // equivalent to:
00727   // domain = dot-atom / domain-literal,
00728   // since parseDotAtom does allow CFWS between atoms and dots
00729 
00730   if ( *scursor == '[' ) {
00731     // domain-literal:
00732     QString maybeDomainLiteral;
00733     // eat '[':
00734     scursor++;
00735     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00736                       isCRLF, '[', ']' ) ) {
00737       if ( scursor == send ) {
00738     // end of header: check for closing ']':
00739     if ( *(scursor-1) == ']' ) {
00740       // OK, last char was ']':
00741       result = maybeDomainLiteral;
00742       return true;
00743     } else {
00744       // not OK, domain-literal wasn't closed:
00745       return false;
00746     }
00747       }
00748       // we hit openChar in parseGenericQuotedString.
00749       // include it in maybeDomainLiteral and keep on parsing:
00750       if ( *(scursor-1) == '[' ) {
00751     maybeDomainLiteral += QChar('[');
00752     continue;
00753       }
00754       // OK, real end of domain-literal:
00755       result = maybeDomainLiteral;
00756       return true;
00757     }
00758   } else {
00759     // dot-atom:
00760     QString maybeDotAtom;
00761     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00762       result = maybeDotAtom;
00763       return true;
00764     }
00765   }
00766   return false;
00767 }
00768 
00769 bool parseObsRoute( const char* & scursor, const char* const send,
00770             QStringList & result, bool isCRLF, bool save ) {
00771   while ( scursor != send ) {
00772     eatCFWS( scursor, send, isCRLF );
00773     if ( scursor == send ) return false;
00774 
00775     // empty entry:
00776     if ( *scursor == ',' ) {
00777       scursor++;
00778       if ( save ) result.append( QString::null );
00779       continue;
00780     }
00781 
00782     // empty entry ending the list:
00783     if ( *scursor == ':' ) {
00784       scursor++;
00785       if ( save ) result.append( QString::null );
00786       return true;
00787     }
00788 
00789     // each non-empty entry must begin with '@':
00790     if ( *scursor != '@' )
00791       return false;
00792     else
00793       scursor++;
00794 
00795     QString maybeDomain;
00796     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
00797     if ( save ) result.append( maybeDomain );
00798 
00799     // eat the following (optional) comma:
00800     eatCFWS( scursor, send, isCRLF );
00801     if ( scursor == send ) return false;
00802     if ( *scursor == ':' ) { scursor++; return true; }
00803     if ( *scursor == ',' ) scursor++;
00804 
00805   }
00806 
00807   return false;
00808 }
00809 
00810 bool parseAddrSpec( const char* & scursor, const char * const send,
00811             AddrSpec & result, bool isCRLF ) {
00812   //
00813   // STEP 1:
00814   // local-part := dot-atom / quoted-string / word *("." word)
00815   //
00816   // this is equivalent to:
00817   // local-part := word *("." word)
00818 
00819   QString maybeLocalPart;
00820   QString tmp;
00821 
00822   while ( scursor != send ) {
00823     // first, eat any whitespace
00824     eatCFWS( scursor, send, isCRLF );
00825 
00826     char ch = *scursor++;
00827     switch ( ch ) {
00828     case '.': // dot
00829       maybeLocalPart += QChar('.');
00830       break;
00831 
00832     case '@':
00833       goto SAW_AT_SIGN;
00834       break;
00835 
00836     case '"': // quoted-string
00837       tmp = QString::null;
00838       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
00839     maybeLocalPart += tmp;
00840       else
00841     return false;
00842       break;
00843 
00844     default: // atom
00845       scursor--; // re-set scursor to point to ch again
00846       tmp = QString::null;
00847       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) )
00848     maybeLocalPart += tmp;
00849       else
00850     return false; // parseAtom can only fail if the first char is non-atext.
00851       break;
00852     }
00853   }
00854 
00855   return false;
00856 
00857 
00858   //
00859   // STEP 2:
00860   // domain
00861   //
00862 
00863 SAW_AT_SIGN:
00864 
00865   assert( *(scursor-1) == '@' );
00866 
00867   QString maybeDomain;
00868   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
00869     return false;
00870 
00871   result.localPart = maybeLocalPart;
00872   result.domain = maybeDomain;
00873 
00874   return true;
00875 }
00876 
00877 
00878 bool parseAngleAddr( const char* & scursor, const char * const send,
00879              AddrSpec & result, bool isCRLF ) {
00880   // first, we need an opening angle bracket:
00881   eatCFWS( scursor, send, isCRLF );
00882   if ( scursor == send || *scursor != '<' ) return false;
00883   scursor++; // eat '<'
00884 
00885   eatCFWS( scursor, send, isCRLF );
00886   if ( scursor == send ) return false;
00887 
00888   if ( *scursor == '@' || *scursor == ',' ) {
00889     // obs-route: parse, but ignore:
00890     KMIME_WARN << "obsolete source route found! ignoring." << endl;
00891     QStringList dummy;
00892     if ( !parseObsRoute( scursor, send, dummy,
00893              isCRLF, false /* don't save */ ) )
00894       return false;
00895     // angle-addr isn't complete until after the '>':
00896     if ( scursor == send ) return false;
00897   }
00898 
00899   // parse addr-spec:
00900   AddrSpec maybeAddrSpec;
00901   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
00902 
00903   eatCFWS( scursor, send, isCRLF );
00904   if ( scursor == send || *scursor != '>' ) return false;
00905   scursor++;
00906 
00907   result = maybeAddrSpec;
00908   return true;
00909 
00910 }
00911 
00912 bool parseMailbox( const char* & scursor, const char * const send,
00913            Mailbox & result, bool isCRLF ) {
00914 
00915   // rfc:
00916   // mailbox := addr-spec / ([ display-name ] angle-addr)
00917   // us:
00918   // mailbox := addr-spec / ([ display-name ] angle-addr)
00919   //                      / (angle-addr "(" display-name ")")
00920 
00921   eatCFWS( scursor, send, isCRLF );
00922   if ( scursor == send ) return false;
00923 
00924   AddrSpec maybeAddrSpec;
00925 
00926   // first, try if it's a vanilla addr-spec:
00927   const char * oldscursor = scursor;
00928   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
00929     result.displayName = QString::null;
00930     result.addrSpec = maybeAddrSpec;
00931     return true;
00932   }
00933   scursor = oldscursor;
00934 
00935   // second, see if there's a display-name:
00936   QString maybeDisplayName;
00937   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
00938     // failed: reset cursor, note absent display-name
00939     maybeDisplayName = QString::null;
00940     scursor = oldscursor;
00941   } else {
00942     // succeeded: eat CFWS
00943     eatCFWS( scursor, send, isCRLF );
00944     if ( scursor == send ) return false;
00945   }
00946 
00947   // third, parse the angle-addr:
00948   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
00949     return false;
00950 
00951   if ( maybeDisplayName.isNull() ) {
00952     // check for the obsolete form of display-name (as comment):
00953     eatWhiteSpace( scursor, send );
00954     if ( scursor != send && *scursor == '(' ) {
00955       scursor++;
00956       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) )
00957     return false;
00958     }
00959   }
00960 
00961   result.displayName = maybeDisplayName;
00962   result.addrSpec = maybeAddrSpec;
00963   return true;
00964 }
00965 
00966 bool parseGroup( const char* & scursor, const char * const send,
00967          Address & result, bool isCRLF ) {
00968   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
00969   //
00970   // equivalent to:
00971   // group   := display-name ":" [ obs-mbox-list ] ";"
00972 
00973   eatCFWS( scursor, send, isCRLF );
00974   if ( scursor == send ) return false;
00975 
00976   // get display-name:
00977   QString maybeDisplayName;
00978   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
00979     return false;
00980 
00981   // get ":":
00982   eatCFWS( scursor, send, isCRLF );
00983   if ( scursor == send || *scursor != ':' ) return false;
00984 
00985   result.displayName = maybeDisplayName;
00986 
00987   // get obs-mbox-list (may contain empty entries):
00988   scursor++;
00989   while ( scursor != send ) {
00990     eatCFWS( scursor, send, isCRLF );
00991     if ( scursor == send ) return false;
00992 
00993     // empty entry:
00994     if ( *scursor == ',' ) { scursor++; continue; }
00995 
00996     // empty entry ending the list:
00997     if ( *scursor == ';' ) { scursor++; return true; }
00998 
00999     Mailbox maybeMailbox;
01000     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
01001       return false;
01002     result.mailboxList.append( maybeMailbox );
01003 
01004     eatCFWS( scursor, send, isCRLF );
01005     // premature end:
01006     if ( scursor == send ) return false;
01007     // regular end of the list:
01008     if ( *scursor == ';' ) { scursor++; return true; }
01009     // eat regular list entry separator:
01010     if ( *scursor == ',' ) scursor++;
01011   }
01012   return false;
01013 }
01014 
01015 
01016 bool parseAddress( const char* & scursor, const char * const send,
01017            Address & result, bool isCRLF ) {
01018   // address       := mailbox / group
01019 
01020   eatCFWS( scursor, send, isCRLF );
01021   if ( scursor == send ) return false;
01022 
01023   // first try if it's a single mailbox:
01024   Mailbox maybeMailbox;
01025   const char * oldscursor = scursor;
01026   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01027     // yes, it is:
01028     result.displayName = QString::null;
01029     result.mailboxList.append( maybeMailbox );
01030     return true;
01031   }
01032   scursor = oldscursor;
01033 
01034   Address maybeAddress;
01035 
01036   // no, it's not a single mailbox. Try if it's a group:
01037   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
01038     return false;
01039 
01040   result = maybeAddress;
01041   return true;
01042 }
01043 
01044 bool parseAddressList( const char* & scursor, const char * const send,
01045                AddressList & result, bool isCRLF ) {
01046   while ( scursor != send ) {
01047     eatCFWS( scursor, send, isCRLF );
01048     // end of header: this is OK.
01049     if ( scursor == send ) return true;
01050     // empty entry: ignore:
01051     if ( *scursor == ',' ) { scursor++; continue; }
01052 
01053     // parse one entry
01054     Address maybeAddress;
01055     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
01056     result.append( maybeAddress );
01057 
01058     eatCFWS( scursor, send, isCRLF );
01059     // end of header: this is OK.
01060     if ( scursor == send ) return true;
01061     // comma separating entries: eat it.
01062     if ( *scursor == ',' ) scursor++;
01063   }
01064   return true;
01065 }
01066 
01067 
01068 static QString asterisk = QString::fromLatin1("*0*",1);
01069 static QString asteriskZero = QString::fromLatin1("*0*",2);
01070 //static QString asteriskZeroAsterisk = QString::fromLatin1("*0*",3);
01071 
01072 bool parseParameter( const char* & scursor, const char * const send,
01073              QPair<QString,QStringOrQPair> & result, bool isCRLF ) {
01074   // parameter = regular-parameter / extended-parameter
01075   // regular-parameter = regular-parameter-name "=" value
01076   // extended-parameter =
01077   // value = token / quoted-string
01078   //
01079   // note that rfc2231 handling is out of the scope of this function.
01080   // Therefore we return the attribute as QString and the value as
01081   // (start,length) tupel if we see that the value is encoded
01082   // (trailing asterisk), for parseParameterList to decode...
01083 
01084   eatCFWS( scursor, send, isCRLF );
01085   if ( scursor == send ) return false;
01086 
01087   //
01088   // parse the parameter name:
01089   //
01090   QString maybeAttribute;
01091   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) )
01092     return false;
01093 
01094   eatCFWS( scursor, send, isCRLF );
01095   // premature end: not OK (haven't seen '=' yet).
01096   if ( scursor == send || *scursor != '=' ) return false;
01097   scursor++; // eat '='
01098 
01099   eatCFWS( scursor, send, isCRLF );
01100   if ( scursor == send ) {
01101     // don't choke on attribute=, meaning the value was omitted:
01102     if ( maybeAttribute.endsWith( asterisk ) ) {
01103       KMIME_WARN << "attribute ends with \"*\", but value is empty! "
01104     "Chopping away \"*\"." << endl;
01105       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01106     }
01107     result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01108     return true;
01109   }
01110 
01111   const char * oldscursor = scursor;
01112 
01113   //
01114   // parse the parameter value:
01115   //
01116   QStringOrQPair maybeValue;
01117   if ( *scursor == '"' ) {
01118     // value is a quoted-string:
01119     scursor++;
01120     if ( maybeAttribute.endsWith( asterisk ) ) {
01121       // attributes ending with "*" designate extended-parameters,
01122       // which cannot have quoted-strings as values. So we remove the
01123       // trailing "*" to not confuse upper layers.
01124       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
01125     "Chopping away \"*\"." << endl;
01126       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01127     }
01128 
01129     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01130       scursor = oldscursor;
01131       result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01132       return false; // this case needs further processing by upper layers!!
01133     }
01134   } else {
01135     // value is a token:
01136     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01137       scursor = oldscursor;
01138       result = qMakePair( maybeAttribute.lower(), QStringOrQPair() );
01139       return false; // this case needs further processing by upper layers!!
01140     }
01141   }
01142 
01143   result = qMakePair( maybeAttribute.lower(), maybeValue );
01144   return true;
01145 }
01146 
01147 
01148 
01149 bool parseRawParameterList( const char* & scursor, const char * const send,
01150                 QMap<QString,QStringOrQPair> & result,
01151                 bool isCRLF ) {
01152   // we use parseParameter() consecutively to obtain a map of raw
01153   // attributes to raw values. "Raw" here means that we don't do
01154   // rfc2231 decoding and concatenation. This is left to
01155   // parseParameterList(), which will call this function.
01156   //
01157   // The main reason for making this chunk of code a separate
01158   // (private) method is that we can deal with broken parameters
01159   // _here_ and leave the rfc2231 handling solely to
01160   // parseParameterList(), which will still be enough work.
01161 
01162   while ( scursor != send ) {
01163     eatCFWS( scursor, send, isCRLF );
01164     // empty entry ending the list: OK.
01165     if ( scursor == send ) return true;
01166     // empty list entry: ignore.
01167     if ( *scursor == ';' ) { scursor++; continue; }
01168 
01169     QPair<QString,QStringOrQPair> maybeParameter;
01170     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01171       // we need to do a bit of work if the attribute is not
01172       // NULL. These are the cases marked with "needs further
01173       // processing" in parseParameter(). Specifically, parsing of the
01174       // token or the quoted-string, which should represent the value,
01175       // failed. We take the easy way out and simply search for the
01176       // next ';' to start parsing again. (Another option would be to
01177       // take the text between '=' and ';' as value)
01178       if ( maybeParameter.first.isNull() ) return false;
01179       while ( scursor != send ) {
01180     if ( *scursor++ == ';' ) goto IS_SEMICOLON;
01181       }
01182       // scursor == send case: end of list.
01183       return true;
01184     IS_SEMICOLON:
01185       // *scursor == ';' case: parse next entry.
01186       continue;
01187     }
01188     // successful parsing brings us here:
01189     result.insert( maybeParameter.first, maybeParameter.second );
01190 
01191     eatCFWS( scursor, send, isCRLF );
01192     // end of header: ends list.
01193     if ( scursor == send ) return true;
01194     // regular separator: eat it.
01195     if ( *scursor == ';' ) scursor++;
01196   }
01197   return true;
01198 }
01199 
01200 
01201 static void decodeRFC2231Value( Codec* & rfc2231Codec,
01202                 QTextCodec* & textcodec,
01203                 bool isContinuation, QString & value,
01204                 QPair<const char*,int> & source ) {
01205 
01206   //
01207   // parse the raw value into (charset,language,text):
01208   //
01209 
01210   const char * decBegin = source.first;
01211   const char * decCursor = decBegin;
01212   const char * decEnd = decCursor + source.second;
01213 
01214   if ( !isContinuation ) {
01215     // find the first single quote
01216     while ( decCursor != decEnd ) {
01217       if ( *decCursor == '\'' ) break;
01218       else decCursor++;
01219     }
01220 
01221     if ( decCursor == decEnd ) {
01222       // there wasn't a single single quote at all!
01223       // take the whole value to be in latin-1:
01224       KMIME_WARN << "No charset in extended-initial-value. "
01225     "Assuming \"iso-8859-1\"." << endl;
01226       value += QString::fromLatin1( decBegin, source.second );
01227       return;
01228     }
01229 
01230     QCString charset( decBegin, decCursor - decBegin + 1 );
01231 
01232     const char * oldDecCursor = ++decCursor;
01233     // find the second single quote (we ignore the language tag):
01234     while ( decCursor != decEnd ) {
01235       if ( *decCursor == '\'' ) break;
01236       else decCursor++;
01237     }
01238     if ( decCursor == decEnd ) {
01239       KMIME_WARN << "No language in extended-initial-value. "
01240     "Trying to recover." << endl;
01241       decCursor = oldDecCursor;
01242     } else
01243       decCursor++;
01244 
01245     // decCursor now points to the start of the
01246     // "extended-other-values":
01247 
01248     //
01249     // get the decoders:
01250     //
01251 
01252     bool matchOK = false;
01253     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01254     if ( !matchOK ) {
01255       textcodec = 0;
01256       KMIME_WARN_UNKNOWN(Charset,charset);
01257     }
01258   }
01259 
01260   if ( !rfc2231Codec ) {
01261     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01262     assert( rfc2231Codec );
01263   }
01264 
01265   if ( !textcodec ) {
01266     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01267     return;
01268   }
01269 
01270   Decoder * dec = rfc2231Codec->makeDecoder();
01271   assert( dec );
01272 
01273   //
01274   // do the decoding:
01275   //
01276 
01277   QByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01278   QByteArray::Iterator bit = buffer.begin();
01279   QByteArray::ConstIterator bend = buffer.end();
01280 
01281   if ( !dec->decode( decCursor, decEnd, bit, bend ) )
01282     KMIME_WARN << rfc2231Codec->name()
01283            << " codec lies about it's maxDecodedSizeFor()\n"
01284       "result may be truncated" << endl;
01285 
01286   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01287 
01288   kdDebug() << "value now: \"" << value << "\"" << endl;
01289   // cleanup:
01290   delete dec;
01291 }
01292 
01293 // known issues:
01294 //  - permutes rfc2231 continuations when the total number of parts
01295 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01296 
01297 bool parseParameterList( const char* & scursor, const char * const send,
01298              QMap<QString,QString> & result, bool isCRLF ) {
01299   // parse the list into raw attribute-value pairs:
01300   QMap<QString,QStringOrQPair> rawParameterList;
01301   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
01302     return false;
01303 
01304   if ( rawParameterList.isEmpty() ) return true;
01305 
01306   // decode rfc 2231 continuations and alternate charset encoding:
01307 
01308   // NOTE: this code assumes that what QMapIterator delivers is sorted
01309   // by the key!
01310 
01311   Codec * rfc2231Codec = 0;
01312   QTextCodec * textcodec = 0;
01313   QString attribute;
01314   QString value;
01315   enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
01316 
01317   QMapIterator<QString,QStringOrQPair> it, end = rawParameterList.end();
01318 
01319   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01320     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01321       //
01322       // new attribute:
01323       //
01324 
01325       // store the last attribute/value pair in the result map now:
01326       if ( !attribute.isNull() ) result.insert( attribute, value );
01327       // and extract the information from the new raw attribute:
01328       value = QString::null;
01329       attribute = it.key();
01330       mode = NoMode;
01331       // is the value encoded?
01332       if ( attribute.endsWith( asterisk ) ) {
01333     attribute.truncate( attribute.length() - 1 );
01334     mode = (Modes) ((int) mode | Encoded);
01335       }
01336       // is the value continued?
01337       if ( attribute.endsWith( asteriskZero ) ) {
01338     attribute.truncate( attribute.length() - 2 );
01339     mode = (Modes) ((int) mode | Continued);
01340       }
01341       //
01342       // decode if necessary:
01343       //
01344       if ( mode & Encoded ) {
01345     decodeRFC2231Value( rfc2231Codec, textcodec,
01346                 false, /* isn't continuation */
01347                 value, (*it).qpair );
01348       } else {
01349     // not encoded.
01350     if ( (*it).qpair.first )
01351       value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01352     else
01353       value += (*it).qstring;
01354       }
01355 
01356       //
01357       // shortcut-processing when the value isn't encoded:
01358       //
01359 
01360       if ( !(mode & Continued) ) {
01361     // save result already:
01362     result.insert( attribute, value );
01363     // force begin of a new attribute:
01364     attribute = QString::null;
01365       }
01366     } else /* it.key().startsWith( attribute ) */ {
01367       //
01368       // continuation
01369       //
01370 
01371       // ignore the section and trust QMap to have sorted the keys:
01372       if ( it.key().endsWith( asterisk ) ) {
01373     // encoded
01374     decodeRFC2231Value( rfc2231Codec, textcodec,
01375                 true, /* is continuation */
01376                 value, (*it).qpair );
01377       } else {
01378     // not encoded
01379     if ( (*it).qpair.first )
01380       value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01381     else
01382       value += (*it).qstring;
01383       }
01384     }
01385   }
01386 
01387   // write last attr/value pair:
01388   if ( !attribute.isNull() )
01389     result.insert( attribute, value );
01390 
01391   return true;
01392 }
01393 
01394 static const char * stdDayNames[] = {
01395   "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
01396 };
01397 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
01398 
01399 static bool parseDayName( const char* & scursor, const char * const send )
01400 {
01401   // check bounds:
01402   if ( send - scursor < 3 ) return false;
01403 
01404   for ( int i = 0 ; i < stdDayNamesLen ; ++i )
01405     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01406       scursor += 3;
01407       kdDebug() << "found " << stdDayNames[i] << endl;
01408       return true;
01409     }
01410 
01411   return false;
01412 }
01413 
01414 
// The month-name abbreviations of rfc2822, in calendar order, so that
// the array index is the zero-based month number.
// (December is "Dec"; the German "Dez" used here before made every
// December date unparsable.)
static const char * stdMonthNames[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
static const int stdMonthNamesLen =
  sizeof stdMonthNames / sizeof *stdMonthNames;
01421 
01422 static bool parseMonthName( const char* & scursor, const char * const send,
01423                 int & result )
01424 {
01425   // check bounds:
01426   if ( send - scursor < 3 ) return false;
01427 
01428   for ( result = 0 ; result < stdMonthNamesLen ; ++result )
01429     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01430       scursor += 3;
01431       return true;
01432     }
01433 
01434   // not found:
01435   return false;
01436 }
01437 
// Known alphabetic time zone names and their offsets from GMT in
// seconds (positive values are east of GMT).
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  // this entry used to be a second "MST", which left "CDT" unknown
  // and made "MST" resolve to -5h:
  { "CDT", -5*3600 },
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01488 
01489 static bool parseAlphaNumericTimeZone( const char* & scursor,
01490                        const char * const send,
01491                        long int & secsEastOfGMT,
01492                        bool & timeZoneKnown )
01493 {
01494   QPair<const char*,int> maybeTimeZone(0,0);
01495   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) )
01496     return false;
01497   for ( int i = 0 ; i < timeZonesLen ; ++i )
01498     if ( qstrnicmp( timeZones[i].tzName,
01499             maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01500       scursor += maybeTimeZone.second;
01501       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01502       timeZoneKnown = true;
01503       return true;
01504     }
01505 
01506   // don't choke just because we don't happen to know the time zone
01507   KMIME_WARN_UNKNOWN(time zone,QCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
01508   secsEastOfGMT = 0;
01509   timeZoneKnown = false;
01510   return true;
01511 }
01512 
// Parses a run of ASCII decimal digits starting at scursor.
//
// scursor is left on the first non-digit (or on send). result receives
// the parsed number (0 if there were no digits); numbers too large for
// an int saturate at the largest int value instead of overflowing.
//
// Returns the number of digits consumed.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  // largest int value, spelled out so no additional header is needed:
  const int maxInt = int( ~0U >> 1 );

  result = 0;
  int digits = 0;
  for ( ; scursor != send ; ++scursor, ++digits ) {
    // Plain range check instead of isdigit(): headers may contain
    // 8bit characters, and passing a negative char to isdigit() is
    // undefined behaviour.
    const char ch = *scursor;
    if ( ch < '0' || ch > '9' )
      break;

    const int digit = ch - '0';
    if ( result > ( maxInt - digit ) / 10 )
      result = maxInt; // would overflow: saturate
    else
      result = result * 10 + digit;
  }
  return digits;
}
01525 
01526 static bool parseTimeOfDay( const char* & scursor, const char * const send,
01527                 int & hour, int & min, int & sec, bool isCRLF=false )
01528 {
01529   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01530 
01531   //
01532   // 2DIGIT representing "hour":
01533   //
01534   if ( !parseDigits( scursor, send, hour ) ) return false;
01535 
01536   eatCFWS( scursor, send, isCRLF );
01537   if ( scursor == send || *scursor != ':' ) return false;
01538   scursor++; // eat ':'
01539 
01540   eatCFWS( scursor, send, isCRLF );
01541   if ( scursor == send ) return false;
01542 
01543   //
01544   // 2DIGIT representing "minute":
01545   //
01546   if ( !parseDigits( scursor, send, min ) ) return false;
01547 
01548   eatCFWS( scursor, send, isCRLF );
01549   if ( scursor == send ) return true; // seconds are optional
01550 
01551   //
01552   // let's see if we have a 2DIGIT representing "second":
01553   //
01554   if ( *scursor == ':' ) {
01555     // yepp, there are seconds:
01556     scursor++; // eat ':'
01557     eatCFWS( scursor, send, isCRLF );
01558     if ( scursor == send ) return false;
01559 
01560     if ( !parseDigits( scursor, send, sec ) ) return false;
01561   } else {
01562     sec = 0;
01563   }
01564 
01565   return true;
01566 }
01567 
01568 
01569 bool parseTime( const char* & scursor, const char * send,
01570         int & hour, int & min, int & sec, long int & secsEastOfGMT,
01571         bool & timeZoneKnown, bool isCRLF )
01572 {
01573   // time := time-of-day CFWS ( zone / obs-zone )
01574   //
01575   // obs-zone    := "UT" / "GMT" /
01576   //                "EST" / "EDT" / ; -0500 / -0400
01577   //                "CST" / "CDT" / ; -0600 / -0500
01578   //                "MST" / "MDT" / ; -0700 / -0600
01579   //                "PST" / "PDT" / ; -0800 / -0700
01580   //                "A"-"I" / "a"-"i" /
01581   //                "K"-"Z" / "k"-"z"
01582 
01583   eatCFWS( scursor, send, isCRLF );
01584   if ( scursor == send ) return false;
01585 
01586   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
01587     return false;
01588 
01589   eatCFWS( scursor, send, isCRLF );
01590   if ( scursor == send ) {
01591     timeZoneKnown = false;
01592     secsEastOfGMT = 0;
01593     return true; // allow missing timezone
01594   }
01595 
01596   timeZoneKnown = true;
01597   if ( *scursor == '+' || *scursor == '-' ) {
01598     // remember and eat '-'/'+':
01599     const char sign = *scursor++;
01600     // numerical timezone:
01601     int maybeTimeZone;
01602     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
01603     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01604     if ( sign == '-' ) {
01605       secsEastOfGMT *= -1;
01606       if ( secsEastOfGMT == 0 )
01607     timeZoneKnown = false; // -0000 means indetermined tz
01608     }
01609   } else {
01610     // maybe alphanumeric timezone:
01611     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
01612       return false;
01613   }
01614   return true;
01615 }
01616 
01617 
01618 bool parseDateTime( const char* & scursor, const char * const send,
01619             Types::DateTime & result, bool isCRLF )
01620 {
01621   // Parsing date-time; strict mode:
01622   //
01623   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
01624   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
01625   //                time
01626   //
01627   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
01628   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
01629   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez"
01630 
01631   struct tm maybeDateTime = {
01632 #ifdef HAVE_TM_GMTOFF
01633     0, 0, // initializers for members tm_gmtoff and tm_zone
01634 #endif
01635     0, 0, 0, 0, 0, 0, 0, 0, 0
01636   };
01637 
01638   eatCFWS( scursor, send, isCRLF );
01639   if ( scursor == send ) return false;
01640 
01641   //
01642   // let's see if there's a day-of-week:
01643   //
01644   if ( parseDayName( scursor, send ) ) {
01645     eatCFWS( scursor, send, isCRLF );
01646     if ( scursor == send ) return false;
01647     // day-name should be followed by ',' but we treat it as optional:
01648     if ( *scursor == ',' ) {
01649       scursor++; // eat ','
01650       eatCFWS( scursor, send, isCRLF );
01651     }
01652   }
01653 
01654   //
01655   // 1*2DIGIT representing "day" (of month):
01656   //
01657   int maybeDay;
01658   if ( !parseDigits( scursor, send, maybeDay ) ) return false;
01659 
01660   eatCFWS( scursor, send, isCRLF );
01661   if ( scursor == send ) return false;
01662 
01663   // success: store maybeDay in maybeDateTime:
01664   maybeDateTime.tm_mday = maybeDay;
01665 
01666   //
01667   // month-name:
01668   //
01669   int maybeMonth = 0;
01670   if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
01671   if ( scursor == send ) return false;
01672   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01673 
01674   eatCFWS( scursor, send, isCRLF );
01675   if ( scursor == send ) return false;
01676 
01677   // success: store maybeMonth in maybeDateTime:
01678   maybeDateTime.tm_mon = maybeMonth;
01679 
01680   //
01681   // 2*DIGIT representing "year":
01682   //
01683   int maybeYear;
01684   if ( !parseDigits( scursor, send, maybeYear ) ) return false;
01685   // RFC 2822 4.3 processing:
01686   if ( maybeYear < 50 )
01687     maybeYear += 2000;
01688   else if ( maybeYear < 1000 )
01689     maybeYear += 1900;
01690   // else keep as is
01691   if ( maybeYear < 1900 ) return false; // rfc2822, 3.3
01692 
01693   eatCFWS( scursor, send, isCRLF );
01694   if ( scursor == send ) return false;
01695 
01696   // success: store maybeYear in maybeDateTime:
01697   maybeDateTime.tm_year = maybeYear - 1900;
01698 
01699   //
01700   // time
01701   //
01702   int maybeHour, maybeMinute, maybeSecond;
01703   long int secsEastOfGMT;
01704   bool timeZoneKnown = true;
01705 
01706   if ( !parseTime( scursor, send,
01707            maybeHour, maybeMinute, maybeSecond,
01708            secsEastOfGMT, timeZoneKnown, isCRLF ) )
01709     return false;
01710 
01711   // success: store everything in maybeDateTime:
01712   maybeDateTime.tm_hour = maybeHour;
01713   maybeDateTime.tm_min = maybeMinute;
01714   maybeDateTime.tm_sec = maybeSecond;
01715   maybeDateTime.tm_isdst = DateFormatter::isDaylight();
01716   // now put everything together and check if mktime(3) likes it:
01717   result.time = mktime( &maybeDateTime );
01718   if ( result.time == (time_t)(-1) ) return false;
01719 
01720   // adjust to UTC/GMT:
01721   //result.time -= secsEastOfGMT;
01722   result.secsEastOfGMT = secsEastOfGMT;
01723   result.timeZoneKnown = timeZoneKnown;
01724 
01725   return true;
01726 }
01727 
01728 #if 0
01729 bool tryToMakeAnySenseOfDateString( const char* & scursor,
01730                     const char * const send,
01731                     time_t & result, bool isCRLF )
01732 {
01733   return false;
01734 }
01735 #endif
01736 
01737 } // namespace HeaderParsing
01738 
01739 } // namespace KMime
KDE Home | KDE Accessibility Home | Description of Access Keys