One Hat Cyber Team
Your IP :
216.73.216.115
Server IP :
194.44.31.54
Server :
Linux zen.imath.kiev.ua 4.18.0-553.77.1.el8_10.x86_64 #1 SMP Fri Oct 3 14:30:23 UTC 2025 x86_64
Server Software :
Apache/2.4.37 (Rocky Linux) OpenSSL/1.1.1k
PHP Version :
5.6.40
Buat File
|
Buat Folder
Eksekusi
Dir :
~
/
usr
/
include
/
clang
/
AST
/
Edit File:
CommentLexer.h
//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines lexer for structured comments and supporting token class. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_COMMENTLEXER_H #define LLVM_CLANG_AST_COMMENTLEXER_H #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace comments { class Lexer; class TextTokenRetokenizer; struct CommandInfo; class CommandTraits; namespace tok { enum TokenKind { eof, newline, text, unknown_command, // Command that does not have an ID. backslash_command, // Command with an ID, that used backslash marker. at_command, // Command with an ID, that used 'at' marker. verbatim_block_begin, verbatim_block_line, verbatim_block_end, verbatim_line_name, verbatim_line_text, html_start_tag, // <tag html_ident, // attr html_equals, // = html_quoted_string, // "blah\"blah" or 'blah\'blah' html_greater, // > html_slash_greater, // /> html_end_tag // </tag }; } // end namespace tok /// Comment token. class Token { friend class Lexer; friend class TextTokenRetokenizer; /// The location of the token. SourceLocation Loc; /// The actual kind of the token. tok::TokenKind Kind; /// Integer value associated with a token. /// /// If the token is a known command, contains command ID and TextPtr is /// unused (command spelling can be found with CommandTraits). Otherwise, /// contains the length of the string that starts at TextPtr. unsigned IntVal; /// Length of the token spelling in comment. Can be 0 for synthenized /// tokens. unsigned Length; /// Contains text value associated with a token. const char *TextPtr; public: SourceLocation getLocation() const LLVM_READONLY { return Loc; } void setLocation(SourceLocation SL) { Loc = SL; } SourceLocation getEndLocation() const LLVM_READONLY { if (Length == 0 || Length == 1) return Loc; return Loc.getLocWithOffset(Length - 1); } tok::TokenKind getKind() const LLVM_READONLY { return Kind; } void setKind(tok::TokenKind K) { Kind = K; } bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; } bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; } unsigned getLength() const LLVM_READONLY { return Length; } void setLength(unsigned L) { Length = L; } StringRef getText() const LLVM_READONLY { assert(is(tok::text)); return StringRef(TextPtr, IntVal); } void setText(StringRef Text) { assert(is(tok::text)); TextPtr = Text.data(); IntVal = Text.size(); } StringRef getUnknownCommandName() const LLVM_READONLY { assert(is(tok::unknown_command)); return StringRef(TextPtr, IntVal); } void setUnknownCommandName(StringRef Name) { assert(is(tok::unknown_command)); TextPtr = Name.data(); IntVal = Name.size(); } unsigned getCommandID() const LLVM_READONLY { assert(is(tok::backslash_command) || is(tok::at_command)); return IntVal; } void setCommandID(unsigned ID) { assert(is(tok::backslash_command) || is(tok::at_command)); IntVal = ID; } unsigned getVerbatimBlockID() const LLVM_READONLY { assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); return IntVal; } void setVerbatimBlockID(unsigned ID) { assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); IntVal = ID; } StringRef getVerbatimBlockText() const LLVM_READONLY { assert(is(tok::verbatim_block_line)); return StringRef(TextPtr, IntVal); } void setVerbatimBlockText(StringRef Text) { assert(is(tok::verbatim_block_line)); TextPtr = Text.data(); IntVal = Text.size(); } unsigned getVerbatimLineID() const LLVM_READONLY { assert(is(tok::verbatim_line_name)); return IntVal; } void setVerbatimLineID(unsigned ID) { assert(is(tok::verbatim_line_name)); IntVal = ID; } StringRef getVerbatimLineText() const LLVM_READONLY { assert(is(tok::verbatim_line_text)); return StringRef(TextPtr, IntVal); } void setVerbatimLineText(StringRef Text) { assert(is(tok::verbatim_line_text)); TextPtr = Text.data(); IntVal = Text.size(); } StringRef getHTMLTagStartName() const LLVM_READONLY { assert(is(tok::html_start_tag)); return StringRef(TextPtr, IntVal); } void setHTMLTagStartName(StringRef Name) { assert(is(tok::html_start_tag)); TextPtr = Name.data(); IntVal = Name.size(); } StringRef getHTMLIdent() const LLVM_READONLY { assert(is(tok::html_ident)); return StringRef(TextPtr, IntVal); } void setHTMLIdent(StringRef Name) { assert(is(tok::html_ident)); TextPtr = Name.data(); IntVal = Name.size(); } StringRef getHTMLQuotedString() const LLVM_READONLY { assert(is(tok::html_quoted_string)); return StringRef(TextPtr, IntVal); } void setHTMLQuotedString(StringRef Str) { assert(is(tok::html_quoted_string)); TextPtr = Str.data(); IntVal = Str.size(); } StringRef getHTMLTagEndName() const LLVM_READONLY { assert(is(tok::html_end_tag)); return StringRef(TextPtr, IntVal); } void setHTMLTagEndName(StringRef Name) { assert(is(tok::html_end_tag)); TextPtr = Name.data(); IntVal = Name.size(); } void dump(const Lexer &L, const SourceManager &SM) const; }; /// Comment lexer. class Lexer { private: Lexer(const Lexer &) = delete; void operator=(const Lexer &) = delete; /// Allocator for strings that are semantic values of tokens and have to be /// computed (for example, resolved decimal character references). llvm::BumpPtrAllocator &Allocator; DiagnosticsEngine &Diags; const CommandTraits &Traits; const char *const BufferStart; const char *const BufferEnd; const char *BufferPtr; /// One past end pointer for the current comment. For BCPL comments points /// to newline or BufferEnd, for C comments points to star in '*/'. const char *CommentEnd; SourceLocation FileLoc; /// If true, the commands, html tags, etc will be parsed and reported as /// separate tokens inside the comment body. If false, the comment text will /// be parsed into text and newline tokens. bool ParseCommands; enum LexerCommentState : uint8_t { LCS_BeforeComment, LCS_InsideBCPLComment, LCS_InsideCComment, LCS_BetweenComments }; /// Low-level lexer state, track if we are inside or outside of comment. LexerCommentState CommentState; enum LexerState : uint8_t { /// Lexing normal comment text LS_Normal, /// Finished lexing verbatim block beginning command, will lex first body /// line. LS_VerbatimBlockFirstLine, /// Lexing verbatim block body line-by-line, skipping line-starting /// decorations. LS_VerbatimBlockBody, /// Finished lexing verbatim line beginning command, will lex text (one /// line). LS_VerbatimLineText, /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes. LS_HTMLStartTag, /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'. LS_HTMLEndTag }; /// Current lexing mode. LexerState State; /// If State is LS_VerbatimBlock, contains the name of verbatim end /// command, including command marker. SmallString<16> VerbatimBlockEndCommandName; /// Given a character reference name (e.g., "lt"), return the character that /// it stands for (e.g., "<"). StringRef resolveHTMLNamedCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-10 integer, return the character. StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-16 integer, return the character. StringRef resolveHTMLHexCharacterReference(StringRef Name) const; void formTokenWithChars(Token &Result, const char *TokEnd, tok::TokenKind Kind); void formTextToken(Token &Result, const char *TokEnd) { StringRef Text(BufferPtr, TokEnd - BufferPtr); formTokenWithChars(Result, TokEnd, tok::text); Result.setText(Text); } SourceLocation getSourceLocation(const char *Loc) const { assert(Loc >= BufferStart && Loc <= BufferEnd && "Location out of range for this buffer!"); const unsigned CharNo = Loc - BufferStart; return FileLoc.getLocWithOffset(CharNo); } DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { return Diags.Report(Loc, DiagID); } /// Eat string matching regexp \code \s*\* \endcode. void skipLineStartingDecorations(); /// Skip over pure text. const char *skipTextToken(); /// Lex comment text, including commands if ParseCommands is set to true. void lexCommentText(Token &T); void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker, const CommandInfo *Info); void lexVerbatimBlockFirstLine(Token &T); void lexVerbatimBlockBody(Token &T); void setupAndLexVerbatimLine(Token &T, const char *TextBegin, const CommandInfo *Info); void lexVerbatimLineText(Token &T); void lexHTMLCharacterReference(Token &T); void setupAndLexHTMLStartTag(Token &T); void lexHTMLStartTag(Token &T); void setupAndLexHTMLEndTag(Token &T); void lexHTMLEndTag(Token &T); public: Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, const CommandTraits &Traits, SourceLocation FileLoc, const char *BufferStart, const char *BufferEnd, bool ParseCommands = true); void lex(Token &T); StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const; }; } // end namespace comments } // end namespace clang #endif
Simpan