Doxygen XLinks
by
V: 2511R0
Website: doxygen
Loading...
Searching...
No Matches
htmlreplacer.cpp
1//==================================================================================================
2// This implementation-file is part of DoxygenXLinks - A doxygen post-processor that allows
3// defining smarter <b>Doxygen</b>-links.
4//
5// \emoji :copyright: 2025-2026 A-Worx GmbH, Germany.
6// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8#include "jobs.hpp"
9#include "dxl.hpp"
10#include "dxlapp.hpp"
11#include "mappedfile.hpp"
12#include "ALib.ALox.H"
13#include "ALib.App.H" // TODO(251204 09:14): we need this only for the definition of LOX_LOX.
14 // How can we avoid including the whole app?
15
16#include <iostream>
17#include <fstream>
18
19using namespace alib;
20using namespace std;
21
22namespace dxl {
23
25 Lox_SetDomain("DXL/HTML/JOB", Scope::Method )
26 Lox_Info( "Reading HTML file {!Q} of size {}", htmlFileNode.Name(),
28 dxl.Stats.HTMLFileSize.fetch_add(int(htmlFileNode->Size()));
29
30 Path path;
31 {ALIB_LOCK_SHARED_WITH(dxl.GetHTMLTreeLock())
32 htmlFileNode.AssembleRealPath(path, lang::Inclusion::Include);
33 }
34
35 // read exclamations applicable to this file once
37 dxl.Exclamations.Get(htmlFileNode.Name(), exclamations);
38
40
41 Lox_Info("Reading HTML file: {}", path )
42 MappedFile& htmlFile= poolWorker->InputFile;
44 try {
45 mfc= htmlFile.Open(path.Terminate(), htmlFileNode->Size(), false);
46 } catch (std::exception&) {
47 app.cErr->Add(app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
48 path);
49 app.machine.SetExitCode(ExitCodes::CantOpenHMLFile);
50 return true;
51 }
52
53 // output buffer (fileSize * 3). And: we add some padding bytes, to be able to test
54 // backward contents without checking actual write-size.
55 AString& writeBuffer= poolWorker->WriteBuffer;
56 writeBuffer.EnsureRemainingCapacity(integer(htmlFileNode->Size() * 3));
57 constexpr integer writeBufferPadSize= 20;
58 writeBuffer._(Fill(0, writeBufferPadSize));
59 char* writeBufferStart= writeBuffer.VBuffer();
60 char* wb = writeBufferStart + writeBufferPadSize;
61
62 // if the application has an exit code, we stop right now
63 // todo: this periodic check makes sense but has never been tested yet.
64 // It has to go into the loop below and also into other jobs.
65 if ( app.machine.GetExitCode().Integral() )
66 return true;
67
68 // we want maximum speed and even spare the fast log calls in the loop.
69 Verbosity verbosity;
70 Lox_GetVerbosity(verbosity)
71
72 // loop over all lines of the HTML-file
73 bool fileChanged = false;
74 int cntELReplacements = 0;
75 int cntELRefReplacements= 0;
76 int lineNo = 1;
77 String512 linkString;
78 size_t lineStartRemaining = mfc.Remaining();
79 while (!mfc.IsEOF()) {
80 char c= char( mfc.Next<NC>() );
81
82 // linefeed
83 if (c == '\n') { *wb++= '\n'; lineNo++; lineStartRemaining= mfc.Remaining(); continue; }
84
85 // check for EL- and ELREF-anchors with every '"'
86 if (c == '"') {
87 bool isElAnchor = characters::Equal<char>(wb-12, "<a class=\"el" , 12);
88 bool isElRefAnchor= characters::Equal<char>(wb-15, "<a class=\"elRef", 15);
89 if ( isElAnchor || isElRefAnchor ) {
90 // sometimes Doxygen adds two spaces
91 while (mfc.Remaining() && char(*mfc.Current()) ==' ')
92 mfc.Next<NC>();
93
94 // read 'href="x"'
95 if (mfc.Remaining() < 8) { *wb++= '\"'; *wb++= ' '; continue; }
96 #if ALIB_DEBUG
97 ALIB_ASSERT( mfc()=='h'
98 && mfc()=='r'
99 && mfc()=='e'
100 && mfc()=='f'
101 && mfc()=='='
102 && mfc()=='\"', "DXL/HTML/JOB")
103 #else
104 mfc.Skip(6);
105 #endif
106
107 // read file-name and anchor
108 String256 fileName;
109 String128 anchor;
110 bool isAnchor= false;
111 while (mfc.Remaining() && (c= mfc()) != '\"') {
112 if (c=='#') {isAnchor= true; continue;}
113 if (!isAnchor) fileName._(c);
114 else anchor ._(c);
115 }
116 Styles styles;
117 dxl.GetELDecoration( styles, isElRefAnchor, htmlFileNode, fileName, anchor,
118 lineNo, int(lineStartRemaining - mfc.Remaining() - 9) );
119 ALIB_ASSERT_ERROR(styles.Size(), "DXL/HTML/JOB", "No styles given for EL-Anchor" )
120 // re-activate AString
121 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(),
122 "DXL/HTML/JOB", "Write buffer overflow detected" )
123 writeBuffer.SetLength(wb - writeBufferStart);
124
125 for ( int i= 1; i < styles.Size(); ++i ) // starting with 1 omits el/elRef
126 writeBuffer._<NC>( ' ' )._<NC>( styles.Get(i) );
127 writeBuffer._<NC>("\" href=\"" );
128 if ( fileName.IsNotEmpty() ) writeBuffer._<NC>( fileName );
129 if ( anchor.IsNotEmpty() ) writeBuffer._<NC>( '#' )._<NC>( anchor );
130 writeBuffer._<NC>( '\"' );
131 wb= writeBuffer.VBuffer() + writeBuffer.Length();
132 fileChanged= true;
133 if (isElAnchor) ++cntELReplacements;
134 else ++cntELRefReplacements;
135 }
136 else
137 *wb++= '\"';
138 continue;
139 }
140
141 // not '#'? continue
142 if (c != '#') { *wb++= c; continue; }
143
144
145 //---- 2nd character ----
146 if (mfc.Remaining()<3) {
147 *wb++= c;
148 while (mfc.Remaining()){
149 c= char(mfc.Next<NC>());
150 if ( c == '\n')
151 ++lineNo;
152 *wb++= c;
153 }
154 break;
155 }
156
157 c= char( mfc.Next<NC>() );
158
159 // if double hash is given, remove one hash ##"..." and keep the rest.
160 // Note: This is mainly needed for the documentation of this tool ;-)
161 if (c == '#' ) { *wb++= '#'; continue;}
162
163 // not '"' ?
164 if ( c != '\"' ) {
165 *wb++= '#';
166 *wb++= c;
167 if ( c == '\n')
168 ++lineNo;
169 continue;
170 }
171
172 //---- 3rd character: not an allowed link start? ----
173 c= char(mfc.Next<NC>());
174 if ( !isalpha(c) && String(".%^_&").IndexOf(c) < 0 ) {
175 *wb++= '#';
176 *wb++= '\"';
177 *wb++= c;
178 if ( c == '\n')
179 ++lineNo;
180 continue;
181 }
182
183 int colNo= int(lineStartRemaining - mfc.Remaining() - 2);
184
185 // search for exclamations
186 { auto exclIt= exclamations.begin();
187 for (; exclIt!=exclamations.end(); ++exclIt )
188 if ( (*exclIt)->Matches(lineNo, colNo ) )
189 break;
190 if (exclIt != exclamations.end()) {
191 *wb++= '#';
192 *wb++= '\"';
193 *wb++= c;
194 continue;
195 } }
196
197 // This seems to be an XLink!
198 bool suppressedAnchor;
199 linkString.Reset(c); {
200 bool foundEnd= false;
201 while (mfc.Remaining()) {
202 c= char(mfc.Next<NC>());
203 if ( c == '\\') { linkString._<NC>(c); linkString._<NC>(char(mfc.Next())); continue; }
204 if ( c == '\"') { foundEnd= true; break;}
205 if ( c == '\n') { lineNo++; break; }
206 linkString._<NC>(c);
207 if (linkString.Length() == 511 ) {
208 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
209 linkString, path, lineNo, colNo )
210 break;
211 }
212 }
213
214 suppressedAnchor= linkString.CharAtStart() == '%';
215
216 // end not found, or the "&" (which was let pass above!) did not evaluate to the '<' symbol
217 bool illegalHTMLEntity= ( linkString.CharAt(suppressedAnchor ? 1 : 0) == '&'
218 && ( ( suppressedAnchor && !linkString.StartsWith("%&lt;"))
219 || (!suppressedAnchor && !linkString.StartsWith( "&lt;")) ) ) ;
220 if ( !foundEnd || illegalHTMLEntity ) {
221 *wb++= '#';
222 *wb++= '\"';
223 for ( auto lsC : linkString )
224 *wb++= lsC;
225 if ( !illegalHTMLEntity) {
226 *wb++= '\n';
227 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
228 linkString, path, lineNo -1, colNo )
229 }
230 continue;
231 }
232
233 if (suppressedAnchor)
234 linkString[0]= ' ';
235 linkString.Trim();
236 }
237
238 ConvertHTMLEntitiesToAscii(linkString);
239 if (verbosity >= Verbosity::Info)
240 Lox_Info( "Found XLink pattern {!Q} in HTML file {}:{}:{}",
241 linkString, path, lineNo, colNo )
242
243 XLink* link= dxl.GetXLink(linkString, htmlFileNode);
244 {ALIB_LOCK_WITH(link->Lock)
245 link->HTMLLocations.push_back({htmlFileNode, lineNo, colNo});
246 }
247
248 // re-activate AString
249 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
250 "Write buffer overflow detected" )
251 writeBuffer.SetLength(wb - writeBufferStart);
252
253 // not resolved?
254 if ( !link->IsResolved() ) {
255 // paste the original XLink to the output
256 writeBuffer._<NC>( "#")._<NC>( "\"");
257 if ( suppressedAnchor )
258 writeBuffer._<NC>( "%");
259 writeBuffer._<NC>(linkString)._<NC>( "\"");
260 } else {
261 fileChanged= true;
262
263 // write replacement
264 auto& css= link->CSSClasses;
265 ALIB_ASSERT_ERROR(css.Size(), "DXL/HTML/JOB", "No styles given for XLink {}", linkString )
266
267 // -------------- write anchor -------------
268 if (!suppressedAnchor) {
269 writeBuffer._<NC>( "<a class=\"" );
270 for ( int i= 0; i < css.Size(); ++i )
271 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
272 writeBuffer.DeleteEnd(1);
273 writeBuffer._<NC>( "\" href=\"" )
274 ._<NC>( link->Result().HTMLBaseURL )
275 ._<NC>( '/' )
276 ._<NC>( link->Result().HTMLFile );
277 if (link->Result().HTMLAnchor.IsNotEmpty())
278 writeBuffer._<NC>( '#' )._<NC>( link->Result().HTMLAnchor );
279
280 writeBuffer._<NC>( "\">" )
281 ._<NC>( link->Display )
282 ._<NC>( "</a>" );
283
284 // -------------- write non-anchor text -------------
285 } else {
286 writeBuffer._<NC>( css.IsCodeEntity() ? "<code" :"<span" );
287 writeBuffer._<NC>( " class=\"" );
288 for ( int i= 1; i < css.Size(); ++i ) // start with 1, this omits el/elRef
289 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
290 writeBuffer.DeleteEnd(1);
291 writeBuffer._<NC>( "\">" );
292 writeBuffer._<NC>( link->Display )
293 ._<NC>( css.IsCodeEntity() ? "</code>" :"</span>" );
294 }
295 }
296 wb= writeBuffer.VBuffer() + writeBuffer.Length();
297 } // the read-loop
298
299 // add stats
300 dxl.Stats.HTMLFileLines.fetch_add(lineNo);
301 dxl.Stats.ELReplacements .fetch_add(cntELReplacements);
302 dxl.Stats.ELREFReplacements.fetch_add(cntELRefReplacements);
303
304 //-------------------------- write file ---------------------------------
305 if ( fileChanged && app.cli.DryRun != cli::DryRunModes::Application) {
306 Lox_Verbose("Writing file: {}", path )
307
308 Path tempPath;
309 tempPath << path << ".tmp";
310 ofstream outFile(tempPath.Terminate());
311 if ( !outFile.is_open() ) {
312 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
313 tempPath);
314 return true;
315 }
316 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
317 "Write buffer overflow detected" )
318 outFile.write(writeBuffer.Buffer() + writeBufferPadSize, wb - writeBufferStart - writeBufferPadSize);
319 outFile.close();
320
321 if ( outFile.fail() ) {
322 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
323 tempPath);
324 return true;
325 }
326
327 htmlFile.Close();
328 std::error_code ec;
329 std::filesystem::rename(tempPath.Terminate(), path.Terminate(), ec);
330 if ( ec.value() != 0 ) {
331 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
332 path, ec);
333 return true;
334 }
335 }
336 return true;
337}
338} //namespace [dxl]
339
#define ALIB_LOCK_SHARED_WITH(lock)
#define ALIB_ASSERT(cond, domain)
#define ALIB_ASSERT_ERROR(cond, domain,...)
#define ALIB_LOCK_WITH(lock)
#define Lox_Info(...)
#define Lox_SetDomain(...)
#define Lox_GetVerbosity(result,...)
#define Lox_Verbose(...)
#define Lox_Warning(...)
constexpr const TChar * Terminate() const
integer Capacity() const
TAString & DeleteEnd(const TString< TChar > &deleteIfMatch)
TChar * VBuffer() const
TAString & Trim(const TCString< TChar > &trimChars=CStringConstantsTraits< TChar >::DefaultWhitespaces())
void EnsureRemainingCapacity(integer spaceNeeded)
void SetLength(integer newLength)
constexpr integer Length() const
TChar CharAtStart() const
TChar CharAt(integer idx) const
constexpr bool IsNotEmpty() const
constexpr const TChar * Buffer() const
bool StartsWith(const TString &needle) const
class DXLApp
Definition dxlapp.hpp:37
bool IsEOF() const noexcept
std::size_t Remaining() const noexcept
const std::byte * Current() const noexcept
void Skip(std::size_t n)
Data Open(const char *path, std::size_t knownSize=std::numeric_limits< std::size_t >::max(), bool disableMMap=false)
void Close() noexcept
Release resources (unmap / free buffer).
int Size() const
Definition styles.hpp:96
const alib::String & Get(int idx) const
Definition styles.hpp:113
TApp & Get()
bool Equal(const TChar *lhs, const TChar *rhs, integer cmpLength)
lox::Verbosity Verbosity
strings::TFill< character > Fill
lang::integer integer
strings::TString< character > String
system::Path Path
LocalString< 128 > String128
LocalString< 256 > String256
strings::TAString< character, lang::HeapAllocator > AString
std::vector< T, StdMA< T > > StdVectorMA
LocalString< 512 > String512
todox
Definition doxyfile.cpp:20
void ConvertHTMLEntitiesToAscii(alib::AString &buffer)
Definition dxl.cpp:104
@ CantOpenHMLFile
A HTML file was not found or could not be accessed.
Definition dxl.hpp:96
DXLPoolWorker * poolWorker
The pool worker that executes this job.
bool Do() override
alib::files::File htmlFileNode
The HTML-file to load and search for DoxygenXLinks links.
Definition jobs.hpp:53
alib::String HTMLBaseURL
Definition index.hpp:391
alib::String HTMLFile
Definition index.hpp:395
alib::String HTMLAnchor
The HTML anchor hash. Set only with members.
Definition index.hpp:398