Autodetect and build a TSV link mapping to convert leftover Confluence links to XWiki document links using XDOM
Last modified by Nikita Petrenko on 2025/02/12 12:24
Description | After a Confluence migration, some Confluence absolute links linking to documents may still be there. This snippet autodetects them and tries to find the documents they point to. It produces a TSV mapping that can be used to fix these links. |
Type | Snippet |
Category | Other |
Developed by | |
Rating | |
License | GNU Lesser General Public License 2.1 |
Table of contents
Description
This snippet produces a TSV mapping to be used with this other snippet: Bulk update links according to a TSV mapping using XDOM.
{{velocity}}
## Show the configuration form until the user confirms the conversion.
## NOTE: the previous version required an attachment on this page before showing the form;
## that check was copy-paste residue from the companion "Bulk update links" snippet — this
## snippet only reads request parameters and never uses attachments, so the gate was removed.
#if (!$request.confirm)
#set ($spacePickerParams = {
  'name': 'targetSpace',
  'value': "$!{request.targetSpace}"
})
This script allows generating a TSV file to update Confluence links in XWiki documents. It is particularly useful after content migrations.
For each document part of a given space, XWiki will find Confluence links that can be converted to a proper wiki link, and compute this new link. The result is a file in which each line is the old link and the new link separated by a tab, to be reviewed and to be used with another snippet to apply the conversion. Each line of this file will look like:
{{{oldlink<TAB>newlink}}}
For instance:
{{{http://confluence.example.org/display/myspace/mydoc doc:myspace.mydoc.WebHome}}}
If no tabs are present in the generated links, the result is compatible with TSV (CSV-like with a tab character as the separator).
Programming rights are required to use this script.
{{html clean="false"}}
<form class="xform" action="?xpage=plain&outputSyntax=plain" method="post">
  <dl>
    <dt>
      <label for="targetSpace">Space</label>
      <span class="xHint">The link converter job will execute for every document under the given space.</span>
    </dt>
    <dd>
      #pagePicker($spacePickerParams)
    </dd>
    <dt>
      <label for="confluenceBaseURL">Confluence base URL:</label>
      <span class="xHint">The link converter job will find links with this base URL.</span>
    </dt>
    <dd>
      <input type="text" name="confluenceBaseURL" required="required" />
    </dd>
  </dl>
  <p>
    <span class="buttonwrapper">
      <input type="hidden" name="form_token" value="$!{services.csrf.token}"/>
      <input type="hidden" name="confirm" value="true"/>
      <input class="button" type="submit" value="Convert links"/>
    </span>
  </p>
</form>
{{/html}}
#end
{{/velocity}}
{{groovy wiki="false"}}
import org.apache.commons.lang3.StringUtils;
import org.xwiki.query.QueryManager;
import org.xwiki.rendering.block.Block;
import org.xwiki.rendering.block.MacroBlock;
import org.xwiki.rendering.block.LinkBlock;
import org.xwiki.rendering.block.match.ClassBlockMatcher;
import org.xwiki.rendering.macro.Macro;
import org.xwiki.rendering.transformation.MacroTransformationContext;
import org.xwiki.rendering.listener.reference.ResourceReference;
import org.xwiki.rendering.listener.reference.ResourceType;
import org.xwiki.model.reference.*;
import java.net.URLDecoder;
// Dedicated logger for this snippet; level forced to INFO so progress is visible.
logger = services.logging.getLogger('ConfluenceLinkConverter');
services.logging.setLevel('ConfluenceLinkConverter', org.xwiki.logging.LogLevel.INFO);
componentManager = services.component.getComponentManager();
// NOTE(review): this resolver is never used below; kept so existing copies of the snippet
// that reference it keep working.
documentReferenceResolver = componentManager.getInstance(DocumentReferenceResolver.TYPE_STRING);
// Look up the serializer under its parameterized role type (EntityReferenceSerializer<String>),
// consistent with the resolver lookup above: XWiki registers these components under the
// parameterized type, so a raw-class lookup is likely to fail.
entityReferenceSerializer = componentManager.getInstance(EntityReferenceSerializer.TYPE_STRING);
queryManager = componentManager.getInstance(QueryManager.class);
solr = services.solr;
/**
 * Recursively scans the given XDOM for URL links starting with confluenceBaseURL and tries to
 * resolve the XWiki document each one points to, using a fuzzy Solr search on the document
 * full name derived from the Confluence URL path. Every match is recorded in linkMapping as
 * (old URL -> "doc:" + serialized document reference).
 *
 * @param xdom the parsed content to scan
 * @param syntaxId the syntax id used to parse nested macro content
 * @param currentDocumentReference reference of the document being scanned (currently unused;
 *        kept for interface stability)
 * @param confluenceBaseURL the URL prefix identifying leftover Confluence links
 * @param linkMapping accumulator map of old link -> new link
 */
def verifyXDOM(xdom, syntaxId, currentDocumentReference, confluenceBaseURL, linkMapping) {
    // First, recurse into any macro whose content is itself wiki syntax (it may contain links).
    xdom.getBlocks(new ClassBlockMatcher(MacroBlock.class), Block.Axes.DESCENDANT_OR_SELF).each { block ->
        logger.debug('Checking macro [{}] - [{}]', block.getId(), block.getClass());
        if (componentManager.hasComponent(Macro.class, block.getId())) {
            // Only parse the macro content when it is declared as a list of blocks (wiki syntax).
            def macroContentDescriptor = componentManager.getInstance(Macro.class, block.getId()).getDescriptor().getContentDescriptor();
            if (macroContentDescriptor != null && macroContentDescriptor.getType().equals(Block.LIST_BLOCK_TYPE) && StringUtils.isNotBlank(block.getContent())) {
                // Shortcut: parse the macro content with the syntax of the enclosing document.
                logger.debug('Calling parse on [{}] with syntax [{}]', block.getId(), syntaxId)
                def macroXDOM = services.rendering.parse(block.getContent(), syntaxId);
                verifyXDOM(macroXDOM, syntaxId, currentDocumentReference, confluenceBaseURL, linkMapping);
            }
        }
    }
    xdom.getBlocks(new ClassBlockMatcher(LinkBlock.class), Block.Axes.DESCENDANT_OR_SELF).each { block ->
        def linkRef = block.getReference();
        if (ResourceType.URL.equals(linkRef.getType())) {
            // We only consider URL links.
            def link = linkRef.getReference();
            if (link.startsWith(confluenceBaseURL)) {
                // Compute the new link from the Confluence URL path (anchor stripped).
                // TODO take care of the anchor
                // Escape Solr special characters, see:
                // https://solr.apache.org/guide/8_7/the-standard-query-parser.html
                def decodedPath = URLDecoder.decode(link.substring(confluenceBaseURL.length()).replaceAll('#.*$', ''), 'UTF-8');
                // '\\' must come first so the escaping backslashes we add are not re-escaped.
                def specialSolrChars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', "'", '^', '"', '~', '*', '?', ':', '/'];
                for (def c in specialSolrChars) {
                    decodedPath = decodedPath.replace(c, '\\' + c);
                }
                // FIX: String.replace() is a literal replacement, not a regex, so the previous
                // replace('\s', '') never stripped whitespace; use replaceAll with the \s class.
                def candidateNameWithoutSpace = String.join(".", decodedPath.replaceAll('\\s', '').split('/'));
                def candidateName = String.join(".", decodedPath.split('/'));
                // Fuzzy-match the candidate names against document full names, best score first.
                // FIX: a space was missing before the last 'or', producing the invalid query
                // fragment '~2)or ('.
                def results = (
                    queryManager
                        .createQuery('(fullname:' + candidateNameWithoutSpace + '.WebHome~2) or (fullname:' + candidateNameWithoutSpace + '~2) or (fullname:*' + candidateName + '*)', 'solr')
                        .bindValue('sort', "score desc")
                        .bindValue('fq', 'type:DOCUMENT')
                        .setLimit(1)
                        .execute().get(0).getResults()
                );
                if (results.empty) {
                    logger.info("Could not find any document for the link [{}]", link);
                } else {
                    def documentReference = solr.resolveDocument(results.get(0));
                    logger.info("Found document [{}] for the link [{}]", documentReference, link);
                    linkMapping.put(link, "doc:" + entityReferenceSerializer.serialize(documentReference));
                }
            }
        }
    }
}
// Entry point: requires programming rights and a valid CSRF token (the form posts form_token).
if (hasProgramming && services.csrf.isTokenValid(request.form_token)) {
    // Check that both required request parameters were provided and are non-blank.
    if (request.targetSpace && StringUtils.isNotBlank(request.targetSpace) && request.confluenceBaseURL && StringUtils.isNotBlank(request.confluenceBaseURL)) {
        def linkMapping = new HashMap();
        // Turn the picked page into a space prefix for the HQL LIKE clause
        // (e.g. "My.Space.WebHome" -> "My.Space.%").
        def spacePrefix = "${StringUtils.removeEnd(request.targetSpace, 'WebHome')}%";
        // Get every page whose full name matches the space prefix.
        def documents = services.query.hql('select doc.fullName from XWikiDocument doc where doc.fullName like :spacePrefix').bindValue('spacePrefix', spacePrefix.toString()).execute();
        logger.debug('Space prefix: [{}]', spacePrefix)
        logger.debug('Found [{}] documents to verify', documents.size())
        // Normalize the base URL so a user-entered trailing slash does not produce
        // "...//display/", which would never match any link.
        def confluenceDisplayURL = StringUtils.removeEnd(request.confluenceBaseURL, '/') + "/display/";
        documents.each { documentFullName ->
            try {
                def document = xwiki.getDocument(documentFullName);
                logger.info('Verifying document [{}]', document.getDocumentReference());
                def xdom = document.getXDOM();
                verifyXDOM(xdom, document.getSyntaxId(), document.getDocumentReference(), confluenceDisplayURL, linkMapping);
            } catch (Exception e) {
                // Keep going: one broken document should not abort the whole space scan.
                logger.error('Uncaught exception', e);
            }
        }
        // Emit the mapping as plain-text TSV: one "oldlink<TAB>newlink" line per entry.
        response.setContentType("text/plain;charset=utf-8");
        for (def entry in linkMapping.entrySet()) {
            print(entry.getKey() + "\t" + entry.getValue() + "\n");
        }
    } else {
        // FIX: the previous message asked for "an attachment name", which is residue from the
        // companion snippet; this script needs a target space and a Confluence base URL.
        logger.error('Insufficient parameters. Please provide a target space and a Confluence base URL to use. Aborting.');
    }
} else {
    logger.error('Insufficient permissions or invalid CSRF token. Aborting.')
}