Autodetect and build a TSV link mapping to convert leftover Confluence links to XWiki document links using XDOM


After a Confluence migration, some absolute Confluence links pointing to documents may still remain in the wiki content. This snippet autodetects them and tries to find the documents they point to. It produces a TSV mapping that can be used to fix these links.
Type: Snippet
Category: Other
Developed by: Raphaël Jakse

License: GNU Lesser General Public License 2.1


Description

This snippet produces a TSV mapping to be used with this other snippet: Bulk update links according to a TSV mapping using XDOM.
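
Each line of the generated file maps an old Confluence URL to the new XWiki document reference, separated by a tab character. For instance (placeholder values; <TAB> stands for the tab character):

{{{http://confluence.example.org/display/myspace/mydoc<TAB>doc:myspace.mydoc.WebHome}}}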

{{velocity}}
#if (!$request.confirm)
  #set ($spacePickerParams = {
    'name': 'targetSpace',
    'value': "$!{request.targetSpace}"
  })
  This script generates a TSV file to update Confluence links in XWiki documents. It is particularly useful after content migrations.

  For each document in the given space, XWiki will find Confluence links that can be converted to proper wiki links and compute the new link. The result is a file in which each line contains the old link and the new link separated by a tab; it is meant to be reviewed and then used with another snippet to apply the conversion. Each line of this file will look like:

  {{{oldlink<TAB>newlink}}}

  For instance:

  {{{http://confluence.example.org/display/myspace/mydoc    doc:myspace.mydoc.WebHome}}}

  As long as no tab character appears in the generated links, the result is valid TSV (a CSV-like format using the tab character as the separator).

  Programming rights are required to use this script.

  {{html clean="false"}}
  <form class="xform" action="?xpage=plain&outputSyntax=plain" method="post">
    <dl>
      <dt>
        <label for="targetSpace">Space</label>
        <span class="xHint">The link converter job will execute for every document under the given space.</span>
      </dt>
      <dd>
        #pagePicker($spacePickerParams)
      </dd>
      <dt>
        <label for="confluenceBaseURL">Confluence base URL</label>
        <span class="xHint">The link converter job will look for links starting with this base URL (e.g. http://confluence.example.org, without the /display/ part or a trailing slash).</span>
      </dt>
      <dd>
        <input type="text" id="confluenceBaseURL" name="confluenceBaseURL" required="required" />
      </dd>
    </dl>
    <p>
      <span class="buttonwrapper">
        <input type="hidden" name="form_token" value="$!{services.csrf.token}"/>
        <input type="hidden" name="confirm" value="true"/>
        <input class="button" type="submit" value="Convert links"/>
      </span>
    </p>
  </form>
  {{/html}}

#end
{{/velocity}}

{{groovy wiki="false"}}
  import org.apache.commons.lang3.StringUtils;
  import org.xwiki.query.QueryManager;
  import org.xwiki.rendering.block.Block;
  import org.xwiki.rendering.block.MacroBlock;
  import org.xwiki.rendering.block.LinkBlock;
  import org.xwiki.rendering.block.match.ClassBlockMatcher;
  import org.xwiki.rendering.macro.Macro;
  import org.xwiki.rendering.transformation.MacroTransformationContext;
  import org.xwiki.rendering.listener.reference.ResourceReference;
  import org.xwiki.rendering.listener.reference.ResourceType;
  import org.xwiki.model.reference.*;
  import java.net.URLDecoder;

  logger = services.logging.getLogger('ConfluenceLinkConverter');
  services.logging.setLevel('ConfluenceLinkConverter', org.xwiki.logging.LogLevel.INFO);
  componentManager = services.component.getComponentManager();
  documentReferenceResolver = componentManager.getInstance(DocumentReferenceResolver.TYPE_STRING);
  entityReferenceSerializer = componentManager.getInstance(EntityReferenceSerializer.TYPE_STRING);
  queryManager = componentManager.getInstance(QueryManager.class);
  solr = services.solr;

  def verifyXDOM(xdom, syntaxId, currentDocumentReference, confluenceBaseURL, linkMapping) {
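    // Recursively inspects the XDOM: macros whose content is wiki syntax are parsed and checked too,
    // and every URL link starting with confluenceBaseURL is resolved to an XWiki document (via a fuzzy
    // Solr query) and recorded in linkMapping as oldURL -> "doc:serialized.reference".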
    // First, inspect any macro that could contain nested wiki content
    xdom.getBlocks(new ClassBlockMatcher(MacroBlock.class), Block.Axes.DESCENDANT_OR_SELF).each { block ->
      logger.debug('Checking macro [{}] - [{}]', block.getId(), block.getClass());
      if (componentManager.hasComponent(Macro.class, block.getId())) {
        // Check if the macro content is wiki syntax, in which case we'll also verify the contents of the macro
        def macroContentDescriptor = componentManager.getInstance(Macro.class, block.getId()).getDescriptor().getContentDescriptor();
        if (macroContentDescriptor != null && macroContentDescriptor.getType().equals(Block.LIST_BLOCK_TYPE) && StringUtils.isNotBlank(block.getContent())) {
          // We will take a quick shortcut here and directly parse the macro content with the syntax of the document
          logger.debug('Calling parse on [{}] with syntax [{}]', block.getId(), syntaxId)
          def macroXDOM = services.rendering.parse(block.getContent(), syntaxId);
          verifyXDOM(macroXDOM, syntaxId, currentDocumentReference, confluenceBaseURL, linkMapping);
        }
      }
    }

    xdom.getBlocks(new ClassBlockMatcher(LinkBlock.class), Block.Axes.DESCENDANT_OR_SELF).each { block ->
      def linkRef = block.getReference();
      if (ResourceType.URL.equals(linkRef.getType())) {
        // we only consider URL links
        def link = linkRef.getReference();
        if (link.startsWith(confluenceBaseURL)) {
          // compute new link
          // TODO take care of the anchor
          // https://solr.apache.org/guide/8_7/the-standard-query-parser.html
          def decodedPath = URLDecoder.decode(link.substring(confluenceBaseURL.length()).replaceAll('#.*$', ''), 'UTF-8');
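          // Escape Solr special characters so the decoded path is matched literally in the fullname query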
          def specialSolrChars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', "'", '^', '"', '~', '*', '?', ':', '/'];
          for (def c in specialSolrChars) {
            decodedPath = decodedPath.replace(c, '\\' + c);
          }
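          // Build two candidate document full names from the URL path: one with whitespace removed, one kept as-is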
          def candidateNameWithoutSpace = String.join(".", decodedPath.replaceAll('\\s', '').split('/'));
          def candidateName = String.join(".", decodedPath.split('/'));
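          // Ask Solr for the single best-scoring document whose full name approximately matches one of the candidates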
          def results = (
            queryManager
              .createQuery('(fullname:' + candidateNameWithoutSpace + '.WebHome~2) OR (fullname:' + candidateNameWithoutSpace + '~2) OR (fullname:*' + candidateName + '*)', 'solr')
              .bindValue('sort', "score desc")
              .bindValue('fq', 'type:DOCUMENT')
              .setLimit(1)
              .execute().get(0).getResults()
          );
          if (results.empty) {
            logger.info("Could not find any document for the link [{}]", link);
          } else {
            def documentReference = solr.resolveDocument(results.get(0));
            logger.info("Found document [{}] for the link [{}]", documentReference, link);
            linkMapping.put(link, "doc:" + entityReferenceSerializer.serialize(documentReference));
          }
        }
      }
    }
  }

  if (hasProgramming && services.csrf.isTokenValid(request.form_token)) {
    // Check if we have enough to work on
    if (request.targetSpace && StringUtils.isNotBlank(request.targetSpace) && request.confluenceBaseURL && StringUtils.isNotBlank(request.confluenceBaseURL)) {
      def linkMapping = new HashMap();
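      // Build a LIKE pattern covering every document under the selected space (the trailing WebHome of the picker value is stripped)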
      def spacePrefix = "${StringUtils.removeEnd(request.targetSpace, 'WebHome')}%";

      // Get every page matching the space
      def documents = services.query.hql('select doc.fullName from XWikiDocument doc where doc.fullName like :spacePrefix').bindValue('spacePrefix', spacePrefix.toString()).execute();
      logger.debug('Space prefix: [{}]', spacePrefix)
      logger.debug('Found [{}] documents to verify', documents.size())
      documents.each { documentFullName ->
        try {
          def document = xwiki.getDocument(documentFullName);
          logger.info('Verifying document [{}]', document.getDocumentReference());
          def xdom = document.getXDOM();
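          // Only links starting with <Confluence base URL>/display/ (Confluence's space/page URL scheme) are considered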
          verifyXDOM(xdom, document.getSyntaxId(), document.getDocumentReference(), request.confluenceBaseURL + "/display/", linkMapping);
        } catch (Exception e) {
          logger.error('Uncaught exception', e);
        }
      }
      response.setContentType("text/plain;charset=utf-8");
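      // Output the mapping as plain-text TSV: one old link / new link pair per line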
      for (def entry in linkMapping.entrySet()) {
        print(entry.getKey() + "\t" + entry.getValue() + "\n");
      }
    } else {
      logger.error('Insufficient parameters. Please provide a target space and a Confluence base URL. Aborting.');
    }
  } else {
    logger.error('Insufficient permissions or invalid CSRF token. Aborting.')
  }
{{/groovy}}
