Anchor text, the visible text of a hyperlink, plays a crucial role on a webpage. By clicking on anchor text, users can conveniently navigate between web pages, which greatly enhances the user experience. Anchor text also plays an indispensable role in Search Engine Optimization (SEO). Search engines analyze the content of anchor text to judge the theme and relevance of the linked page, which in turn affects the page's ranking. Setting anchor text sensibly is therefore of great importance for improving a website's SEO.

Background and requirements for automated anchor text implementation

As the content of a website continues to grow, setting anchor text manually becomes increasingly cumbersome and time-consuming. The need to improve work efficiency and reduce labor costs makes automated anchor text a pressing requirement. Automation can quickly generate a large number of anchor texts, speed up the construction of a website's internal links, and thereby enhance the website's SEO.

Collecting anchor text

Before implementing automated anchor text, it is first necessary to collect the anchor text from the website's content. The following uses the automated anchor text feature of AnQi CMS as an example to introduce the strategy for collecting anchor text.

AnQi CMS provides two ways to collect anchor text: automatic extraction and manual extraction. If automatic extraction is selected, the program automatically parses the keywords of an article when the user adds it, and adds those keywords as the anchor text of the current article. If manual extraction is selected, there are two options: filling in keywords individually and importing keywords in bulk.

Strategy for automated anchor text

Developing an automated anchor text strategy is a key step. This includes determining the rules for generating anchor text, such as keyword selection, anchor text length, and anchor text position.

  • Custom Anchor Text Density
    AnQi CMS provides the option to customize the density of anchor text when processing the generation strategy, allowing users to freely choose the density of the anchor text.

  • Match by keyword from longest to shortest
    AnQi CMS adopts a length-first strategy: if several different anchor texts could match the same content, the longest anchor text takes priority. For example, when the keyword AAB appears in the content, the anchor text will be AAB rather than the shorter AA or AB.

  • Match only once
    If the same anchor text keyword appears several times in an article, only the first occurrence is turned into anchor text; the subsequent occurrences are only bolded. This ensures that the same keyword is anchored only once within a piece of content and that the same URL is linked only once; all other occurrences are displayed in bold.

  • Anchor text generation method
    AnQi CMS adopts a length-first strategy. That is, if there are different anchor texts for the same link, the longest anchor text is used first. If the same anchor text keyword appears several times in an article, only the first occurrence is given anchor text, and subsequent occurrences are only bolded.

AnQi CMS offers two ways to generate anchor text: automatic keyword insertion and manual batch updating of keywords. If automatic insertion is selected, suitable keywords in an article are automatically replaced with anchor text when the article is published. If manual batch updating is selected, a batch update function is provided on the anchor text page, and users can update anchor text manually according to their own needs.

Implementation code for automated anchor text

Note: Since AnQiCMS is developed in Go, the following implementation is written in Go.

// Patterns used by AutoInsertAnchors, compiled once at package initialization
// instead of on every call.
var (
	// autoAnchorLinkTagRe matches a whole <a>…</a> element. The \b keeps tags
	// that merely start with "a" (<abbr>, <article>, …) from being treated as
	// links, and the s flag lets an element span several lines.
	autoAnchorLinkTagRe = regexp.MustCompile(`(?is)<a\b[^>]*>(.*?)</a>`)

	// autoAnchorHrefRe captures the href value (group 1) and the inner content
	// (group 2) of an <a> element.
	autoAnchorHrefRe = regexp.MustCompile(`(?is)<a\s*[^>]*href=["']?([^"']*)["']?[^>]*>(.*?)</a>`)

	// autoAnchorStrongRe matches a whole <strong>…</strong> element.
	autoAnchorStrongRe = regexp.MustCompile(`(?is)<strong[^>]*>(.*?)</strong>`)

	// autoAnchorMdLinkRe matches Markdown [text](url). Group 1 is the character
	// in front of "[", which is "!" for an image.
	autoAnchorMdLinkRe = regexp.MustCompile(`(?i)(.?)\[(.*?)]\((.*?)\)`)

	// autoAnchorMdBoldRe matches Markdown **bold** text.
	autoAnchorMdBoldRe = regexp.MustCompile(`(?i)\*\*(.*?)\*\*`)

	// autoAnchorAttrTagRe matches any tag that carries attributes.
	autoAnchorAttrTagRe = regexp.MustCompile(`(?i)</?[a-z0-9]+(\s+[^>]+)>`)

	// autoAnchorPlaceholderRe recognizes a string that is exactly one "{$N}"
	// placeholder.
	autoAnchorPlaceholderRe = regexp.MustCompile(`^\{\$\d+\}$`)
)

// AutoInsertAnchors links anchor keywords found in content and returns the
// rewritten content.
//
// For each anchor, in the order given, the first case-insensitive occurrence
// of anchor.Title becomes a link to anchor.Link and every further occurrence
// is bolded. Markdown syntax is emitted when content does not start with a
// tag, HTML otherwise. Existing links, bold text and tags with attributes are
// swapped for "{$N}" placeholders while keywords are replaced, so they are
// never modified.
//
// An anchor is skipped when its Title is empty, when its Link ends with link
// (it points at the current page), or when its keyword or link is already
// present in content. Insertion stops once the number of distinct links
// reaches ceil(plain-text rune count / PluginAnchor.AnchorDensity).
//
// The empty string is returned when anchors is empty.
func AutoInsertAnchors(anchors []*model.Anchor, content string, link string) string {
	if len(anchors) == 0 {
		// No keywords: stop here.
		return ""
	}

	// The anchor budget is based on the rune count of the tag-stripped text.
	contentLen := len([]rune(library.StripTags(content)))
	if PluginAnchor.AnchorDensity < 20 {
		// Densities below 20 fall back to the default of 200.
		// NOTE: the default is written back to the shared PluginAnchor value.
		PluginAnchor.AnchorDensity = 200
	}
	// Maximum number of distinct links the content may contain.
	maxAnchorNum := int(math.Ceil(float64(contentLen) / float64(PluginAnchor.AnchorDensity)))

	// Content that does not start with a tag is treated as Markdown.
	isMarkdown := !strings.HasPrefix(strings.TrimSpace(content), "<")

	// Lower-cased keywords and links that are already present in the content.
	existsKeywords := map[string]bool{}
	existsLinks := map[string]bool{}

	// stashed[i] holds the text that placeholder "{$i}" stands for.
	var stashed []string
	stash := func(value string) string {
		key := fmt.Sprintf("{$%d}", len(stashed))
		stashed = append(stashed, value)
		return key
	}

	// Hide every existing <a> element and remember its keyword and link.
	content = autoAnchorLinkTagRe.ReplaceAllStringFunc(content, func(s string) string {
		if m := autoAnchorHrefRe.FindStringSubmatch(s); len(m) > 2 {
			existsKeywords[strings.ToLower(m[2])] = true
			existsLinks[strings.ToLower(m[1])] = true
		}
		return stash(s)
	})
	// Hide every <strong> element.
	content = autoAnchorStrongRe.ReplaceAllStringFunc(content, stash)
	// Hide Markdown links and images; only real links (not images) count as
	// existing anchors.
	content = autoAnchorMdLinkRe.ReplaceAllStringFunc(content, func(s string) string {
		if m := autoAnchorMdLinkRe.FindStringSubmatch(s); len(m) > 3 && m[1] != "!" {
			existsKeywords[strings.ToLower(m[2])] = true
			existsLinks[strings.ToLower(m[3])] = true
		}
		return stash(s)
	})
	// Hide Markdown bold text.
	content = autoAnchorMdBoldRe.ReplaceAllStringFunc(content, stash)
	// Hide tags with attributes so keywords inside attribute values are not
	// replaced.
	content = autoAnchorAttrTagRe.ReplaceAllStringFunc(content, stash)

	if len(existsLinks) < maxAnchorNum {
		for _, anchor := range anchors {
			if anchor == nil || anchor.Title == "" {
				continue
			}
			// Skip anchors that point at the current page. HasSuffix is true
			// for an empty suffix, so an empty link must not be compared or
			// every anchor would be skipped.
			if link != "" && strings.HasSuffix(anchor.Link, link) {
				continue
			}
			// Both maps are keyed by lower-cased values, for lookups and for
			// inserts alike.
			titleKey := strings.ToLower(anchor.Title)
			linkKey := strings.ToLower(anchor.Link)
			if existsKeywords[titleKey] || existsLinks[linkKey] {
				continue
			}

			// The first alternative consumes placeholders so that a keyword
			// such as "1" or "$" cannot match inside "{$1}".
			keywordRe, err := regexp.Compile(`\{\$\d+\}|(?i:` + regexp.QuoteMeta(anchor.Title) + `)`)
			if err != nil {
				// Best effort: a keyword that cannot be compiled is skipped.
				continue
			}

			replaceNum := 0
			content = keywordRe.ReplaceAllStringFunc(content, func(s string) string {
				if autoAnchorPlaceholderRe.MatchString(s) {
					return s
				}

				var replaceHtml string
				switch {
				case replaceNum == 0 && isMarkdown:
					replaceHtml = fmt.Sprintf("[%s](%s)", s, anchor.Link)
				case replaceNum == 0:
					replaceHtml = fmt.Sprintf("<a href=\"%s\" data-anchor=\"%d\">%s</a>", anchor.Link, anchor.Id, s)
				case isMarkdown:
					replaceHtml = fmt.Sprintf("**%s**", s)
				default:
					replaceHtml = fmt.Sprintf("<strong data-anchor=\"%d\">%s</strong>", anchor.Id, s)
				}
				if replaceNum == 0 {
					// Only the first occurrence becomes a link and counts
					// towards the budget.
					existsLinks[linkKey] = true
					existsKeywords[titleKey] = true
				}
				replaceNum++

				// Stash the generated markup so later keywords cannot match
				// inside it.
				return stash(replaceHtml)
			})

			// Stop once the link budget is used up.
			if len(existsLinks) >= maxAnchorNum {
				break
			}
		}
	}

	// Restore placeholders newest-first: a later stashed value may itself
	// contain earlier placeholders, which only become visible once restored.
	for i := len(stashed) - 1; i >= 0; i-- {
		content = strings.Replace(content, fmt.Sprintf("{$%d}", i), stashed[i], 1)
	}

	return content
}