Daily-Question icon indicating copy to clipboard operation
Daily-Question copied to clipboard

【Q411】如何找到当前页面出现次数最多的HTML标签

Open shfshanyue opened this issue 4 years ago • 13 comments

shfshanyue avatar Aug 25 '20 00:08 shfshanyue

这是一道前端基础与编程功底具备的面试题:

  • 如果你前端基础强会了解 document.querySelectorAll(*) 能够列出页面内所有标签
  • 如果你编程能力强能够用递归/正则快速实现同等的效果

有三种 API 可以列出页面所有标签:

  1. document.querySelectorAll('*'),标准规范实现
  2. $$('*'),devtools 实现
  3. document.all,非标准规范实现
> document.querySelectorAll('*')
< NodeList(593) [html, head, meta, meta, meta, meta, meta, meta, meta, title, link#favicon, link, link#MainCss, link#mobile-style, link, link, link, script, script, script, script, script, script, script, link, script, link, link, script, input#_w_brink, body, a, div#home, div#header, div#blogTitle, a#lnkBlogLogo, img#blogLogo, h1, a#Header1_HeaderTitle.headermaintitle.HeaderMainTitle, h2, div#navigator, ul#navList, li, a#blog_nav_sitehome.menu, li, a#blog_nav_myhome.menu, li, a#blog_nav_newpost.menu, li, a#blog_nav_contact.menu, li, a#blog_nav_rss.menu, li, a#blog_nav_admin.menu, div.blogStats, span#stats_post_count, span#stats_article_count, span#stats-comment_count, div#main, div#mainContent, div.forFlow, div#post_detail, div#topics, div.post, h1.postTitle, a#cb_post_title_url.postTitle2.vertical-middle, span, div.clear, div.postBody, div#cnblogs_post_body.blogpost-body, p, p, strong, p, p, p, strong, div.cnblogs_code, pre, span, span, span, span, span, p, span, strong, pre, strong, span, strong, br, br, br, div.cnblogs_code, pre, span, span, p, p, …]
[0 … 99]
[100 … 199]
[200 … 299]
[300 … 399]
[400 … 499]
[500 … 592]
__proto__: NodeList

使用 document.querySelectorAll 实现如下

// 实现一个 maxBy 方便找出出现次数最多的 HTML 标签
const maxBy = (list, keyBy) => list.reduce((x, y) => keyBy(x) > keyBy(y) ? x : y)

function getFrequentTag () {
  const tags = [...document.querySelectorAll('*')].map(x => x.tagName).reduce((o, tag) => { 
    o[tag] = o[tag] ? o[tag] + 1 : 1;
    return o
  }, {})
  return maxBy(Object.entries(tags), tag => tag[1])
}

使用 element.children 递归迭代如下 (最终结果多一个 document)

function getAllTags(el = document) {
  const children = Array.from(el.children).reduce((x, y) => [...x, y.tagName, ...getAllTags(y)], [])
  return children
}

// 或者通过 flatMap 实现
function getAllTags(el = document) {
  const children = Array.prototype.flatMap.call(el.children, x => getAllTags(x))
  return [el, ...children]
}

如果你已经快速答了上来,那么还有两道拓展的面试题在等着你

  1. 如何找到当前页面出现次数前三多的 HTML 标签
  2. 如过多个标签出现次数同样多,则取多个标签

shfshanyue avatar Aug 26 '20 02:08 shfshanyue

使用document.querySelectorAll实现如下(包括可能次数一样多的标签)

function getMostFrequentTag() {
  const counter = {};

  document.querySelectorAll("*").forEach((element) => {
    counter[element.tagName] = counter[element.tagName]
      ? counter[element.tagName] + 1
      : 1;
  });

  const orderedTags = Object.entries(counter).sort((tag1, tag2) => {
    if (tag1[1] < tag2[1]) {
      return 1;
    }
    if (tag1[1] > tag2[1]) {
      return -1;
    }
    return 0;
  });

  const result = [];
  for (const tag of orderedTags) {
    if (tag[1] < orderedTags[0][1]) {
      break;
    }
    result.push(tag[0]);
  }
  return result;
}

Harry3014 avatar Mar 04 '21 15:03 Harry3014

使用Element.children递归实现如下

function getMostFrequentTag() {
  const counter = {};

  const traversalElement = (parent) => {
    if (parent.tagName !== undefined) {
      counter[parent.tagName] = counter[parent.tagName]
        ? counter[parent.tagName] + 1
        : 1;
    }
    const children = parent.children;
    for (let i = 0, length = children.length; i < length; i++) {
      traversalElement(children[i]);
    }
  };

  traversalElement(document);

  const orderedTags = Object.entries(counter).sort((tag1, tag2) => {
    if (tag1[1] < tag2[1]) {
      return 1;
    }
    if (tag1[1] > tag2[1]) {
      return -1;
    }
    return 0;
  });

  const result = [];
  for (const tag of orderedTags) {
    if (tag[1] < orderedTags[0][1]) {
      break;
    }
    result.push(tag[0]);
  }
  return result;
}

Harry3014 avatar Mar 04 '21 16:03 Harry3014

codepen demo

const allElements = document.querySelectorAll("*")
const elementFrequency = Array.from(allElements).reduce((a,b) => {
  a[b.tagName] = a[b.tagName] ? a[b.tagName]+1 : 1
  return a
}, {})
console.log(elementFrequency)

const sortedElementFrequency = Object.entries(elementFrequency).sort((a, b) => b[1] - a[1])
console.log(sortedElementFrequency)

const copiedElementFrequency = JSON.parse(JSON.stringify(sortedElementFrequency))
const mergedElementFrequency = copiedElementFrequency.reduce((a,b) => {
  if (a.length === 0) {
    a.push(b)
    return a
  }
  let lastItem = a[a.length - 1]
  if (lastItem[1] === b[1]) {
    // if (Array.isArray(lastItem[0])) {
    //   lastItem[0].push(b[0])
    // } else {
    //   lastItem[0] = [lastItem[0], b[0]]
    // }
    lastItem[0] = Array.isArray(lastItem[0]) ? lastItem[0].concat([b[0],]) : [lastItem[0], b[0]]
  } else {
    a.push(b)
  }
  return a
}, [])
console.log(mergedElementFrequency)

hwb2017 avatar Oct 11 '21 13:10 hwb2017

// 获取当前页面所有HTML标签
const allelements = document.querySelectorAll('*')

function findMost(arr) {
  let temp = {}
  let maxNum = 0
  let maxEle = null
  for (let i = 0; i < arr.length; i++) {
    let ele = arr[i].tagName  // 标签名
    temp[ele] === undefined ? temp[ele] = 1 : temp[ele]++
    if (temp[ele] > maxNum) {
      maxNum = temp[ele]
      maxEle = ele
    }
  }
  // 应考虑次数相同的情况
  let eleArry=[]
  for (let key in temp) {
    if(temp[key]===maxNum){
      eleArry.push(key)
    }
  }
  return { eleArry ,maxNum}
}
let result = findMost(allelements)
console.log(result);

ethanlamm avatar Sep 06 '22 08:09 ethanlamm

// 利用hash
const map =new Map();
[...document.querySelectorAll("*")].forEach(item => {
    const tagName = item.tagName.toLowerCase(); 
    map.set( tagName , map.has(tagName) ? map.get(tagName)+1 : 1);
});

hatedMe avatar Apr 18 '23 09:04 hatedMe

function findMostEle() {
  const els = document.querySelectorAll("*");
  const map = new Map();
  for (let i = 0; i < els.length; i++) {
    const el = els[i];
    const tag = el.tagName;
    if (map.has(tag)) {
      map.set(tag, map.get(tag) + 1);
    } else {
      map.set(tag, 1);
    }
  }
  return [...map].sort((a, b) => b[1] - a[1]);
}

Ghaining avatar Aug 08 '23 21:08 Ghaining

@Ghaining markdown 没有标记 javascript 语言呀

shfshanyue avatar Aug 08 '23 22:08 shfshanyue

文中有个代码有错 使用 element.children 递归迭代如下 (最终结果多一个 document) 修改如下:

function getAllTags(el = document) {
  const children = Array.from(el.children).reduce(
    (x, y) => [...x, y.tagName, ...getAllTags(y)],
    []
  );
  return children;
}

601odd avatar Sep 07 '23 07:09 601odd

@601odd 已修复

shfshanyue avatar Sep 07 '23 23:09 shfshanyue

// 找到当前页面出现次数前几位的 HTML 标签
// 如果多个标签出现次数同样多,则取多个标签
function getMaxFreguentTag(top = 1) {
    const tags = [...document.querySelectorAll('*')]
        .map(el => el.tagName)
        .reduce((res, tag) => {
            res[tag] = res[tag] ? res[tag] + 1 : 1
            return res
        }, {})
    
    // 利用数组把标签排序
    const sortedTags = []
    for (const [k, v] of Object.entries(tags)) {
        sortedTags[v] ||= []
        sortedTags[v].push(k)
    }

    // 数组末尾 top 个非空元素,即所要的结果
    const res = []
    const len = Math.min(top, sortedTags.length)
    for (let i = 0; i < len;) {
        const tag = sortedTags.pop()
        if (tag) {
            res.push(...tag) // 包含同频次标签
            i++
        }
    }
    return res
}

justorez avatar Oct 08 '23 08:10 justorez

Object.entries($$('*').map(it => it.tagName.toLowerCase()).reduce((cntArr, tag) => { cntArr[tag] = cntArr[tag] ? cntArr[tag] + 1 : 1; return cntArr }, {})).reduce((x, y) => x[1] > y[1] ? x : y)

Si3ver avatar Nov 07 '23 07:11 Si3ver