Quite useful if you are writing an browser extension or something.
To get all the images in DOM there are actually three places we are going to look at: <img>
element, background-image
CSS property and, <iframe>
. Yes, every iframe hides a magical kingdom.
img
If you only want to get images in <img>
, two options for you to choose:
You can either search the DOM for img
tag:
1 2 3 4 5 6 7 8 9 10
function getImgs (doc ) { return Array .from(doc.getElementsByTagName('img' )) .map(img => ({ src: img.currentSrc, width: img.naturalWidth, height: img.naturalHeight })) } getImgs(document )
Or just use document.images :
1 2 3 4 5 6 7 8 9 10
function getImgs (doc ) { return Array .from(doc.images) .map(img => ({ src: img.currentSrc, width: img.naturalWidth, height: img.naturalHeight })) } getImgs(document )
background-image
For background-image
, we need to check every node in DOM:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
function getBgImgs (doc ) { const srcChecker = /url\(\s*?['"]?\s*?(\S+?)\s*?["']?\s*?\)/i return Array .from( Array .from(doc.querySelectorAll('*' )) .reduce((collection, node ) => { let prop = window .getComputedStyle(node, null ) .getPropertyValue('background-image' ) let match = srcChecker.exec(prop) if (match) { collection.add(match[1 ]) } return collection }, new Set ()) ) } getBgImgs(document )
We can't simply get the width and height of a background image. If you need them, you have to load it.
Since the images you get in DOM are most likely already in the browser cache, the loading process should be fairly quick.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
function loadImg (src, timeout = 500 ) { var imgPromise = new Promise ((resolve, reject ) => { let img = new Image() img.onload = () => { resolve({ src: src, width: img.naturalWidth, height: img.naturalHeight }) } img.onerror = reject img.src = src }) var timer = new Promise ((resolve, reject ) => { setTimeout(reject, timeout) }) return Promise .race([imgPromise, timer]) } function loadImgAll (imgList, timeout = 500 ) { return new Promise ((resolve, reject ) => { Promise .all( imgList .map(src => loadImg(src, timeout)) .map(p => p.catch(e => false )) ).then(results => resolve(results.filter(r => r))) }) } loadImgAll(getBgImgs(document )).then(imgs => console .log(imgs))
iframe
Just recursively search in all iframes
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
function searchIframes (doc ) { var imgList = [] doc.querySelectorAll('iframe' ) .forEach(iframe => { try { iframeDoc = iframe.contentDocument || iframe.contentWindow.document imgList = imgList.concat(getImgs(iframeDoc) || []) imgList = imgList.concat(searchIframes(iframeDoc) || []) } catch (e) { } }) return imgList } searchIframes(document )
Together
Can be used out of the box. It was made when I was writing a Chrome Extension .
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
function getImgAll (doc ) { return new Promise ((resolve, reject ) => { loadImgAll(Array .from(searchDOM(doc))) .then(resolve, reject) }) function searchDOM (doc ) { const srcChecker = /url\(\s*?['"]?\s*?(\S+?)\s*?["']?\s*?\)/i return Array .from(doc.querySelectorAll('*' )) .reduce((collection, node ) => { let prop = window .getComputedStyle(node, null ) .getPropertyValue('background-image' ) let match = srcChecker.exec(prop) if (match) { collection.add(match[1 ]) } if (/^img$/i .test(node.tagName)) { collection.add(node.src) } else if (/^frame$/i .test(node.tagName)) { try { searchDOM(node.contentDocument || node.contentWindow.document) .forEach(img => { if (img) { collection.add(img) } }) } catch (e) {} } return collection }, new Set ()) } function loadImg (src, timeout = 500 ) { var imgPromise = new Promise ((resolve, reject ) => { let img = new Image() img.onload = () => { resolve({ src: src, width: img.naturalWidth, height: img.naturalHeight }) } img.onerror = reject img.src = src }) var timer = new Promise ((resolve, reject ) => { setTimeout(reject, timeout) }) return Promise .race([imgPromise, timer]) } function loadImgAll (imgList, timeout = 500 ) { return new Promise ((resolve, reject ) => { Promise .all( imgList .map(src => loadImg(src, timeout)) .map(p => p.catch(e => false )) ).then(results => resolve(results.filter(r => r))) }) } } getImgAll(document ).then(list => console .log(list))
[EOF]