跳转到内容

英文维基 | 中文维基 | 日文维基 | 草榴社区

User:SunAfterRain/js/Wordcount.js

维基百科,自由的百科全书
注意:保存之后,你必须清除浏览器缓存才能看到做出的更改。Google Chrome、Firefox、Microsoft Edge、Safari:按住⇧ Shift键并单击工具栏的“刷新”按钮。参阅Help:绕过浏览器缓存以获取更多帮助。
/* jshint esversion: 8 */
// <nowiki>
$.when(
	$.ready,
	mw.loader.using('ext.gadget.HanAssist')
).then((_$, require) => {
	const HanAssist = require('ext.gadget.HanAssist');
	// Names of the elements the script intends to strip from the rendered page
	// before counting, grouped by how they are identified (tag name, id, class).
	// Entries are bare names without any selector prefix.
	const deleteTable = {
		tags: [
			'table', 'tbody', 'td', 'tr', 'th', 'pre', // tables
			'style', // style sheets
			'h1', 'h2', 'h3', 'h4', 'h5', 'h6', // headings often parse into noisy text
			'annotation' // drop raw LaTeX (math > semantics > annotation)
		],
		ids: [
			'stub', // stub notice
			'toc' // table of contents
		],
		classes: [
			'noteTA', // NoteTA
			'infobox', 'wikitable', 'navbox', // tables
			'mw-highlight', // <syntaxhighlight>
			'thumb', // image captions
			'reflist', 'references', 'reference', // <reference />
			'noprint', // content that is not printed
			'hatnote', 'navigation-not-searchable', // disambiguation notes
			'toc', // table of contents
			'mw-editsection' // edit links
		]
	};
	
	if (
		mw.config.get('wgArticleId') === 0 ||
		mw.config.get('wgRevisionId') === 0 ||
		mw.config.get('wgAction') !== 'view' ||
		[
			'json',
			'javascript',
			'css',
			'sanitized-css',
			'scribunto'
		].includes(mw.config.get('wgPageContentModel', 'wikitext').toLowerCase()) ||
		$('span#redirectsub').length ||
		$('span#softredirect').length ||
		$('span#Template:Rfd').length
	){
		return;
	}
	
	/**
	 * Write a message to the browser console, tagged with the script name.
	 *
	 * @param {string} method Name of the console method to call (e.g. 'info', 'error').
	 * @param {...*} args Values printed after the '[Wordcount] ' tag.
	 */
	function log(method, ...args) {
		console[method]('[Wordcount] ', ...args);
	}
	
	// Promise for the message shown to the user. It fetches the rendered HTML of
	// the current revision, strips the elements listed in deleteTable, collapses
	// whitespace and hands the remaining text to getCountOutput(). It never
	// rejects: on failure it resolves with the stringified error instead, so the
	// click handler can display whatever it receives.
	const wordcountPromoise = (async () => {
		let pageHtml;
		try {
			pageHtml = await $.ajax({
				url: mw.util.wikiScript(),
				data: {
					action: 'render',
					oldid: mw.config.get('wgRevisionId')
				},
				dataType: 'html'
			});
		} catch (reason) {
			// jQuery rejects with the jqXHR object, which would stringify to
			// "[object Object]"; turn it into a readable Error instead.
			const detail = reason && reason.statusText ?
				reason.status + ' ' + reason.statusText :
				String(reason);
			throw new Error('Fail to fetch page html: ' + detail);
		}

		if (typeof pageHtml !== 'string') {
			throw new Error('Fail to fetch page html.');
		}

		// deleteTable stores bare names, so ids and classes need their "#" / "."
		// prefix; without it they would be interpreted as tag names and match nothing.
		const removalSelector = [
			...deleteTable.tags,
			...deleteTable.ids.map((id) => '#' + id),
			...deleteTable.classes.map((className) => '.' + className)
		].join(', ');

		const $parseHTML = $($.parseHTML(pageHtml));
		$parseHTML.find(removalSelector).remove();
		// Line breaks are dropped outright; every other whitespace run becomes one space.
		const countText = $parseHTML.text().replace(/\n/g, '').replace(/\s+/g, ' ');

		log('debug', 'Wordcount.js 解析出文本:\n' + countText);
		return getCountOutput(countText);
	})().then((count) => {
		// Expose the result for other scripts / console use.
		window.wordcount = count;
		log('info', count);
		return count;
	},(error) => {
		log('error', error);
		return String(error);
	});
	
	/**
	 * Count the Han characters in a string.
	 *
	 * Full stops are removed first, then every match of the Han pattern (BMP
	 * ranges plus the listed surrogate pairs) counts as one character.
	 *
	 * @param {string} text Text to inspect.
	 * @return {number} Number of matched Han characters.
	 */
	function cjkCount(text) {
		const hanPattern = /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DB5\u4E00-\u9FCC\uF900-\uFA6D\uFA70-\uFAD9]|[\uD840-\uD868][\uDC00-\uDFFF]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|[\uD86A-\uD86C][\uDC00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D]|\uD87E[\uDC00-\uDE1D]/g;
		const withoutDots = text.replace(/\./g, '');
		const hits = withoutDots.match(hanPattern);
		return hits === null ? 0 : hits.length;
	}

	/**
	 * Compute a byte size for a string, one UTF-16 code unit at a time:
	 * U+0000-U+007F count 1, U+0080-U+07FF and every surrogate half count 2
	 * (so a surrogate pair totals 4), everything else counts 3.
	 *
	 * @param {string} text Text to measure.
	 * @return {number} Total number of bytes.
	 */
	function byteCount(text) {
		let total = 0;
		for (let index = 0; index < text.length; index++) {
			const unit = text.charCodeAt(index);
			if (unit <= 0x007F) {
				total += 1;
			} else if (unit <= 0x07FF || (unit >= 0xD800 && unit <= 0xDFFF)) {
				total += 2;
			} else {
				total += 3;
			}
		}
		return total;
	}

	/**
	 * Gather all statistics for a piece of text.
	 *
	 * @param {string} text Text to analyse.
	 * @return {{length: number, cjkCount: number, byteCount: number, calculusCjkCount: number}}
	 *  `length` is the string length, `cjkCount` the result of cjkCount(),
	 *  `byteCount` the result of byteCount(), and `calculusCjkCount` weighs
	 *  every non-Han code unit as half a Han character.
	 */
	function getCount(text) {
		const total = text.length;
		if (total === 0) {
			// Nothing to measure; skip the counters entirely.
			return { length: 0, cjkCount: 0, byteCount: 0, calculusCjkCount: 0 };
		}

		const hanTotal = cjkCount(text);
		const bytes = byteCount(text);
		const nonHan = total - hanTotal;
		return {
			length: total,
			cjkCount: hanTotal,
			byteCount: bytes,
			calculusCjkCount: hanTotal + nonHan / 2
		};
	}

	/**
	 * Build the message describing the statistics of `text`.
	 *
	 * If `window.wordcountOuputTextFormat` is set, it is used as the template;
	 * otherwise a default simplified/traditional Chinese message naming the
	 * current page is produced. The placeholders `$length`, `$cjkCount`,
	 * `$calculusCjkCount` and `$byteCount` are replaced by the figures returned
	 * from getCount().
	 *
	 * @param {string} text Text whose statistics are reported.
	 * @return {string} Message; lines are separated by '\n'.
	 */
	function getCountOutput(text) {
		const { length, cjkCount, byteCount, calculusCjkCount } = getCount(text);
		const fillPlaceholders = (template) => template
			.replace(/\$length/g, length)
			.replace(/\$cjkCount/g, cjkCount)
			.replace(/\$calculusCjkCount/g, calculusCjkCount)
			.replace(/\$byteCount/g, byteCount);

		if (window.wordcountOuputTextFormat) {
			return fillPlaceholders(String(window.wordcountOuputTextFormat));
		}

		// The page title is kept out of placeholder substitution so that a title
		// which happens to contain e.g. "$length" is shown verbatim.
		const heading = HanAssist.conv({
			hans: '在页面“' + mw.config.get('wgPageName') + '”中:',
			hant: '在頁面「' + mw.config.get('wgPageName') + '」中:'
		});

		if (length === 0) {
			return heading + HanAssist.conv({
				hans: '不存在有效字符',
				hant: '不存在有效字元'
			});
		}

		const details =
			'\n' +
			HanAssist.conv({
				hans: '有 $length 个字符,其中包含 $cjkCount 个汉字',
				hant: '有 $length 個字元,其中包含 $cjkCount 個漢字'
			}) +
			'\n' +
			HanAssist.conv({
				hans: '合计 $calculusCjkCount 个汉字,统计共 $byteCount 个字节',
				hant: '合計 $calculusCjkCount 個漢字,總計共 $byteCount 個位元組'
			});
		return heading + fillPlaceholders(details);
	}

	// Add a "character count" entry to the page-actions portlet.
	const link = mw.util.addPortletLink(
		'p-cactions',
		'#wordcount',
		HanAssist.conv({
			hans: '字符统计',
			hant: '字元統計'
		})
	);

	// Show the statistics in an OOUI alert dialog when the entry is clicked.
	$(link).on('click', (event) => {
		// The href is only a placeholder; keep the browser from jumping to "#wordcount".
		event.preventDefault();

		// OO.ui.alert() is provided by the "oojs-ui-windows" module, which is
		// not among the modules loaded at start-up, so load it on demand here.
		Promise.all([
			wordcountPromoise,
			mw.loader.using('oojs-ui-windows')
		]).then(([count]) => {
			// Turn each '\n' of the message into a <br>, keeping the text itself
			// in text nodes so it is never parsed as HTML.
			const nodes = [];
			count.split('\n').forEach((line, index) => {
				if (index > 0) {
					nodes.push(document.createElement('br'));
				}
				nodes.push(document.createTextNode(line));
			});
			OO.ui.alert($('<div>').append(...nodes), { size: 'large' });
		}).catch((error) => {
			log('error', error);
		});
	});
});
// </nowiki>