Sharkey/src/mfm/from-html.ts

import * as parse5 from 'parse5';
import treeAdapter = require('parse5/lib/tree-adapters/default');
import { URL } from 'url';

// Matches an http(s) URL at the START of a string; anything after the run of
// allowed URL characters is ignored (the pattern is not anchored at the end).
const urlRegex     = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
// Same pattern anchored at both ends: the WHOLE string must be an http(s) URL
// consisting only of the allowed characters.
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;

/**
 * Extracts the hostname from an absolute URL without throwing.
 *
 * @param url - Candidate URL string (typically an anchor's `href` value).
 * @returns The hostname, or `null` when `url` cannot be parsed as an absolute
 *          URL or when its hostname is empty (e.g. `mailto:` links).
 */
function tryGetHostname(url: string): string | null {
	try {
		const { hostname } = new URL(url);
		return hostname !== '' ? hostname : null;
	} catch {
		// `new URL()` throws a TypeError for relative or malformed input.
		return null;
	}
}

/**
 * Converts an HTML fragment into MFM text.
 *
 * Conversion rules:
 * - text nodes are copied verbatim;
 * - `<br>` becomes a newline;
 * - `<p>` emits a blank line followed by its converted children;
 * - `<a>` becomes a hashtag, a mention, or an MFM link (see inline comments);
 * - any other element contributes only the conversion of its children;
 * - comments and other non-element nodes are skipped.
 *
 * @param html - HTML fragment to convert.
 * @param hashtagNames - Optional list of hashtag texts; an anchor that has an
 *        `href` and whose text equals one of them (case-insensitively) is
 *        emitted as plain text instead of a link.
 * @returns The converted text with leading/trailing whitespace trimmed.
 */
export function fromHtml(html: string, hashtagNames?: string[]): string {
	const dom = parse5.parseFragment(html);

	// Lower-case the known hashtag names once rather than once per anchor.
	const knownHashtags = hashtagNames ? new Set(hashtagNames.map(x => x.toLowerCase())) : null;

	let text = '';

	for (const n of dom.childNodes) {
		analyze(n);
	}

	return text.trim();

	/** Returns the concatenated text content of `node` and all its descendants. */
	function getText(node: parse5.Node): string {
		if (treeAdapter.isTextNode(node)) return node.value;
		if (!treeAdapter.isElementNode(node)) return '';

		if (node.childNodes) {
			return node.childNodes.map(n => getText(n)).join('');
		}

		return '';
	}

	/** Appends the MFM rendering of `node` (and its subtree) to `text`. */
	function analyze(node: parse5.Node) {
		if (treeAdapter.isTextNode(node)) {
			text += node.value;
			return;
		}

		// Skip comment or document type node
		if (!treeAdapter.isElementNode(node)) return;

		switch (node.nodeName) {
			case 'br':
				text += '\n';
				break;

			// Braces give the declarations below their own scope instead of
			// leaking them into the sibling case clauses.
			case 'a': {
				const txt = getText(node);
				const rel = node.attrs.find(x => x.name === 'rel');
				const href = node.attrs.find(x => x.name === 'href');

				// Hashtag
				if (knownHashtags && href && knownHashtags.has(txt.toLowerCase())) {
					text += txt;
				// Mention (anchors whose `rel` starts with "me " are treated as ordinary links)
				} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) {
					const part = txt.split('@');

					if (part.length === 2 && href) {
						//#region The host part is omitted ("@user"), so restore it from the link target
						const host = tryGetHostname(href.value);
						// If the href is relative/malformed or has no hostname, keep the
						// visible text as-is rather than throwing and aborting the whole
						// conversion, or emitting a dangling "@user@".
						text += host !== null ? `${txt}@${host}` : txt;
						//#endregion
					} else if (part.length === 3) {
						// Already in "@user@host" form.
						text += txt;
					}
					// Any other shape (no href, or more than two '@') emits nothing.
				// Everything else: ordinary link
				} else {
					const generateLink = () => {
						if (!href && !txt) {
							return '';
						}
						if (!href) {
							return txt;
						}
						if (!txt || txt === href.value) {	// #6383: Missing text node
							// A bare URL is only safe to emit unwrapped when every
							// character is in the allowed set; otherwise wrap it in <>.
							if (urlRegexFull.test(href.value)) {
								return href.value;
							} else {
								return `<${href.value}>`;
							}
						}
						// The href starts like an http(s) URL but contains characters
						// outside the allowed set: wrap the target in <>.
						if (urlRegex.test(href.value) && !urlRegexFull.test(href.value)) {
							return `[${txt}](<${href.value}>)`;	// #6846
						} else {
							return `[${txt}](${href.value})`;
						}
					};

					text += generateLink();
				}
				break;
			}

			case 'p':
				text += '\n\n';
				if (node.childNodes) {
					for (const n of node.childNodes) {
						analyze(n);
					}
				}
				break;

			default:
				if (node.childNodes) {
					for (const n of node.childNodes) {
						analyze(n);
					}
				}
				break;
		}
	}
}
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`import * as parse5 from 'parse5';`
			`import treeAdapter = require('parse5/lib/tree-adapters/default');`
			`import { URL } from 'url';`
Use mfm-js for MFM parsing (#7415) * wip * Update mfm.ts * wip * update mfmjs * refactor * nanka * Update mfm.ts * Update to-html.ts * Update to-html.ts * wip * fix test * fix test 2021-04-01 18:36:11 -07:00
			`const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;`
			`const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
APメンションはaudienceじゃなくてtagを参照するなど (#6128) * APメンションはaudienceじゃなくてtagを参照するなど * AP/tag/Mentionではurlじゃなくてuriを提示する * createPersonでaliasが入力された場合に対応 * AP HTMLパースでMention/Hashtag判定にtagを使うように * fix * indent * use hashtag name * fix * URLエンコード不要だったら<>を使わないの条件が消えたたのを修正 2020-04-03 06:51:38 -07:00			`export function fromHtml(html: string, hashtagNames?: string[]): string {`
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`const dom = parse5.parseFragment(html);`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
			`let text = '';`

Use for-of instead of forEach (#3583) Co-authored-by: syuilo <syuilotan@yahoo.co.jp> Co-authored-by: Acid Chicken (硫酸鶏) <root@acid-chicken.com> 2018-12-11 03:36:55 -08:00			`for (const n of dom.childNodes) {`
			`analyze(n);`
			`}`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
			`return text.trim();`

Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`function getText(node: parse5.Node): string {`
			`if (treeAdapter.isTextNode(node)) return node.value;`
			`if (!treeAdapter.isElementNode(node)) return '';`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
			`if (node.childNodes) {`
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`return node.childNodes.map(n => getText(n)).join('');`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`}`

			`return '';`
			`}`

Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`function analyze(node: parse5.Node) {`
			`if (treeAdapter.isTextNode(node)) {`
			`text += node.value;`
			`return;`
			`}`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`// Skip comment or document type node`
			`if (!treeAdapter.isElementNode(node)) return;`

			`switch (node.nodeName) {`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`case 'br':`
			`text += '\n';`
			`break;`

			`case 'a':`
			`const txt = getText(node);`
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`const rel = node.attrs.find(x => x.name === 'rel');`
			`const href = node.attrs.find(x => x.name === 'href');`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00
APメンションはaudienceじゃなくてtagを参照するなど (#6128) * APメンションはaudienceじゃなくてtagを参照するなど * AP/tag/Mentionではurlじゃなくてuriを提示する * createPersonでaliasが入力された場合に対応 * AP HTMLパースでMention/Hashtag判定にtagを使うように * fix * indent * use hashtag name * fix * URLエンコード不要だったら<>を使わないの条件が消えたたのを修正 2020-04-03 06:51:38 -07:00			`// ハッシュタグ`
			`if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) {`
			`text += txt;`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`// メンション`
Fix htmlToMfm (#3600) 2018-12-11 18:47:07 -08:00			`} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) {`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`const part = txt.split('@');`

Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`if (part.length === 2 && href) {`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`//#region ホスト名部分が省略されているので復元する`
Use string interpolation 2018-09-01 07:12:51 -07:00			const acct = `${txt}@${(new URL(href.value)).hostname}`;
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`text += acct;`
			`//#endregion`
refactor: Use === 2020-04-03 16:46:54 -07:00			`} else if (part.length === 3) {`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`text += txt;`
			`}`
fix #2315 (#2339) * improve MFM to html * improve html to MFM * missing semicolon * missing semicolon * fix html to MFM タグのリンクは解除するように * fix bug * misssing semicolon * Update html-to-mfm.ts * Update html-to-mfm.ts 2018-09-01 06:45:27 -07:00			`// その他`
			`} else {`
Fix HTML to MFM (#7150) * Fix type * Fix HTML to MFM 2021-02-06 04:44:46 -08:00			`const generateLink = () => {`
			`if (!href && !txt) {`
			`return '';`
			`}`
			`if (!href) {`
			`return txt;`
			`}`
			`if (!txt \|\| txt === href.value) { // #6383: Missing text node`
			`if (href.value.match(urlRegexFull)) {`
			`return href.value;`
			`} else {`
			return `<${href.value}>`;
			`}`
			`}`
			`if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {`
			return `[${txt}](<${href.value}>)`; // #6846
			`} else {`
			return `[${txt}](${href.value})`;
			`}`
			`};`

			`text += generateLink();`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`}`
			`break;`

			`case 'p':`
			`text += '\n\n';`
			`if (node.childNodes) {`
Use for-of instead of forEach (#3583) Co-authored-by: syuilo <syuilotan@yahoo.co.jp> Co-authored-by: Acid Chicken (硫酸鶏) <root@acid-chicken.com> 2018-12-11 03:36:55 -08:00			`for (const n of node.childNodes) {`
			`analyze(n);`
			`}`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`}`
			`break;`

			`default:`
			`if (node.childNodes) {`
Use for-of instead of forEach (#3583) Co-authored-by: syuilo <syuilotan@yahoo.co.jp> Co-authored-by: Acid Chicken (硫酸鶏) <root@acid-chicken.com> 2018-12-11 03:36:55 -08:00			`for (const n of node.childNodes) {`
			`analyze(n);`
			`}`
リモートユーザーのHTMLで表現されたプロフィールをMFMに変換するように 2018-06-20 09:21:57 -07:00			`}`
			`break;`
			`}`
			`}`
			`}`