ヘボン式変換 - javascript
2014/01/12
javascript
ヘボン式
ひらがなをヘボン式ローマ字に変換します。
はじめにJavaで実装していたので、それをJavaScript版に置き換えてみました。
実装については、いろいろ参考にしつつ、基本的には前回のJava版を置き換える感じでやっています。
以下、実装ソース
/*
ご自由に使ってください
deiji.jp
*/
/**
 * Installs `String.prototype.toHebon`, which converts hiragana text to
 * upper-case Hepburn romaji.
 *
 * Conversion rules implemented here:
 *   - Longest match first: two-character kana (e.g. "きゃ") win over one.
 *   - Characters that are not in the table are passed through unchanged.
 *   - "っ" doubles the first consonant of the next syllable, except that
 *     before "CH" it becomes "T" (まっちゃ -> MATCHA).
 *   - "ん" becomes "M" before B / M / P and "N" otherwise.
 *   - A syllable that would create AA / II / UU / EE / OO / OU together with
 *     the previous syllable is dropped (とうきょう -> TOKYO).
 *   - "ー" produces no output.
 *
 * Also installs a minimal `String.prototype.startsWith` when the runtime
 * does not provide one. Calling this function more than once is harmless.
 */
function installHebon() {
    "use strict";

    // [kana, romaji] pairs used to build the lookup table.
    var KANA_PAIRS = [
        ["あ", "A"], ["い", "I"], ["う", "U"], ["え", "E"], ["お", "O"],
        ["か", "KA"], ["き", "KI"], ["く", "KU"], ["け", "KE"], ["こ", "KO"],
        ["さ", "SA"], ["し", "SHI"], ["す", "SU"], ["せ", "SE"], ["そ", "SO"],
        ["た", "TA"], ["ち", "CHI"], ["つ", "TSU"], ["て", "TE"], ["と", "TO"],
        ["な", "NA"], ["に", "NI"], ["ぬ", "NU"], ["ね", "NE"], ["の", "NO"],
        ["は", "HA"], ["ひ", "HI"], ["ふ", "FU"], ["へ", "HE"], ["ほ", "HO"],
        ["ま", "MA"], ["み", "MI"], ["む", "MU"], ["め", "ME"], ["も", "MO"],
        ["や", "YA"], ["ゆ", "YU"], ["よ", "YO"],
        ["ら", "RA"], ["り", "RI"], ["る", "RU"], ["れ", "RE"], ["ろ", "RO"],
        ["わ", "WA"], ["ゐ", "I"], ["ゑ", "E"], ["を", "O"], ["ん", "N"],
        ["が", "GA"], ["ぎ", "GI"], ["ぐ", "GU"], ["げ", "GE"], ["ご", "GO"],
        ["ざ", "ZA"], ["じ", "JI"], ["ず", "ZU"], ["ぜ", "ZE"], ["ぞ", "ZO"],
        ["だ", "DA"], ["ぢ", "JI"], ["づ", "ZU"], ["で", "DE"], ["ど", "DO"],
        ["ば", "BA"], ["び", "BI"], ["ぶ", "BU"], ["べ", "BE"], ["ぼ", "BO"],
        ["ぱ", "PA"], ["ぴ", "PI"], ["ぷ", "PU"], ["ぺ", "PE"], ["ぽ", "PO"],
        ["きゃ", "KYA"], ["きゅ", "KYU"], ["きょ", "KYO"],
        ["しゃ", "SHA"], ["しゅ", "SHU"], ["しょ", "SHO"],
        ["ちゃ", "CHA"], ["ちゅ", "CHU"], ["ちょ", "CHO"],
        ["にゃ", "NYA"], ["にゅ", "NYU"], ["にょ", "NYO"],
        ["ひゃ", "HYA"], ["ひゅ", "HYU"], ["ひょ", "HYO"],
        ["みゃ", "MYA"], ["みゅ", "MYU"], ["みょ", "MYO"],
        ["りゃ", "RYA"], ["りゅ", "RYU"], ["りょ", "RYO"],
        ["ぎゃ", "GYA"], ["ぎゅ", "GYU"], ["ぎょ", "GYO"],
        ["じゃ", "JA"], ["じゅ", "JU"], ["じょ", "JO"],
        ["びゃ", "BYA"], ["びゅ", "BYU"], ["びょ", "BYO"],
        ["ぴゃ", "PYA"], ["ぴゅ", "PYU"], ["ぴょ", "PYO"],
        // These two get special treatment: "ー" is dropped, and "っ" is kept
        // as a marker that is resolved when the following syllable is seen.
        ["ー", ""], ["っ", "っ"]
    ];

    // Null-prototype object so that input such as "at" can never resolve to
    // an inherited property (e.g. Array.prototype.at) instead of a romaji.
    var table = Object.create(null);
    var i;
    for (i = 0; i < KANA_PAIRS.length; i++) {
        table[KANA_PAIRS[i][0]] = KANA_PAIRS[i][1];
    }

    var SOKUON = "っ";
    // Vowel pairs that are collapsed when they span two conversions.
    var LONG_VOWEL = /(AA|II|UU|EE|OO|OU)$/;
    // Syllables starting with B, M or P turn a preceding "ん" into "M".
    var LABIAL = /^[BMP]/;

    /**
     * Converts one string to Hepburn romaji.
     *
     * @param {string} text - Input text; hiragana is converted, anything
     *     else is copied through unchanged.
     * @returns {string} The romanised text.
     */
    function toHebon(text) {
        var result = "";
        var pos = 0;
        var len = text.length;
        var last = "";      // previous conversion ("" after a dropped vowel)
        var piece;          // conversion of the current kana
        var width;
        var key;

        while (pos < len) {
            // Table lookup, trying the two-character kana first.
            piece = undefined;
            for (width = 2; width >= 1; width--) {
                if (pos + width > len) {
                    continue;
                }
                key = text.substring(pos, pos + width);
                if (key in table) {
                    piece = table[key];
                    pos += width;
                    break;
                }
            }
            if (piece === undefined) {
                // Not a known kana: pass the character through as-is.
                piece = text.substring(pos, pos + 1);
                pos += 1;
            }

            // Resolve a pending "っ" or "ん" now that the next syllable is known.
            if (last === SOKUON) {
                result += piece.slice(0, 2) === "CH" ? "T" : piece.substring(0, 1);
            } else if (last === "N") {
                result += LABIAL.test(piece) ? "M" : "N";
            }

            // A vowel that merely lengthens the previous syllable is dropped,
            // and is not remembered as the previous conversion either.
            if (LONG_VOWEL.test(last + piece)) {
                last = "";
                continue;
            }

            // "っ" and "ん" are emitted on the next iteration (see above),
            // unless they are the final character of the input.
            if ((piece !== SOKUON && piece !== "N") || pos >= len) {
                result += piece;
            }
            last = piece;
        }
        return result;
    }

    if (typeof String.prototype.startsWith !== "function") {
        // Minimal fallback for runtimes without String.prototype.startsWith.
        String.prototype.startsWith = function (str) {
            return this.indexOf(str) === 0;
        };
    }

    String.prototype.toHebon = function () {
        // String(this) unwraps the boxed String that sloppy-mode callers pass.
        return toHebon(String(this));
    };
}

installHebon();
: