source: trunk/dports/textproc/mecab-ipadic-neologd/files/patch-test.sh.diff @ 133890

Last change on this file was revision 133890, checked in by hum@…, 5 years ago

New port: mecab-ipadic-neologd @ 20150315 - Neologism dictionary for MeCab

File size: 4.0 KB
  • libexec/test-mecab-ipadic-neologd.sh

    old new
     21  21
     22  22 echo "$ECHO_PREFIX Start.."
     23  23
     24     echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
     25     ${BASEDIR}/../misc/git-set-file-times
         24 #echo "$ECHO_PREFIX Replace timestamp from 'git clone' date to 'git commit' date"
         25 #${BASEDIR}/../misc/git-set-file-times
     26  26
     27  27 YMD=`ls -c \`find ${BASEDIR}/../seed/mecab-user-dict-seed.*.csv.xz\` | head -1 | egrep -o '[0-9]{8}' | tail -1`
     28  28 if [ ! -e ${BASEDIR}/../build/mecab-ipadic-2.7.0-20070801-neologd-${YMD} ]; then
     
     36  36
     37  37 echo "$ECHO_PREFIX Get buzz phrases"
     38  38
     39     curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "/tmp/realtime_buzz.html"
     40     sed -i -e "/\n/d" /tmp/realtime_buzz.html
     41     cat /tmp/realtime_buzz.html | perl -ne '$l = $_;  if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > /tmp/buzz_phrase
         39 mkdir -p tmp
         40 curl http://searchranking.yahoo.co.jp/realtime_buzz/ -o "./tmp/realtime_buzz.html"
         41 sed -i -e "/\n/d" ./tmp/realtime_buzz.html
         42 cat ./tmp/realtime_buzz.html | perl -ne '$l = $_;  if ($l =~ m|<h3><a href="http://rdsig\.yahoo\.co\.jp.+?">(.+)</a></h3>|g){ print $1."\n";}' > ./tmp/buzz_phrase
     42  43
     43     PHRASE_FILE=/tmp/buzz_phrase
         44 PHRASE_FILE=./tmp/buzz_phrase
     44  45 if [ ! -s ${PHRASE_FILE} ]; then
     45  46    PHRASE_FILE=""#${BASEDIR}/../misc/buzz_phrase_201402181610
     46  47 fi
     47  48
     48  49 echo "$ECHO_PREFIX Get difference between default system dictionary and mecab-ipadic-neologd"
     49  50
     50     cat /tmp/buzz_phrase| mecab -Owakati > /tmp/buzz_phrase_tokenized_using_defdic
     51     cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > /tmp/buzz_phrase_tokenized_using_neologismdic
     52     /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
         51 cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 > ./tmp/buzz_phrase_tokenized_using_defdic
         52 cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} > ./tmp/buzz_phrase_tokenized_using_neologismdic
         53 /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
     53  54
     54     if [ -s /tmp/buzz_phrase_tokenized_diff ]; then
         55 if [ -s ./tmp/buzz_phrase_tokenized_diff ]; then
     55  56     echo "$ECHO_PREFIX Tokenize phrase using default system dictionary"
     56         echo "default system dictonary" > /tmp/buzz_phrase_tokenized_using_defdic
     57         cat /tmp/buzz_phrase| mecab -Owakati >> /tmp/buzz_phrase_tokenized_using_defdic
         57     echo "default system dictonary" > ./tmp/buzz_phrase_tokenized_using_defdic
         58     cat ./tmp/buzz_phrase| mecab -Owakati -d @PREFIX@/lib/mecab/dic/ipadic-utf8 >> ./tmp/buzz_phrase_tokenized_using_defdic
     58  59
     59  60     echo "$ECHO_PREFIX Tokenize phrase using mecab-ipadic-neologd"
     60         echo "mecab-ipadic-neologd" > /tmp/buzz_phrase_tokenized_using_neologismdic
     61         cat /tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> /tmp/buzz_phrase_tokenized_using_neologismdic
         61     echo "mecab-ipadic-neologd" > ./tmp/buzz_phrase_tokenized_using_neologismdic
         62     cat ./tmp/buzz_phrase| mecab -Owakati -d ${MECAB_DIC_DIR} >> ./tmp/buzz_phrase_tokenized_using_neologismdic
     62  63
     63  64     echo "$ECHO_PREFIX Get result of diff"
     64         /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines /tmp/buzz_phrase_tokenized_using_defdic /tmp/buzz_phrase_tokenized_using_neologismdic > /tmp/buzz_phrase_tokenized_diff
         65     /usr/bin/diff -y -W60 --side-by-side --suppress-common-lines ./tmp/buzz_phrase_tokenized_using_defdic ./tmp/buzz_phrase_tokenized_using_neologismdic > ./tmp/buzz_phrase_tokenized_diff
     65  66
     66  67     echo "$ECHO_PREFIX Please check difference between default system dictionary and mecab-ipadic-neologd"
     67  68     echo ""
     68         cat /tmp/buzz_phrase_tokenized_diff
         69     cat ./tmp/buzz_phrase_tokenized_diff
     69  70     echo ""
     70  71 else
     71  72     echo "$ECHO_PREFIX Something wrong. You shouldn't install mecab-ipadic-neologd yet."
Note: See TracBrowser for help on using the repository browser.