Changeset 86585


Ignore:
Timestamp:
Oct 29, 2011, 2:06:15 AM (8 years ago)
Author:
takanori@…
Message:

chasen:

  • Split ChaSen into chasen (meta), chasen-base, and chasen dictionaries.
  • Add support for NAIST JDICs.
  • From now on, UTF-8-based ChaSen (chasen-utf8) is also installed by default.
  • If you wish, you can use darts-clone instead of darts (via the +dartsclone variant).

Ticket #31553. Thanks to hum@!

Location:
trunk/dports/textproc
Files:
17 added
1 deleted
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/dports/textproc/chasen/Portfile

    r85955 r86585  
     1# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
    12# $Id$
    23
    3 PortSystem      1.0
     4PortSystem          1.0
    45
    5 name            chasen
    6 version         2.4.4
    7 set ipa         ipadic-2.7.0
    8 categories      textproc japanese
    9 maintainers     takanori openmaintainer
    10 description     Japanese morphological analysis system
    11 long_description \
    12                 ChaSen is a tool to analyze a text according to a set of dictionaries that \
    13                 specifies grammer, part of speech of words, and so on. Because ChaSen itself \
    14                 has no dictionaries, we need explicit dictionaries to use. IPADIC is such a \
    15                 set of dictionaries that specifies Japanese language. \
    16                 \
    17                 This version of chasen treats encoding of input as EUC-JP, and its output is \
    18                 encoded as EUC-JP by default.
    19 platforms       darwin
    20 license         BSD Restrictive/Distributable
    21 homepage        http://chasen-legacy.sourceforge.jp/
    22 #master_sites   sourceforge_jp:chasen-legacy/32224/:chasen \
    23 #               sourceforge_jp:ipadic/24435/:ipadic
    24 set sfjpid_chasen   32224
    25 set sfjpid_ipadic   24435
    26 master_sites    http://keihanna.dl.sourceforge.jp/chasen-legacy/${sfjpid_chasen}/:chasen \
    27                 http://osdn.dl.sourceforge.jp/chasen-legacy/${sfjpid_chasen}/:chasen \
    28                 http://keihanna.dl.sourceforge.jp/ipadic/${sfjpid_ipadic}/:ipadic \
    29                 http://osdn.dl.sourceforge.jp/ipadic/${sfjpid_ipadic}/:ipadic \
    30                 http://chasen.naist.jp/stable/ipadic/:ipadic
    31 distfiles       ${distname}.tar.gz:chasen
    32 patchfiles      patch-configure.diff
    33 checksums       ${name}-${version}.tar.gz md5 42b20d41a33e1f5e7a05cc56aaa548b0 \
    34                                           sha1 3328e6667d6b74955b6c4b1b49a7d97805e4de8c \
    35                                           rmd160 ceb4a7c064ba5bf0d356d6db0e64b644b88dbc54 \
    36                 ${ipa}.tar.gz md5 f36d315cae25b086a889b7090c674977 \
    37                               sha1 52784d8abb3361eafec83ab543be63cf087f9eb2 \
    38                               rmd160 5e37c750801df63439032be4954c4b63a44012cb
     6name                chasen
     7version             2.4.4
     8revision            1
     9categories          textproc japanese
     10platforms           darwin
     11maintainers         takanori hum openmaintainer
     12license             BSD
    3913
    40 depends_lib     port:darts \
    41                 port:libiconv
    42 depends_build   port:nkf
     14homepage            http://chasen-legacy.sourceforge.jp/
     15description         Japanese morphological analysis system
     16long_description    ChaSen is a tool to analyze a text according to a set of dictionaries that \
     17                    specifies grammar, part of speech of words, and so on. \
     18                    This port depends on ChaSen base analyzer and a Japanese dictionary.
    4319
    44 default_variants    +ipadic
     20distfiles
    4521
    46 post-extract {
    47     foreach f {tests/test-chasen.sh tests/test-dic.sh} {
    48         file attributes ${worksrcpath}/${f} -permissions 0755
     22depends_lib         port:chasen-base
     23
     24use_configure       no
     25supported_archs     noarch
     26
     27build {}
     28
     29variant eucjp description {Enable EUC-JP encoding} {}
     30
     31variant utf8 description {Add chasen-utf8 for UTF-8 texts} {
     32    post-destroot {
     33        set encoding     utf8
     34        set nkf_encoding w
     35        set script  ${destroot}${prefix}/bin/chasen-${encoding}
     36        xinstall -m 755 ${filespath}/chasen.in         ${script}
     37        reinplace "s|@NKF_ENCODING@|${nkf_encoding}|"  ${script}
     38        reinplace "s|@RC_FILE@|${prefix}/etc/chasen/chasenrc-${encoding}|" \
     39            ${script}
     40    }
     41    notes-append  "You can run 'chasen-utf8' for UTF-8 texts."
     42}
     43
     44variant sjis description {Add chasen-sjis for Shift-JIS texts} {
     45    post-destroot {
     46        set encoding     sjis
     47        set nkf_encoding s
     48        set script  ${destroot}${prefix}/bin/chasen-${encoding}
     49        xinstall -m 755 ${filespath}/chasen.in         ${script}
     50        reinplace "s|@NKF_ENCODING@|${nkf_encoding}|"  ${script}
     51        reinplace "s|@RC_FILE@|${prefix}/etc/chasen/chasenrc-${encoding}|" \
     52            ${script}
     53    }
     54    notes-append  "You can run 'chasen-sjis' for Shift-JIS texts."
     55}
     56
     57default_variants    +eucjp +utf8
     58
     59set dicname         ipadic
     60
     61variant ipadic conflicts naistjdic description {Use ipadic} {
     62    set dicname         ipadic
     63    if {[variant_isset eucjp]} {
     64        depends_lib-append  port:chasen-${dicname}
     65    }
     66    foreach encoding {utf8 sjis} {
     67        if {[variant_isset ${encoding}]} {
     68            depends_lib-append  port:chasen-${dicname}-${encoding}
     69        }
    4970    }
    5071}
    5172
    52 configure.args  --libexecdir=${prefix}/lib --with-darts=${prefix}/include --with-libiconv=${prefix}
    53 
    54 use_parallel_build  yes
    55 
    56 test.run        yes
    57 test.target     check
    58 
    59 post-destroot {
    60     xinstall -m 755 -d ${destroot}${prefix}/share/doc/${name}/doc
    61 
    62     xinstall -m 644 -W ${worksrcpath} AUTHORS COPYING ChangeLog INSTALL NEWS README ${destroot}${prefix}/share/doc/${name}
    63     xinstall -m 644 ${worksrcpath}/doc/manual-j.pdf ${destroot}${prefix}/share/doc/${name}/doc
     73variant naistjdic conflicts ipadic description {Use naist-jdic} {
     74    set dicname         naist-jdic
     75    if {[variant_isset eucjp]} {
     76        depends_lib-append  port:chasen-${dicname}
     77    }
     78    foreach encoding {utf8 sjis} {
     79        if {[variant_isset ${encoding}]} {
     80            depends_lib-append  port:chasen-${dicname}-${encoding}
     81        }
     82    }
    6483}
    6584
    66 variant ipadic description {Install ipadic} {
    67     # Japanese dictionaries for ChaSen
    68     distfiles-append ${ipa}.tar.gz:ipadic
    69     post-destroot {
    70         foreach {enc c} {UTF-8 w EUC-JP e Shift_JIS s} {
    71             file mkdir ${workpath}/${ipa}-${enc}
    72             system "(cd ${workpath}/${ipa}; tar cf - .) | (cd ${workpath}/${ipa}-${enc}; tar xf -)"
    73             reinplace "s|PACKAGE=ipadic|PACKAGE=ipadic-${enc}|" ${workpath}/${ipa}-${enc}/configure
    74             reinplace "s|/makemat|/makemat -i ${c}|" ${workpath}/${ipa}-${enc}/Makefile.in
    75             reinplace "s|/makeda|/makeda -i ${c}|" ${workpath}/${ipa}-${enc}/Makefile.in
    76             foreach f [glob ${workpath}/${ipa}-${enc}/*.cha ${workpath}/${ipa}-${enc}/*.dic ${workpath}/${ipa}-${enc}/chasenrc.in] {
    77                 system "mv ${f} ${f}_ && ${prefix}/bin/nkf --ic=EUC-JP --oc=${enc} ${f}_ > ${f}"
    78             }
    79             system "cd ${workpath}/${ipa}-${enc} && ./configure --with-dicdir=${prefix}/lib/chasen/dic --with-mkchadic=${destroot}${prefix}/lib/chasen --with-chasenrc-path=${destroot}${prefix}/etc/chasenrc-${enc} && DYLD_LIBRARY_PATH=${destroot}${prefix}/lib make && make install DESTDIR=${destroot}"
    80         }
    81         ln -s chasenrc-EUC-JP ${destroot}${prefix}/etc/chasenrc
    82 
    83         xinstall -m 755 -d ${destroot}${prefix}/share/doc/chasen-ipadic/doc
    84 
    85         xinstall -m 644 -W ${workpath}/${ipa} AUTHORS COPYING ChangeLog INSTALL INSTALL-ja NEWS README ${destroot}${prefix}/share/doc/chasen-ipadic
    86         xinstall -m 644 ${workpath}/${ipa}/doc/ipadic-ja.pdf ${destroot}${prefix}/share/doc/chasen-ipadic/doc
    87     }
    88 
    89     notes-append "
    90         Due to historical reasons, the default encoding of ChaSen is set to EUC-JP.\
    91         If you'd like to handle text files written in UTF-8 or Shift_JIS, you may use\
    92         -r and -i options.
    93        
    94         UTF-8)     chasen -r ${prefix}/etc/chasenrc-UTF-8 -i w <input>
    95         Shift_JIS) chasen -r ${prefix}/etc/chasenrc-Shift_JIS -i s <input>
    96         "
     85if {![variant_isset naistjdic]} {
     86    default_variants    +ipadic
    9787}
    9888
    99 livecheck.type  regex
    100 livecheck.url   http://sourceforge.jp/projects/chasen-legacy/releases/rss
    101 livecheck.regex <title>${name}.* - ${name}-(\[0-9.a-z\-\]+)</title>
     89destroot {
     90    set dest_chasen ${destroot}${prefix}/etc/chasen
     91    file mkdir ${dest_chasen}
     92    ln -s chasenrc-${dicname} ${dest_chasen}/chasenrc
     93    foreach encoding {utf8 sjis} {
     94        if {[variant_isset ${encoding}]} {
     95            ln -s chasenrc-${dicname}-${encoding} ${dest_chasen}/chasenrc-${encoding}
     96        }
     97    }
     98}
     99
     100livecheck.type  none
Note: See TracChangeset for help on using the changeset viewer.