# $Id: Portfile 22311 2007-02-26 03:11:44Z takanori@macports.org $
# Select version 1.0 of the Portfile system.
PortSystem 1.0
# Port identity: port name, upstream release, and port revision.
name chasen
version 2.3.3
revision 3
# Name stem of the IPADIC dictionary release. It forms the tarball name
# (${ipa}.tar.gz) and the work directory paths used by the ipadic variant.
set ipa ipadic-2.7.0
categories textproc japanese
maintainers takanori@macports.org
description Japanese morphological analysis system
long_description ChaSen is a tool to analyze a text according to a set of dictionaries that \
specifies grammar, part of speech of words, and so on. Because ChaSen itself \
has no dictionaries, we need explicit dictionaries to use. IPADIC is such a \
set of dictionaries that specifies Japanese language. \
\
This version of chasen treats encoding of input as EUC-JP, and its output is \
encoded as EUC-JP by default.
platforms darwin
homepage http://chasen.naist.jp/hiki/ChaSen/
# Two download locations, tagged :chasen and :ipadic; each distfile below
# carries the tag of the site it is fetched from.
master_sites http://chasen.naist.jp/stable/chasen/:chasen \
http://chasen.naist.jp/stable/ipadic/:ipadic
# The two dated .patch files are downloaded as distfiles (not shipped as
# local patchfiles), so they are applied by hand in post-extract.
distfiles ${name}-${version}.tar.gz:chasen \
chasen-2.3.3.20030821.patch:chasen \
chasen-2.3.3.20030822.patch:chasen
# Only the tarball is an archive; the downloaded patches are left as-is.
extract.only ${name}-${version}.tar.gz
# Patches handled by the regular patch phase.
patchfiles configure.diff dartsdic.cpp.patch
# Checksums for every file that may be fetched. The ${ipa}.tar.gz entry is
# listed here although that distfile is only added by the ipadic variant.
checksums ${name}-${version}.tar.gz md5 629e90d9490bac95606c38c2d344cc5f \
sha1 711f5717ee284d26aa5616292f7aaffaef11d03a \
rmd160 2f2416f82fb0341023314a4672e442fed86207a9 \
chasen-2.3.3.20030821.patch md5 44b14a50c8269f982fc0d59cb580ecc5 \
sha1 c608e5e613bc8a9758436681846047099610bf4a \
rmd160 a7e581132abc3ae89eb109b25b917d704ca006bf \
chasen-2.3.3.20030822.patch md5 22466f08c3916b415d222508fdcc9bbe \
sha1 0728884dcdf6983a3e32774111e7996a43008722 \
rmd160 30c2ff84ad8b68271fcd7656d9972e7392a806f5 \
${ipa}.tar.gz md5 f36d315cae25b086a889b7090c674977 \
sha1 52784d8abb3361eafec83ab543be63cf087f9eb2 \
rmd160 5e37c750801df63439032be4954c4b63a44012cb
# The ipadic variant (dictionaries) is enabled unless the user opts out.
default_variants +ipadic
depends_lib port:darts port:libiconv
# nkf is invoked from the ipadic variant's post-destroot to re-encode the
# dictionary sources.
depends_build port:nkf
post-extract {
    # Apply the upstream patches that were downloaded as distfiles, oldest
    # first. Each one is a -p0 patch relative to the extracted source tree.
    foreach stamp {20030821 20030822} {
        system "cd ${worksrcpath} && patch -p0 < ${distpath}/chasen-2.3.3.${stamp}.patch"
    }
}
# Configure flags: libexec files go under ${prefix}/lib, and Darts and
# libiconv are located inside the MacPorts prefix.
configure.args --libexecdir=${prefix}/lib \
               --with-darts=${prefix}/include \
               --with-libiconv=${prefix}
# Enable the test phase and have it run the "check" target.
test.target check
test.run yes
post-destroot {
    # Stage ChaSen's documentation under share/doc/<port name> in the destroot.
    set doc_root ${destroot}${prefix}/share/doc/${name}

    # Create the doc directory together with its doc/ subdirectory.
    xinstall -m 755 -d ${doc_root}/doc
    # Top-level text files from the source tree.
    xinstall -m 644 -W ${worksrcpath} AUTHORS COPYING ChangeLog INSTALL NEWS README ${doc_root}
    # Japanese manual (PDF).
    xinstall -m 644 ${worksrcpath}/doc/manual-j.pdf ${doc_root}/doc
}
variant ipadic {
    # Japanese dictionaries (IPADIC) for ChaSen, built once per supported
    # character encoding.
    distfiles-append ${ipa}.tar.gz:ipadic
    extract.only-append ${ipa}.tar.gz

    post-destroot {
        # Pristine extracted IPADIC tree; each encoding gets its own copy.
        set ipa_src ${workpath}/${ipa}

        # enc is the encoding name; c is the letter passed to the -i option
        # of the makemat/makeda tools for that encoding.
        foreach {enc c} {UTF-8 w EUC-JP e Shift_JIS s} {
            set encdir ${ipa_src}-${enc}

            # Clone the source tree into the per-encoding build directory.
            file mkdir ${encdir}
            system "(cd ${ipa_src}; tar cf - .) | (cd ${encdir}; tar xf -)"

            # Rename the package per encoding and make the dictionary
            # compilers run with the matching -i flag.
            reinplace "s|PACKAGE=ipadic|PACKAGE=ipadic-${enc}|" ${encdir}/configure
            reinplace "s|/makemat|/makemat -i ${c}|" ${encdir}/Makefile.in
            reinplace "s|/makeda|/makeda -i ${c}|" ${encdir}/Makefile.in

            # Re-encode the dictionary sources and chasenrc template with nkf,
            # declaring EUC-JP as the input encoding. The original file is
            # kept next to the result with a trailing underscore.
            set sources [glob ${encdir}/*.cha ${encdir}/*.dic ${encdir}/chasenrc.in]
            foreach f ${sources} {
                system "mv ${f} ${f}_ && ${prefix}/bin/nkf --ic=EUC-JP --oc=${enc} ${f}_ > ${f}"
            }

            # Configure, build and stage this encoding's dictionary. The
            # mkchadic location and chasenrc path point into the destroot, and
            # make runs with DYLD_LIBRARY_PATH set to the destroot's lib
            # directory (presumably so the staged tools can load the staged
            # ChaSen library -- confirm).
            set step_configure "./configure --with-dicdir=${prefix}/lib/chasen/dic --with-mkchadic=${destroot}${prefix}/lib/chasen --with-chasenrc-path=${destroot}${prefix}/etc/chasenrc-${enc}"
            set step_build "DYLD_LIBRARY_PATH=${destroot}${prefix}/lib make"
            set step_install "make install DESTDIR=${destroot}"
            system "cd ${encdir} && ${step_configure} && ${step_build} && ${step_install}"
        }

        # The unsuffixed chasenrc is a symlink to the EUC-JP configuration.
        system "ln -sf chasenrc-EUC-JP ${destroot}${prefix}/etc/chasenrc"

        # Stage the IPADIC documentation under share/doc/ipadic.
        set dic_docs ${destroot}${prefix}/share/doc/ipadic
        xinstall -m 755 -d ${dic_docs}/doc
        xinstall -m 644 -W ${ipa_src} AUTHORS COPYING ChangeLog INSTALL INSTALL-ja NEWS README ${dic_docs}
        xinstall -m 644 ${ipa_src}/doc/ipadic-ja.pdf ${dic_docs}/doc
    }

    post-activate {
        # Tell the user how to select a non-default encoding.
        ui_msg "\nDue to historical reasons, the default encoding of ChaSen is set to EUC-JP."
        ui_msg "If you'd like to handle text files written in UTF-8 or Shift_JIS, you may use -r and -i options.\n"
        ui_msg " UTF-8) chasen -r ${prefix}/etc/chasenrc-UTF-8 -i w "
        ui_msg " Shift_JIS) chasen -r ${prefix}/etc/chasenrc-Shift_JIS -i s \n"
    }
}
# Livecheck: match livecheck.regex against the page at livecheck.url.
# The captured group (digits, dots and lowercase letters following
# "<name>-" and preceding ".tar") is the version string.
livecheck.check regex
livecheck.url http://chasen.naist.jp/hiki/ChaSen/?%C3%E3%E4%A5%A4%CE%C7%DB%C9%DB
livecheck.regex ${name}-(\[0-9.a-z\]+)\\.tar