# $Id: Portfile 36030 2008-04-15 01:37:00Z takanori@macports.org $
PortSystem          1.0

# Identity of the port.
name                chasen
version             2.4.2

# Name-version stem of the IPADIC dictionary tarball; used further down for
# the checksums entry and by the ipadic variant.
set ipa             ipadic-2.7.0

categories          textproc japanese
maintainers         takanori
description         Japanese morphological analysis system
# Free-form description of the port. The text is a single logical line:
# every backslash-newline below is folded into a space by Tcl.
long_description \
    ChaSen is a tool to analyze a text according to a set of dictionaries that \
    specifies grammar, part of speech of words, and so on. Because ChaSen itself \
    has no dictionaries, we need explicit dictionaries to use. IPADIC is such a \
    set of dictionaries that specifies Japanese language. \
    \
    This version of chasen treats encoding of input as EUC-JP, and its output is \
    encoded as EUC-JP by default.
platforms           darwin
homepage            http://chasen-legacy.sourceforge.jp/

# Disabled alternative that used the sourceforge_jp mirror group; kept for
# reference only. Explicit mirror URLs are listed below instead.
#master_sites       sourceforge_jp:chasen-legacy/26441/:chasen
#                   sourceforge_jp:ipadic/24435/:ipadic

# Numeric path components of the sourceforge.jp download URLs.
set sfjpid_chasen   26441
set sfjpid_ipadic   24435

# Every URL carries a tag (:chasen or :ipadic) matching the tag on the
# distfile it serves.
master_sites        http://keihanna.dl.sourceforge.jp/chasen-legacy/${sfjpid_chasen}/:chasen \
                    http://osdn.dl.sourceforge.jp/chasen-legacy/${sfjpid_chasen}/:chasen \
                    http://keihanna.dl.sourceforge.jp/ipadic/${sfjpid_ipadic}/:ipadic \
                    http://osdn.dl.sourceforge.jp/ipadic/${sfjpid_ipadic}/:ipadic \
                    http://chasen.naist.jp/stable/ipadic/:ipadic

distfiles           ${distname}.tar.gz:chasen

patchfiles          patch-configure.diff \
                    patch-lib_dartsdic.cpp.diff

# Checksums for the ChaSen tarball and for the IPADIC tarball.
checksums           ${name}-${version}.tar.gz \
                        md5     5e8084d4a729d05a53c5e9575e79ab01 \
                        sha1    0aeb0c6e4e1562cf0398f20f6505d1ae4eab4130 \
                        rmd160  50b8bf67c9f7587b0ca61a3db6e4c118d58b9adc \
                    ${ipa}.tar.gz \
                        md5     f36d315cae25b086a889b7090c674977 \
                        sha1    52784d8abb3361eafec83ab543be63cf087f9eb2 \
                        rmd160  5e37c750801df63439032be4954c4b63a44012cb

depends_lib         port:darts \
                    port:libiconv

# nkf is invoked by the ipadic variant to re-encode the dictionary sources.
depends_build       port:nkf

default_variants    +ipadic
post-extract {
    # Mark the two test driver scripts as executable (mode 0755); the test
    # phase runs the "check" target, which presumably executes them.
    foreach testscript {tests/test-chasen.sh tests/test-dic.sh} {
        file attributes [file join ${worksrcpath} ${testscript}] -permissions 0755
    }
}
# Helper programs go under ${prefix}/lib (libexecdir); darts headers and
# libiconv are taken from the MacPorts prefix.
configure.args      --libexecdir=${prefix}/lib \
                    --with-darts=${prefix}/include \
                    --with-libiconv=${prefix}

# Enable the test phase and run the "check" target.
test.run            yes
test.target         check
post-destroot {
    # Stage the ChaSen documentation under share/doc/<port name>.
    set docdir ${destroot}${prefix}/share/doc/${name}

    # Creating the doc/ subdirectory also creates its parent.
    xinstall -m 755 -d ${docdir}/doc

    # Top-level text files, taken relative to the extracted source tree.
    xinstall -m 644 -W ${worksrcpath} \
        AUTHORS COPYING ChangeLog INSTALL NEWS README \
        ${docdir}

    # Japanese manual (PDF).
    xinstall -m 644 ${worksrcpath}/doc/manual-j.pdf ${docdir}/doc
}
# Variant ipadic: build and stage the IPADIC dictionaries for ChaSen once per
# encoding (UTF-8, EUC-JP, Shift_JIS), plus a chasenrc for each encoding.
variant ipadic description {Install ipadic} {
    # Japanese dictionaries for ChaSen
    distfiles-append ${ipa}.tar.gz:ipadic

    post-destroot {
        # enc is the encoding name; c is the one-letter code handed to the
        # -i option of makemat and makeda for that encoding.
        foreach {enc c} {UTF-8 w EUC-JP e Shift_JIS s} {
            # Per-encoding working copy of the extracted dictionary sources.
            set dicsrc ${workpath}/${ipa}-${enc}
            file mkdir ${dicsrc}

            # Copy the extracted tree into the per-encoding directory.
            # "&&" (rather than ";") makes sure tar is never run in the
            # wrong directory should either cd fail.
            system "(cd ${workpath}/${ipa} && tar cf - .) | (cd ${dicsrc} && tar xf -)"

            # Give each build its own package name and make the dictionary
            # compilers use this encoding.
            reinplace "s|PACKAGE=ipadic|PACKAGE=ipadic-${enc}|" ${dicsrc}/configure
            reinplace "s|/makemat|/makemat -i ${c}|" ${dicsrc}/Makefile.in
            reinplace "s|/makeda|/makeda -i ${c}|" ${dicsrc}/Makefile.in

            # Re-encode the dictionary sources from EUC-JP to the target
            # encoding with nkf. The original of each file is kept next to
            # it with a trailing underscore.
            foreach f [glob ${dicsrc}/*.cha ${dicsrc}/*.dic ${dicsrc}/chasenrc.in] {
                system "mv ${f} ${f}_ && ${prefix}/bin/nkf --ic=EUC-JP --oc=${enc} ${f}_ > ${f}"
            }

            # Configure, build and stage this encoding. The mkchadic tools
            # and libraries are taken from the destroot; presumably because
            # ChaSen itself is only staged there at this point (TODO confirm).
            system "cd ${dicsrc} && ./configure --with-dicdir=${prefix}/lib/chasen/dic --with-mkchadic=${destroot}${prefix}/lib/chasen --with-chasenrc-path=${destroot}${prefix}/etc/chasenrc-${enc} && DYLD_LIBRARY_PATH=${destroot}${prefix}/lib make && make install DESTDIR=${destroot}"
        }

        # The default chasenrc is the EUC-JP one.
        ln -s chasenrc-EUC-JP ${destroot}${prefix}/etc/chasenrc

        # Stage the IPADIC documentation.
        xinstall -m 755 -d ${destroot}${prefix}/share/doc/chasen-ipadic/doc
        xinstall -m 644 -W ${workpath}/${ipa} AUTHORS COPYING ChangeLog INSTALL INSTALL-ja NEWS README ${destroot}${prefix}/share/doc/chasen-ipadic
        xinstall -m 644 ${workpath}/${ipa}/doc/ipadic-ja.pdf ${destroot}${prefix}/share/doc/chasen-ipadic/doc
    }

    post-activate {
        # Tell the user how to select a non-default encoding.
        ui_msg "\nDue to historical reasons, the default encoding of ChaSen is set to EUC-JP."
        ui_msg "If you'd like to handle text files written in UTF-8 or Shift_JIS, you may use -r and -i options.\n"
        ui_msg " UTF-8) chasen -r ${prefix}/etc/chasenrc-UTF-8 -i w "
        ui_msg " Shift_JIS) chasen -r ${prefix}/etc/chasenrc-Shift_JIS -i s \n"
    }
}
# Version check: search the project page for "<port name>-<version>" strings.
livecheck.check     regex
livecheck.url       http://chasen-legacy.sourceforge.jp/
livecheck.regex     ${name}-(\[0-9.a-z\-\]+)