Skip to content

Commit 4c5e442

Browse files
rimrul authored and dscho committed
Convert pdf, doc and docx files to text by default
Converting PDF and Word files to text before diffing them allows an easier comparison between changed files. This reintroduces some functionality of Git for Windows 1.x. The pdftotext tool exists both in the xpdf package and the poppler package; we opted to include the xpdf one because it requires us to add only one additional dependency, libstdc++-6.dll. Poppler's version would require 23 additional DLLs. In Johannes' tests, this change increased the size of the portable Git by a scant 701kB -- which he deems worth the benefit. This fixes git-for-windows/git#355 [jes: re-wrapped commit message, avoided double list entry for astextplain (in git-extra) and unzip.exe (dependency of docx2txt) that 7-Zip would complain about] Signed-off-by: Matthias Aßhauer <[email protected]> Signed-off-by: Johannes Schindelin <[email protected]>
1 parent d81733f commit 4c5e442

File tree

5 files changed

+57
-3
lines changed

5 files changed

+57
-3
lines changed

git-extra/PKGBUILD

+3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ pkgver() {
1818
build() {
1919
test $startdir/$pkgname.install -nt $startdir/$pkgname.install.in ||
2020
sed -e "/^@@GITCONFIG@@$/r $startdir/gitconfig" -e "/^@@GITCONFIG@@$/d" \
21+
-e "/^@@GITATTRIBUTES@@$/r $startdir/gitattributes" \
22+
-e "/^@@GITATTRIBUTES@@$/d" \
2123
<$startdir/$pkgname.install.in >$startdir/$pkgname.install
2224

2325
gcc -o create-shortcut.exe $startdir/create-shortcut.c -luuid -lole32
@@ -50,4 +52,5 @@ package() {
5052
install -m755 $startdir/bash_profile.sh $pkgdir/etc/profile.d
5153
install -m644 $startdir/msys2-32.ico $pkgdir/usr/share/git
5254
install -m644 $startdir/99-post-install-cleanup.post $pkgdir/etc/post-install
55+
install -m755 $startdir/astextplain $pkgdir/$mingwdir/bin
5356
}

git-extra/astextplain

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/sh
# astextplain: minimalistic replacement for `run-mailcap --action=cat <file>`
#
# Usage: astextplain <file>
#
# Writes a plain-text rendering of <file> to stdout, choosing the converter
# from the file name's extension. Exits 1 when the argument count is wrong or
# the extension is not handled.

# Enable -e here rather than on the shebang line, so it is also in effect
# when the script is started as `sh astextplain <file>`.
set -e

if test "$#" != 1 ; then
	echo "Usage: astextplain <file>" 1>&2
	exit 1
fi

# Real control characters for the sed expressions below. Spelling them as
# `\^M` / `\^L` makes sed look for a literal caret followed by a letter, so
# nothing would ever be stripped. Command substitution only drops trailing
# newlines, so the CR and FF bytes survive the assignment.
cr=$(printf '\r')
ff=$(printf '\f')

# XXX output encoding (UTF-8) hardcoded
case "$1" in
*.doc | *.DOC | *.dot | *.DOT)
	# Capture antiword's output before filtering it: in
	# `antiword | sed || cat` the pipeline's status is sed's, so the
	# fallback to the raw file could never run.
	if text=$(antiword -m UTF-8 "$1"); then
		# Strip a trailing carriage return from every line.
		printf '%s\n' "$text" | sed "s/${cr}\$//"
	else
		cat "$1"
	fi
	;;
*.docx | *.DOCX)
	docx2txt.pl "$1" -
	;;
*.pdf | *.PDF)
	# Options go before the input file; `-` sends the text to stdout.
	# Two separate basic-regex substitutions: trailing carriage return,
	# then the leading form feed that separates pages.
	pdftotext -layout -enc UTF-8 "$1" - |
	sed -e "s/${cr}\$//" -e "s/^${ff}//"
	;;
# TODO add rtf support
*.rtf | *.RTF)
	cat "$1"
	;;
*)
	echo "E: unsupported filetype $1" 1>&2
	exit 1
	;;
esac

exit 0

git-extra/git-extra.install.in

+6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ post_install () {
88
cat > /$dir/etc/gitconfig <<\GITCONFIG
99
@@GITCONFIG@@
1010
GITCONFIG
11+
test ! -d /$dir ||
12+
test -f /$dir/etc/gitattributes ||
13+
cat > /$dir/etc/gitattributes <<\GITATTRIBUTES
14+
@@GITATTRIBUTES@@
15+
16+
GITATTRIBUTES
1117
done
1218

1319
grep -q '^db_home: env windows' /etc/nsswitch.conf ||

git-extra/gitattributes

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
*.doc diff=astextplain
2+
*.DOC diff=astextplain
3+
*.docx diff=astextplain
4+
*.DOCX diff=astextplain
5+
*.dot diff=astextplain
6+
*.DOT diff=astextplain
7+
*.pdf diff=astextplain
8+
*.PDF diff=astextplain
9+
*.rtf diff=astextplain
10+
*.RTF diff=astextplain

make-file-list.sh

+8-3
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ pacman_list () {
2828
}
2929

3030
# Packages that have been added after Git SDK 1.0.0 was released...
31-
pacman -S --needed --noconfirm mingw-w64-$ARCH-connect git-flow >&2 ||
31+
pacman -S --needed --noconfirm mingw-w64-$ARCH-connect git-flow unzip docx2txt \
32+
mingw-w64-$ARCH-antiword mingw-w64-$ARCH-xpdf >&2 ||
3233
die "Could not install required packages"
3334

3435
pacman_list mingw-w64-$ARCH-git mingw-w64-$ARCH-git-doc-html \
3536
git-extra ncurses mintty vim openssh winpty \
3637
sed awk less grep gnupg tar findutils coreutils diffutils patch \
3738
dos2unix which subversion mingw-w64-$ARCH-tk \
38-
mingw-w64-$ARCH-connect git-flow "$@" |
39+
mingw-w64-$ARCH-connect git-flow docx2txt mingw-w64-$ARCH-antiword "$@" |
3940
grep -v -e '\.[acho]$' -e '\.l[ao]$' -e '/aclocal/' \
4041
-e '/man/' -e '/pkgconfig/' -e '/emacs/' \
4142
-e '^/usr/lib/python' -e '^/usr/lib/ruby' \
@@ -71,7 +72,8 @@ grep --perl-regexp -v -e '^/usr/(lib|share)/terminfo/(?!.*/(cygwin|dumb|xterm.*)
7172
sed 's/^\///'
7273

7374
test -z "$PACKAGE_VERSIONS_FILE" ||
74-
pacman -Q filesystem dash rebase util-linux >>"$PACKAGE_VERSIONS_FILE"
75+
pacman -Q filesystem dash rebase util-linux unzip \
76+
mingw-w64-$ARCH-xpdf >>"$PACKAGE_VERSIONS_FILE"
7577

7678
cat <<EOF
7779
etc/profile
@@ -89,4 +91,7 @@ usr/bin/dash.exe
8991
usr/bin/rebase.exe
9092
usr/bin/rebaseall
9193
usr/bin/getopt.exe
94+
mingw$BITNESS/etc/gitattributes
95+
mingw$BITNESS/bin/pdftotext.exe
96+
mingw$BITNESS/bin/libstdc++-6.dll
9297
EOF

0 commit comments

Comments
 (0)