From dc43032b7568eb4a389105675ed3d153c1a63206 Mon Sep 17 00:00:00 2001 From: Javran Cheng Date: Wed, 27 May 2020 12:06:25 -0700 Subject: [PATCH 01/10] Migrating scripts to Python 3. Python 2 is already sunset. This patch upgrades existing scripts to Python 3. --- .gitignore | 3 +++ benchmarks/python/cut.py | 2 +- benchmarks/python/multilang.py | 13 ++++++------- benchmarks/python/sort.py | 2 +- benchmarks/python/strip_tags.py | 2 +- benchmarks/python/utils.py | 2 +- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 3498bc01..44e8ce9b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ /GNUmakefile /.ghc.environment.* /cabal.project.local + +# Test data repo ignored. Please see instruction in tests-and-benchmarks.markdown +/tests/text-test-data/ diff --git a/benchmarks/python/cut.py b/benchmarks/python/cut.py index fbfc7b7a..da06f3cf 100644 --- a/benchmarks/python/cut.py +++ b/benchmarks/python/cut.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import utils, sys, codecs diff --git a/benchmarks/python/multilang.py b/benchmarks/python/multilang.py index f2868545..88d186f1 100755 --- a/benchmarks/python/multilang.py +++ b/benchmarks/python/multilang.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import math import sys @@ -12,23 +12,22 @@ def timer(f, count=100): a = 1e300 def g(): return - for i in xrange(3): + for i in range(3): start = time.time() - for j in xrange(count): + for j in range(count): g() a = min(a, (time.time() - start) / count) b = 1e300 - for i in xrange(3): + for i in range(3): start = time.time() - for j in xrange(count): + for j in range(count): f() b = min(b, (time.time() - start) / count) return round(b - a, int(round(math.log(count, 10) - math.log(b - a, 10)))) contents = open('../../tests/text-test-data/yiwiki.xml', 'r').read() -contents = contents.decode('utf-8') benchmarks = ( find_first, @@ -47,4 +46,4 @@ def g(): for b in bms: sys.stdout.write(b.__name__ 
+ ' ') sys.stdout.flush() - print b() + print(b()) diff --git a/benchmarks/python/sort.py b/benchmarks/python/sort.py index 2c8b3507..e24b187f 100644 --- a/benchmarks/python/sort.py +++ b/benchmarks/python/sort.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import utils, sys, codecs diff --git a/benchmarks/python/strip_tags.py b/benchmarks/python/strip_tags.py index 8f144bac..62cc8193 100644 --- a/benchmarks/python/strip_tags.py +++ b/benchmarks/python/strip_tags.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import utils, sys diff --git a/benchmarks/python/utils.py b/benchmarks/python/utils.py index 5651e9b2..7c513dd5 100755 --- a/benchmarks/python/utils.py +++ b/benchmarks/python/utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys, time From 7b2bd22a323485888d9f04331e7795d9824ccbe3 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Fri, 17 Jul 2020 00:52:39 +0300 Subject: [PATCH 02/10] Regenerate .travis.yml, bump version to 1.2.4.1 - There are no public API changes, only adapting to changes in dependencies - Use `else` in `text.cabal` --- .travis.yml | 122 ++++++++++++++++++++++++++++----------------------- changelog.md | 4 ++ text.cabal | 6 +-- 3 files changed, 74 insertions(+), 58 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3afb7710..072fa449 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,17 @@ # # haskell-ci 'text.cabal' # +# To regenerate the script (for example after adjusting tested-with) run +# +# haskell-ci regenerate +# # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.3.20190521 +# version: 0.10.1 # +version: ~> 1.0 language: c +os: linux dist: xenial git: # whether to recursively clone submodules @@ -15,6 +21,7 @@ cache: directories: - $HOME/.cabal/packages - $HOME/.cabal/store + - $HOME/.hlint before_cache: - rm -fv $CABALHOME/packages/hackage.haskell.org/build-reports.log # remove files that are regenerated by 'cabal update' @@ 
-24,68 +31,66 @@ before_cache: - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar - rm -fv $CABALHOME/packages/hackage.haskell.org/01-index.tar.idx - rm -rfv $CABALHOME/packages/head.hackage -matrix: +jobs: include: + - compiler: ghc-8.10.1 + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.10.1","cabal-install-3.2"]}} + os: linux + - compiler: ghc-8.8.3 + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.3","cabal-install-3.2"]}} + os: linux - compiler: ghc-8.6.5 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.6.5","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.2"]}} + os: linux - compiler: ghc-8.4.4 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.4.4","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.2"]}} + os: linux - compiler: ghc-8.2.2 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.2.2","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.2.2","cabal-install-3.2"]}} + os: linux - compiler: ghc-8.0.2 - addons: 
{"apt":{"sources":["hvr-ghc"],"packages":["ghc-8.0.2","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.0.2","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.10.3 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.10.3","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.10.3","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.8.4 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.8.4","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.8.4","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.6.3 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.6.3","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.6.3","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.4.2 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.4.2","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.4.2","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.2.2 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.2.2","cabal-install-2.4"]}} + addons: 
{"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.2.2","cabal-install-3.2"]}} + os: linux - compiler: ghc-7.0.4 - addons: {"apt":{"sources":["hvr-ghc"],"packages":["ghc-7.0.4","cabal-install-2.4"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.0.4","cabal-install-3.2"]}} + os: linux before_install: - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//') + - WITHCOMPILER="-w $HC" + - HADDOCK=$(echo "/opt/$CC/bin/haddock" | sed 's/-/\//') - HCPKG="$HC-pkg" - unset CC - CABAL=/opt/ghc/bin/cabal - CABALHOME=$HOME/.cabal - export PATH="$CABALHOME/bin:$PATH" - TOP=$(pwd) - - HCNUMVER=$(( $(${HC} --numeric-version|sed -E 's/([0-9]+)\.([0-9]+)\.([0-9]+).*/\1 * 10000 + \2 * 100 + \3/') )) + - "HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\\d+)\\.(\\d+)\\.(\\d+)(\\.(\\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? 
$5 != 1 : $3))')" - echo $HCNUMVER - - CABAL="$CABAL -vnormal+nowrap+markoutput" + - CABAL="$CABAL -vnormal+nowrap" - set -o pipefail - - | - echo 'function blue(s) { printf "\033[0;34m" s "\033[0m " }' >> .colorful.awk - echo 'BEGIN { state = "output"; }' >> .colorful.awk - echo '/^-----BEGIN CABAL OUTPUT-----$/ { state = "cabal" }' >> .colorful.awk - echo '/^-----END CABAL OUTPUT-----$/ { state = "output" }' >> .colorful.awk - echo '!/^(-----BEGIN CABAL OUTPUT-----|-----END CABAL OUTPUT-----)/ {' >> .colorful.awk - echo ' if (state == "cabal") {' >> .colorful.awk - echo ' print blue($0)' >> .colorful.awk - echo ' } else {' >> .colorful.awk - echo ' print $0' >> .colorful.awk - echo ' }' >> .colorful.awk - echo '}' >> .colorful.awk - - cat .colorful.awk - - | - color_cabal_output () { - awk -f $TOP/.colorful.awk - } - - echo text | color_cabal_output -install: - - ${CABAL} --version - - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" - TEST=--enable-tests - BENCH=--enable-benchmarks - - GHCHEAD=${GHCHEAD-false} + - HEADHACKAGE=false - rm -f $CABALHOME/config - | echo "verbose: normal +nowrap +markoutput" >> $CABALHOME/config echo "remote-build-reporting: anonymous" >> $CABALHOME/config + echo "write-ghc-environment-files: always" >> $CABALHOME/config echo "remote-repo-cache: $CABALHOME/packages" >> $CABALHOME/config echo "logs-dir: $CABALHOME/logs" >> $CABALHOME/config echo "world-file: $CABALHOME/world" >> $CABALHOME/config @@ -98,6 +103,12 @@ install: echo " prefix: $CABALHOME" >> $CABALHOME/config echo "repository hackage.haskell.org" >> $CABALHOME/config echo " url: http://hackage.haskell.org/" >> $CABALHOME/config +install: + - ${CABAL} --version + - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" + - | + echo "program-default-options" >> $CABALHOME/config + echo " ghc-options: $GHCJOBS +RTS -M6G -RTS" >> $CABALHOME/config - cat $CABALHOME/config - rm -fv cabal.project 
cabal.project.local cabal.project.freeze - travis_retry ${CABAL} v2-update -v @@ -105,54 +116,55 @@ install: - rm -rf cabal.project cabal.project.local cabal.project.freeze - touch cabal.project - | - echo 'packages: "."' >> cabal.project + echo "packages: ." >> cabal.project + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" - | - echo "write-ghc-environment-files: always" >> cabal.project - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" - cat cabal.project || true - cat cabal.project.local || true - if [ -f "./configure.ac" ]; then (cd "." && autoreconf -i); fi - - ${CABAL} v2-freeze -w ${HC} ${TEST} ${BENCH} | color_cabal_output + - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH} - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'" - rm cabal.project.freeze - - ${CABAL} v2-build -w ${HC} ${TEST} ${BENCH} --dep -j2 all | color_cabal_output - - ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks --dep -j2 all | color_cabal_output + - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all + - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all script: - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) # Packaging... - - ${CABAL} v2-sdist all | color_cabal_output + - ${CABAL} v2-sdist all # Unpacking... - mv dist-newstyle/sdist/*.tar.gz ${DISTDIR}/ - - cp -av th-tests ${DISTDIR}/th-tests.src - cd ${DISTDIR} || false - - find . -maxdepth 1 -name '*.tar.gz' -exec tar -xvf '{}' \; + - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \; + - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm '{}' \; + - PKGDIR_text="$(find . 
-maxdepth 1 -type d -regex '.*/text-[0-9.]*')" # Generate cabal.project - rm -rf cabal.project cabal.project.local cabal.project.freeze - touch cabal.project - | - echo 'packages: "text-*/*.cabal"' >> cabal.project - - | - echo 'packages: ./th-tests.src/' >> cabal.project + echo "packages: ${PKGDIR_text}" >> cabal.project + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" - | - echo "write-ghc-environment-files: always" >> cabal.project - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" - cat cabal.project || true - cat cabal.project.local || true # Building... # this builds all libraries and executables (without tests/benchmarks) - - ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks all | color_cabal_output + - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all # Building with tests and benchmarks... # build & run tests, build benchmarks - - ${CABAL} v2-build -w ${HC} ${TEST} ${BENCH} all | color_cabal_output + - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all # Testing... - - ${CABAL} v2-test -w ${HC} ${TEST} ${BENCH} all | color_cabal_output + - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all # cabal check... - - (cd text-* && ${CABAL} -vnormal check) + - (cd ${PKGDIR_text} && ${CABAL} -vnormal check) # haddock... - - ${CABAL} v2-haddock -w ${HC} ${TEST} ${BENCH} all | color_cabal_output + - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all # Building without installed constraints for packages in global-db... 
- rm -f cabal.project.local - - ${CABAL} v2-build -w ${HC} --disable-tests --disable-benchmarks all | color_cabal_output + - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ["text.cabal"] +# REGENDATA ("0.10.1",["text.cabal"]) # EOF diff --git a/changelog.md b/changelog.md index 99641c02..3522ba09 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +### 1.2.4.1 + +* Support `template-haskell-2.17.0.0` + ### 1.2.4.0 * Add TH `Lift` instances for `Data.Text.Text` and `Data.Text.Lazy.Text` (gh-232) diff --git a/text.cabal b/text.cabal index 6b46fd65..bb297749 100644 --- a/text.cabal +++ b/text.cabal @@ -1,6 +1,6 @@ cabal-version: >= 1.10 name: text -version: 1.2.4.0 +version: 1.2.4.1 homepage: https://github.com/haskell/text bug-reports: https://github.com/haskell/text/issues @@ -56,7 +56,7 @@ maintainer: Bryan O'Sullivan , Herbert Valerio Riedel = 8.11) build-depends: ghc-bignum - if impl(ghc < 8.11) + else if flag(integer-simple) cpp-options: -DINTEGER_SIMPLE build-depends: integer-simple >= 0.1 && < 0.5 From d5118aa5a8301cfff4ff55b2e7325900c34ebdeb Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Fri, 17 Jul 2020 01:22:53 +0300 Subject: [PATCH 03/10] Break tests dependency cycle - move source to src/ - Use symbolic links for package replicas --- .travis.yml | 38 ++- benchmarks/LICENSE | 1 + benchmarks/cbits | 1 + .../{cbits => cbits-bench}/time_iconv.c | 0 benchmarks/include | 1 + benchmarks/src | 1 + benchmarks/text-benchmarks.cabal | 18 +- cabal.haskell-ci | 5 + cabal.project | 5 +- {Data => src/Data}/Text.hs | 0 {Data => src/Data}/Text/Array.hs | 0 {Data => src/Data}/Text/Encoding.hs | 0 {Data => src/Data}/Text/Encoding/Error.hs | 0 {Data => src/Data}/Text/Foreign.hs | 0 {Data => src/Data}/Text/IO.hs | 0 {Data => src/Data}/Text/Internal.hs | 0 {Data => src/Data}/Text/Internal/Builder.hs | 0 .../Data}/Text/Internal/Builder/Functions.hs | 0 .../Data}/Text/Internal/Builder/Int/Digits.hs | 0 
.../Internal/Builder/RealFloat/Functions.hs | 0 .../Data}/Text/Internal/Encoding/Fusion.hs | 0 .../Text/Internal/Encoding/Fusion/Common.hs | 0 .../Data}/Text/Internal/Encoding/Utf16.hs | 0 .../Data}/Text/Internal/Encoding/Utf32.hs | 0 .../Data}/Text/Internal/Encoding/Utf8.hs | 0 {Data => src/Data}/Text/Internal/Functions.hs | 0 {Data => src/Data}/Text/Internal/Fusion.hs | 0 .../Data}/Text/Internal/Fusion/CaseMapping.hs | 0 .../Data}/Text/Internal/Fusion/Common.hs | 0 .../Data}/Text/Internal/Fusion/Size.hs | 0 .../Data}/Text/Internal/Fusion/Types.hs | 0 {Data => src/Data}/Text/Internal/IO.hs | 0 {Data => src/Data}/Text/Internal/Lazy.hs | 0 .../Text/Internal/Lazy/Encoding/Fusion.hs | 0 .../Data}/Text/Internal/Lazy/Fusion.hs | 0 .../Data}/Text/Internal/Lazy/Search.hs | 0 {Data => src/Data}/Text/Internal/Private.hs | 0 {Data => src/Data}/Text/Internal/Read.hs | 0 {Data => src/Data}/Text/Internal/Search.hs | 0 {Data => src/Data}/Text/Internal/Unsafe.hs | 0 .../Data}/Text/Internal/Unsafe/Char.hs | 0 .../Data}/Text/Internal/Unsafe/Shift.hs | 0 {Data => src/Data}/Text/Lazy.hs | 0 {Data => src/Data}/Text/Lazy/Builder.hs | 0 {Data => src/Data}/Text/Lazy/Builder/Int.hs | 0 .../Data}/Text/Lazy/Builder/RealFloat.hs | 0 {Data => src/Data}/Text/Lazy/Encoding.hs | 0 {Data => src/Data}/Text/Lazy/IO.hs | 0 {Data => src/Data}/Text/Lazy/Internal.hs | 0 {Data => src/Data}/Text/Lazy/Read.hs | 0 {Data => src/Data}/Text/Read.hs | 0 {Data => src/Data}/Text/Show.hs | 0 {Data => src/Data}/Text/Unsafe.hs | 0 tests/LICENSE | 1 + tests/cbits | 1 + tests/include | 1 + tests/src | 1 + tests/text-tests.cabal | 257 ++++++++++-------- text.cabal | 115 +------- th-tests/th-tests.cabal | 8 +- 60 files changed, 217 insertions(+), 237 deletions(-) create mode 120000 benchmarks/LICENSE create mode 120000 benchmarks/cbits rename benchmarks/{cbits => cbits-bench}/time_iconv.c (100%) create mode 120000 benchmarks/include create mode 120000 benchmarks/src create mode 100644 cabal.haskell-ci rename {Data => 
src/Data}/Text.hs (100%) rename {Data => src/Data}/Text/Array.hs (100%) rename {Data => src/Data}/Text/Encoding.hs (100%) rename {Data => src/Data}/Text/Encoding/Error.hs (100%) rename {Data => src/Data}/Text/Foreign.hs (100%) rename {Data => src/Data}/Text/IO.hs (100%) rename {Data => src/Data}/Text/Internal.hs (100%) rename {Data => src/Data}/Text/Internal/Builder.hs (100%) rename {Data => src/Data}/Text/Internal/Builder/Functions.hs (100%) rename {Data => src/Data}/Text/Internal/Builder/Int/Digits.hs (100%) rename {Data => src/Data}/Text/Internal/Builder/RealFloat/Functions.hs (100%) rename {Data => src/Data}/Text/Internal/Encoding/Fusion.hs (100%) rename {Data => src/Data}/Text/Internal/Encoding/Fusion/Common.hs (100%) rename {Data => src/Data}/Text/Internal/Encoding/Utf16.hs (100%) rename {Data => src/Data}/Text/Internal/Encoding/Utf32.hs (100%) rename {Data => src/Data}/Text/Internal/Encoding/Utf8.hs (100%) rename {Data => src/Data}/Text/Internal/Functions.hs (100%) rename {Data => src/Data}/Text/Internal/Fusion.hs (100%) rename {Data => src/Data}/Text/Internal/Fusion/CaseMapping.hs (100%) rename {Data => src/Data}/Text/Internal/Fusion/Common.hs (100%) rename {Data => src/Data}/Text/Internal/Fusion/Size.hs (100%) rename {Data => src/Data}/Text/Internal/Fusion/Types.hs (100%) rename {Data => src/Data}/Text/Internal/IO.hs (100%) rename {Data => src/Data}/Text/Internal/Lazy.hs (100%) rename {Data => src/Data}/Text/Internal/Lazy/Encoding/Fusion.hs (100%) rename {Data => src/Data}/Text/Internal/Lazy/Fusion.hs (100%) rename {Data => src/Data}/Text/Internal/Lazy/Search.hs (100%) rename {Data => src/Data}/Text/Internal/Private.hs (100%) rename {Data => src/Data}/Text/Internal/Read.hs (100%) rename {Data => src/Data}/Text/Internal/Search.hs (100%) rename {Data => src/Data}/Text/Internal/Unsafe.hs (100%) rename {Data => src/Data}/Text/Internal/Unsafe/Char.hs (100%) rename {Data => src/Data}/Text/Internal/Unsafe/Shift.hs (100%) rename {Data => src/Data}/Text/Lazy.hs 
(100%) rename {Data => src/Data}/Text/Lazy/Builder.hs (100%) rename {Data => src/Data}/Text/Lazy/Builder/Int.hs (100%) rename {Data => src/Data}/Text/Lazy/Builder/RealFloat.hs (100%) rename {Data => src/Data}/Text/Lazy/Encoding.hs (100%) rename {Data => src/Data}/Text/Lazy/IO.hs (100%) rename {Data => src/Data}/Text/Lazy/Internal.hs (100%) rename {Data => src/Data}/Text/Lazy/Read.hs (100%) rename {Data => src/Data}/Text/Read.hs (100%) rename {Data => src/Data}/Text/Show.hs (100%) rename {Data => src/Data}/Text/Unsafe.hs (100%) create mode 120000 tests/LICENSE create mode 120000 tests/cbits create mode 120000 tests/include create mode 120000 tests/src diff --git a/.travis.yml b/.travis.yml index 072fa449..c9f3948a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ # This Travis job script has been generated by a script via # -# haskell-ci 'text.cabal' +# haskell-ci '--config=cabal.haskell-ci' 'cabal.project' # # To regenerate the script (for example after adjusting tested-with) run # @@ -85,6 +85,7 @@ before_install: - set -o pipefail - TEST=--enable-tests - BENCH=--enable-benchmarks + - if [ $HCNUMVER -lt 70400 ] ; then BENCH=--disable-benchmarks ; fi - HEADHACKAGE=false - rm -f $CABALHOME/config - | @@ -117,18 +118,28 @@ install: - touch cabal.project - | echo "packages: ." 
>> cabal.project + echo "packages: tests" >> cabal.project + echo "packages: th-tests" >> cabal.project + if [ $HCNUMVER -ge 71000 ] ; then echo "packages: benchmarks" >> cabal.project ; fi - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text' >> cabal.project ; fi - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text-tests' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package th-tests' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text-benchmarks' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" - | - - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" + - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(parsec|text|text-benchmarks|text-tests|th-tests)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" - cat cabal.project || true - cat cabal.project.local || true - if [ -f "./configure.ac" ]; then (cd "." 
&& autoreconf -i); fi + - if [ -f "tests/configure.ac" ]; then (cd "tests" && autoreconf -i); fi + - if [ -f "th-tests/configure.ac" ]; then (cd "th-tests" && autoreconf -i); fi + - if [ -f "benchmarks/configure.ac" ]; then (cd "benchmarks" && autoreconf -i); fi - ${CABAL} v2-freeze $WITHCOMPILER ${TEST} ${BENCH} - "cat cabal.project.freeze | sed -E 's/^(constraints: *| *)//' | sed 's/any.//'" - rm cabal.project.freeze - - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} --dep -j2 all - - travis_wait 40 ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks --dep -j2 all script: - DISTDIR=$(mktemp -d /tmp/dist-test.XXXX) # Packaging... @@ -139,15 +150,27 @@ script: - find . -maxdepth 1 -type f -name '*.tar.gz' -exec tar -xvf '{}' \; - find . -maxdepth 1 -type f -name '*.tar.gz' -exec rm '{}' \; - PKGDIR_text="$(find . -maxdepth 1 -type d -regex '.*/text-[0-9.]*')" + - PKGDIR_text_tests="$(find . -maxdepth 1 -type d -regex '.*/text-tests-[0-9.]*')" + - PKGDIR_th_tests="$(find . -maxdepth 1 -type d -regex '.*/th-tests-[0-9.]*')" + - PKGDIR_text_benchmarks="$(find . 
-maxdepth 1 -type d -regex '.*/text-benchmarks-[0-9.]*')" # Generate cabal.project - rm -rf cabal.project cabal.project.local cabal.project.freeze - touch cabal.project - | echo "packages: ${PKGDIR_text}" >> cabal.project + echo "packages: ${PKGDIR_text_tests}" >> cabal.project + echo "packages: ${PKGDIR_th_tests}" >> cabal.project + if [ $HCNUMVER -ge 71000 ] ; then echo "packages: ${PKGDIR_text_benchmarks}" >> cabal.project ; fi - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text' >> cabal.project ; fi - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text-tests' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package th-tests' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" + - if [ $HCNUMVER -ge 80200 ] ; then echo 'package text-benchmarks' >> cabal.project ; fi + - "if [ $HCNUMVER -ge 80200 ] ; then echo ' ghc-options: -Werror=missing-methods' >> cabal.project ; fi" - | - - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(text)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" + - "for pkg in $($HCPKG list --simple-output); do echo $pkg | sed 's/-[^-]*$//' | (grep -vE -- '^(parsec|text|text-benchmarks|text-tests|th-tests)$' || true) | sed 's/^/constraints: /' | sed 's/$/ installed/' >> cabal.project.local; done" - cat cabal.project || true - cat cabal.project.local || true # Building... @@ -160,11 +183,14 @@ script: - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all # cabal check... 
- (cd ${PKGDIR_text} && ${CABAL} -vnormal check) + - (cd ${PKGDIR_text_tests} && ${CABAL} -vnormal check) + - (cd ${PKGDIR_th_tests} && ${CABAL} -vnormal check) + - if [ $HCNUMVER -ge 71000 ] ; then (cd ${PKGDIR_text_benchmarks} && ${CABAL} -vnormal check) ; fi # haddock... - ${CABAL} v2-haddock $WITHCOMPILER --with-haddock $HADDOCK ${TEST} ${BENCH} all # Building without installed constraints for packages in global-db... - rm -f cabal.project.local - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ("0.10.1",["text.cabal"]) +# REGENDATA ("0.10.1",["--config=cabal.haskell-ci","cabal.project"]) # EOF diff --git a/benchmarks/LICENSE b/benchmarks/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/benchmarks/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/benchmarks/cbits b/benchmarks/cbits new file mode 120000 index 00000000..904f446c --- /dev/null +++ b/benchmarks/cbits @@ -0,0 +1 @@ +../cbits \ No newline at end of file diff --git a/benchmarks/cbits/time_iconv.c b/benchmarks/cbits-bench/time_iconv.c similarity index 100% rename from benchmarks/cbits/time_iconv.c rename to benchmarks/cbits-bench/time_iconv.c diff --git a/benchmarks/include b/benchmarks/include new file mode 120000 index 00000000..f5030fe8 --- /dev/null +++ b/benchmarks/include @@ -0,0 +1 @@ +../include \ No newline at end of file diff --git a/benchmarks/src b/benchmarks/src new file mode 120000 index 00000000..5cd551cf --- /dev/null +++ b/benchmarks/src @@ -0,0 +1 @@ +../src \ No newline at end of file diff --git a/benchmarks/text-benchmarks.cabal b/benchmarks/text-benchmarks.cabal index bc562b07..7f606f3c 100644 --- a/benchmarks/text-benchmarks.cabal +++ b/benchmarks/text-benchmarks.cabal @@ -2,10 +2,10 @@ cabal-version: 1.12 name: text-benchmarks version: 0.0.0.0 synopsis: Benchmarks for the text package -description: Benchmarks for the text package +description: Benchmarks for the text package. 
homepage: https://bitbucket.org/bos/text license: BSD2 -license-file: ../LICENSE +license-file: LICENSE author: Jasper Van der Jeugt , Bryan O'Sullivan , Tom Harper , @@ -13,6 +13,11 @@ author: Jasper Van der Jeugt , maintainer: jaspervdj@gmail.com category: Text build-type: Simple +tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4, + GHC==8.2.2, GHC==8.0.2, GHC==7.10.3 + +extra-source-files: + include/*.h flag bytestring-builder description: Depend on the bytestring-builder package for backwards compatibility. @@ -54,7 +59,7 @@ executable text-benchmarks build-depends: bytestring >= 0.10.4 -- modules for benchmark proper - c-sources: cbits/time_iconv.c + c-sources: cbits-bench/time_iconv.c hs-source-dirs: haskell main-is: Benchmarks.hs other-modules: @@ -81,9 +86,9 @@ executable text-benchmarks -- Source code for IUT (implementation under test) -- "borrowed" from parent folder - include-dirs: ../include - c-sources: ../cbits/cbits.c - hs-source-dirs: .. + include-dirs: include + c-sources: cbits/cbits.c + hs-source-dirs: src other-modules: Data.Text Data.Text.Array @@ -132,6 +137,7 @@ executable text-benchmarks default-language: Haskell2010 default-extensions: NondecreasingIndentation + other-extensions: DeriveAnyClass executable text-multilang diff --git a/cabal.haskell-ci b/cabal.haskell-ci new file mode 100644 index 00000000..e8fd8e5d --- /dev/null +++ b/cabal.haskell-ci @@ -0,0 +1,5 @@ +benchmarks: >=7.4 +jobs-selection: any + +installed: +all -text -parsec +install-dependencies: False diff --git a/cabal.project b/cabal.project index 50143f87..5393178a 100644 --- a/cabal.project +++ b/cabal.project @@ -1,3 +1,6 @@ -- See http://cabal.readthedocs.io/en/latest/nix-local-build-overview.html -packages: ., benchmarks, th-tests +packages: . 
+packages: tests +packages: th-tests +packages: benchmarks tests: True diff --git a/Data/Text.hs b/src/Data/Text.hs similarity index 100% rename from Data/Text.hs rename to src/Data/Text.hs diff --git a/Data/Text/Array.hs b/src/Data/Text/Array.hs similarity index 100% rename from Data/Text/Array.hs rename to src/Data/Text/Array.hs diff --git a/Data/Text/Encoding.hs b/src/Data/Text/Encoding.hs similarity index 100% rename from Data/Text/Encoding.hs rename to src/Data/Text/Encoding.hs diff --git a/Data/Text/Encoding/Error.hs b/src/Data/Text/Encoding/Error.hs similarity index 100% rename from Data/Text/Encoding/Error.hs rename to src/Data/Text/Encoding/Error.hs diff --git a/Data/Text/Foreign.hs b/src/Data/Text/Foreign.hs similarity index 100% rename from Data/Text/Foreign.hs rename to src/Data/Text/Foreign.hs diff --git a/Data/Text/IO.hs b/src/Data/Text/IO.hs similarity index 100% rename from Data/Text/IO.hs rename to src/Data/Text/IO.hs diff --git a/Data/Text/Internal.hs b/src/Data/Text/Internal.hs similarity index 100% rename from Data/Text/Internal.hs rename to src/Data/Text/Internal.hs diff --git a/Data/Text/Internal/Builder.hs b/src/Data/Text/Internal/Builder.hs similarity index 100% rename from Data/Text/Internal/Builder.hs rename to src/Data/Text/Internal/Builder.hs diff --git a/Data/Text/Internal/Builder/Functions.hs b/src/Data/Text/Internal/Builder/Functions.hs similarity index 100% rename from Data/Text/Internal/Builder/Functions.hs rename to src/Data/Text/Internal/Builder/Functions.hs diff --git a/Data/Text/Internal/Builder/Int/Digits.hs b/src/Data/Text/Internal/Builder/Int/Digits.hs similarity index 100% rename from Data/Text/Internal/Builder/Int/Digits.hs rename to src/Data/Text/Internal/Builder/Int/Digits.hs diff --git a/Data/Text/Internal/Builder/RealFloat/Functions.hs b/src/Data/Text/Internal/Builder/RealFloat/Functions.hs similarity index 100% rename from Data/Text/Internal/Builder/RealFloat/Functions.hs rename to 
src/Data/Text/Internal/Builder/RealFloat/Functions.hs diff --git a/Data/Text/Internal/Encoding/Fusion.hs b/src/Data/Text/Internal/Encoding/Fusion.hs similarity index 100% rename from Data/Text/Internal/Encoding/Fusion.hs rename to src/Data/Text/Internal/Encoding/Fusion.hs diff --git a/Data/Text/Internal/Encoding/Fusion/Common.hs b/src/Data/Text/Internal/Encoding/Fusion/Common.hs similarity index 100% rename from Data/Text/Internal/Encoding/Fusion/Common.hs rename to src/Data/Text/Internal/Encoding/Fusion/Common.hs diff --git a/Data/Text/Internal/Encoding/Utf16.hs b/src/Data/Text/Internal/Encoding/Utf16.hs similarity index 100% rename from Data/Text/Internal/Encoding/Utf16.hs rename to src/Data/Text/Internal/Encoding/Utf16.hs diff --git a/Data/Text/Internal/Encoding/Utf32.hs b/src/Data/Text/Internal/Encoding/Utf32.hs similarity index 100% rename from Data/Text/Internal/Encoding/Utf32.hs rename to src/Data/Text/Internal/Encoding/Utf32.hs diff --git a/Data/Text/Internal/Encoding/Utf8.hs b/src/Data/Text/Internal/Encoding/Utf8.hs similarity index 100% rename from Data/Text/Internal/Encoding/Utf8.hs rename to src/Data/Text/Internal/Encoding/Utf8.hs diff --git a/Data/Text/Internal/Functions.hs b/src/Data/Text/Internal/Functions.hs similarity index 100% rename from Data/Text/Internal/Functions.hs rename to src/Data/Text/Internal/Functions.hs diff --git a/Data/Text/Internal/Fusion.hs b/src/Data/Text/Internal/Fusion.hs similarity index 100% rename from Data/Text/Internal/Fusion.hs rename to src/Data/Text/Internal/Fusion.hs diff --git a/Data/Text/Internal/Fusion/CaseMapping.hs b/src/Data/Text/Internal/Fusion/CaseMapping.hs similarity index 100% rename from Data/Text/Internal/Fusion/CaseMapping.hs rename to src/Data/Text/Internal/Fusion/CaseMapping.hs diff --git a/Data/Text/Internal/Fusion/Common.hs b/src/Data/Text/Internal/Fusion/Common.hs similarity index 100% rename from Data/Text/Internal/Fusion/Common.hs rename to src/Data/Text/Internal/Fusion/Common.hs diff --git 
a/Data/Text/Internal/Fusion/Size.hs b/src/Data/Text/Internal/Fusion/Size.hs similarity index 100% rename from Data/Text/Internal/Fusion/Size.hs rename to src/Data/Text/Internal/Fusion/Size.hs diff --git a/Data/Text/Internal/Fusion/Types.hs b/src/Data/Text/Internal/Fusion/Types.hs similarity index 100% rename from Data/Text/Internal/Fusion/Types.hs rename to src/Data/Text/Internal/Fusion/Types.hs diff --git a/Data/Text/Internal/IO.hs b/src/Data/Text/Internal/IO.hs similarity index 100% rename from Data/Text/Internal/IO.hs rename to src/Data/Text/Internal/IO.hs diff --git a/Data/Text/Internal/Lazy.hs b/src/Data/Text/Internal/Lazy.hs similarity index 100% rename from Data/Text/Internal/Lazy.hs rename to src/Data/Text/Internal/Lazy.hs diff --git a/Data/Text/Internal/Lazy/Encoding/Fusion.hs b/src/Data/Text/Internal/Lazy/Encoding/Fusion.hs similarity index 100% rename from Data/Text/Internal/Lazy/Encoding/Fusion.hs rename to src/Data/Text/Internal/Lazy/Encoding/Fusion.hs diff --git a/Data/Text/Internal/Lazy/Fusion.hs b/src/Data/Text/Internal/Lazy/Fusion.hs similarity index 100% rename from Data/Text/Internal/Lazy/Fusion.hs rename to src/Data/Text/Internal/Lazy/Fusion.hs diff --git a/Data/Text/Internal/Lazy/Search.hs b/src/Data/Text/Internal/Lazy/Search.hs similarity index 100% rename from Data/Text/Internal/Lazy/Search.hs rename to src/Data/Text/Internal/Lazy/Search.hs diff --git a/Data/Text/Internal/Private.hs b/src/Data/Text/Internal/Private.hs similarity index 100% rename from Data/Text/Internal/Private.hs rename to src/Data/Text/Internal/Private.hs diff --git a/Data/Text/Internal/Read.hs b/src/Data/Text/Internal/Read.hs similarity index 100% rename from Data/Text/Internal/Read.hs rename to src/Data/Text/Internal/Read.hs diff --git a/Data/Text/Internal/Search.hs b/src/Data/Text/Internal/Search.hs similarity index 100% rename from Data/Text/Internal/Search.hs rename to src/Data/Text/Internal/Search.hs diff --git a/Data/Text/Internal/Unsafe.hs 
b/src/Data/Text/Internal/Unsafe.hs similarity index 100% rename from Data/Text/Internal/Unsafe.hs rename to src/Data/Text/Internal/Unsafe.hs diff --git a/Data/Text/Internal/Unsafe/Char.hs b/src/Data/Text/Internal/Unsafe/Char.hs similarity index 100% rename from Data/Text/Internal/Unsafe/Char.hs rename to src/Data/Text/Internal/Unsafe/Char.hs diff --git a/Data/Text/Internal/Unsafe/Shift.hs b/src/Data/Text/Internal/Unsafe/Shift.hs similarity index 100% rename from Data/Text/Internal/Unsafe/Shift.hs rename to src/Data/Text/Internal/Unsafe/Shift.hs diff --git a/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs similarity index 100% rename from Data/Text/Lazy.hs rename to src/Data/Text/Lazy.hs diff --git a/Data/Text/Lazy/Builder.hs b/src/Data/Text/Lazy/Builder.hs similarity index 100% rename from Data/Text/Lazy/Builder.hs rename to src/Data/Text/Lazy/Builder.hs diff --git a/Data/Text/Lazy/Builder/Int.hs b/src/Data/Text/Lazy/Builder/Int.hs similarity index 100% rename from Data/Text/Lazy/Builder/Int.hs rename to src/Data/Text/Lazy/Builder/Int.hs diff --git a/Data/Text/Lazy/Builder/RealFloat.hs b/src/Data/Text/Lazy/Builder/RealFloat.hs similarity index 100% rename from Data/Text/Lazy/Builder/RealFloat.hs rename to src/Data/Text/Lazy/Builder/RealFloat.hs diff --git a/Data/Text/Lazy/Encoding.hs b/src/Data/Text/Lazy/Encoding.hs similarity index 100% rename from Data/Text/Lazy/Encoding.hs rename to src/Data/Text/Lazy/Encoding.hs diff --git a/Data/Text/Lazy/IO.hs b/src/Data/Text/Lazy/IO.hs similarity index 100% rename from Data/Text/Lazy/IO.hs rename to src/Data/Text/Lazy/IO.hs diff --git a/Data/Text/Lazy/Internal.hs b/src/Data/Text/Lazy/Internal.hs similarity index 100% rename from Data/Text/Lazy/Internal.hs rename to src/Data/Text/Lazy/Internal.hs diff --git a/Data/Text/Lazy/Read.hs b/src/Data/Text/Lazy/Read.hs similarity index 100% rename from Data/Text/Lazy/Read.hs rename to src/Data/Text/Lazy/Read.hs diff --git a/Data/Text/Read.hs b/src/Data/Text/Read.hs similarity index 100% 
rename from Data/Text/Read.hs rename to src/Data/Text/Read.hs diff --git a/Data/Text/Show.hs b/src/Data/Text/Show.hs similarity index 100% rename from Data/Text/Show.hs rename to src/Data/Text/Show.hs diff --git a/Data/Text/Unsafe.hs b/src/Data/Text/Unsafe.hs similarity index 100% rename from Data/Text/Unsafe.hs rename to src/Data/Text/Unsafe.hs diff --git a/tests/LICENSE b/tests/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/tests/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/tests/cbits b/tests/cbits new file mode 120000 index 00000000..904f446c --- /dev/null +++ b/tests/cbits @@ -0,0 +1 @@ +../cbits \ No newline at end of file diff --git a/tests/include b/tests/include new file mode 120000 index 00000000..f5030fe8 --- /dev/null +++ b/tests/include @@ -0,0 +1 @@ +../include \ No newline at end of file diff --git a/tests/src b/tests/src new file mode 120000 index 00000000..5cd551cf --- /dev/null +++ b/tests/src @@ -0,0 +1 @@ +../src \ No newline at end of file diff --git a/tests/text-tests.cabal b/tests/text-tests.cabal index 06361b32..7a84c901 100644 --- a/tests/text-tests.cabal +++ b/tests/text-tests.cabal @@ -1,34 +1,96 @@ -cabal-version: 1.12 -name: text-tests -version: 0.0.0.0 - -synopsis: Functional tests for the text package -description: Functional tests for the text package -homepage: https://github.com/bos/text -license: BSD2 -license-file: ../LICENSE -author: Jasper Van der Jeugt , - Bryan O'Sullivan , - Tom Harper , - Duncan Coutts -maintainer: Bryan O'Sullivan -category: Text -build-type: Simple - - -flag hpc - description: Enable HPC to generate coverage reports - default: False - manual: True +cabal-version: >= 1.10 +name: text-tests +version: 1.2.4.1 + +homepage: https://github.com/haskell/text +bug-reports: https://github.com/haskell/text/issues +synopsis: An efficient packed Unicode text type. +description: + . 
+ An efficient packed, immutable Unicode text type (both strict and + lazy), with a powerful loop fusion optimization framework. + . + The 'Text' type represents Unicode character strings, in a time and + space-efficient manner. This package provides text processing + capabilities that are optimized for performance critical use, both + in terms of large data quantities and high speed. + . + The 'Text' type provides character-encoding, type-safe case + conversion via whole-string case conversion functions (see "Data.Text"). + It also provides a range of functions for converting 'Text' values to + and from 'ByteStrings', using several standard encodings + (see "Data.Text.Encoding"). + . + Efficient locale-sensitive support for text IO is also supported + (see "Data.Text.IO"). + . + These modules are intended to be imported qualified, to avoid name + clashes with Prelude functions, e.g. + . + > import qualified Data.Text as T + . + == ICU Support + . + To use an extended and very rich family of functions for working + with Unicode text (including normalization, regular expressions, + non-standard encodings, text breaking, and locales), see + the [text-icu package](https://hackage.haskell.org/package/text-icu) + based on the well-respected and liberally + licensed [ICU library](http://site.icu-project.org/). + . + == Internal Representation: UTF-16 vs. UTF-8 + . + Currently the @text@ library uses UTF-16 as its internal representation + which is [neither a fixed-width nor always the most dense representation](http://utf8everywhere.org/) + for Unicode text. We're currently investigating the feasibility + of [changing Text's internal representation to UTF-8](https://github.com/text-utf8) + and if you need such a 'Text' type right now you might be interested in using the spin-off + packages and + . 
+ + +license: BSD2 +license-file: LICENSE +author: Bryan O'Sullivan +maintainer: Bryan O'Sullivan , Herbert Valerio Riedel +copyright: 2009-2011 Bryan O'Sullivan, 2008-2009 Tom Harper +category: Data, Text +build-type: Simple +tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4, + GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4, + GHC==7.6.3, GHC==7.4.2, GHC==7.2.2, GHC==7.0.4 +extra-source-files: + include/*.h flag bytestring-builder - description: Depend on the bytestring-builder package for backwards compatibility. + description: + Depend on the [bytestring-builder](https://hackage.haskell.org/package/bytestring-builder) + package for backwards compatibility. default: False manual: False -executable text-tests - main-is: Tests.hs +flag integer-simple + description: + Use the [simple integer library](http://hackage.haskell.org/package/integer-simple) + instead of [integer-gmp](http://hackage.haskell.org/package/integer-gmp) + default: False + manual: False + +test-suite tests + type: exitcode-stdio-1.0 + c-sources: cbits/cbits.c + include-dirs: include + hs-source-dirs: src + + ghc-options: + -Wall -threaded -rtsopts + cpp-options: + -DASSERTS -DTEST_SUITE + + -- modules specific to test-suite + hs-source-dirs: . 
+ main-is: Tests.hs other-modules: Tests.Properties Tests.Properties.Mul @@ -37,121 +99,77 @@ executable text-tests Tests.SlowFunctions Tests.Utils - ghc-options: - -Wall -threaded -O0 -rtsopts - - if flag(hpc) - ghc-options: - -fhpc - - cpp-options: - -DTEST_SUITE - -DASSERTS - - build-depends: - HUnit >= 1.2, - QuickCheck >= 2.7, - base == 4.*, - deepseq, - directory, - quickcheck-unicode >= 1.0.1.0, - random, - test-framework >= 0.4, - test-framework-hunit >= 0.2, - test-framework-quickcheck2 >= 0.2, - text-tests - - if flag(bytestring-builder) - build-depends: bytestring >= 0.9 && < 0.10.4, - bytestring-builder >= 0.10.4 - else - build-depends: bytestring >= 0.10.4 - - default-language: Haskell2010 - default-extensions: NondecreasingIndentation - -executable text-tests-stdio - main-is: Tests/IO.hs - - ghc-options: - -Wall -threaded -rtsopts - - -- Optional HPC support - if flag(hpc) - ghc-options: - -fhpc - - build-depends: - text-tests, - base >= 4 && < 5 - -library - hs-source-dirs: .. - c-sources: ../cbits/cbits.c - include-dirs: ../include - ghc-options: -Wall - exposed-modules: + -- This can be merged back to `text` package, when cabal + -- will support per-component solving. Otherwise we have loops. + -- + -- Same as in `library` stanza; this is needed by cabal for accurate + -- file-monitoring as well as to avoid `-Wmissing-home-modules` + -- warnings. We can't use an inter-package library dependency because + -- of different `ghc-options`/`cpp-options` (as a side-benefit, + -- this enables per-component build parallelism in `cabal + -- new-build`!); We could, however, use cabal-version:2.2's `common` + -- blocks at some point in the future to reduce the duplication.
+ other-modules: Data.Text Data.Text.Array Data.Text.Encoding Data.Text.Encoding.Error + Data.Text.Foreign + Data.Text.IO + Data.Text.Internal + Data.Text.Internal.Builder + Data.Text.Internal.Builder.Functions + Data.Text.Internal.Builder.Int.Digits + Data.Text.Internal.Builder.RealFloat.Functions Data.Text.Internal.Encoding.Fusion Data.Text.Internal.Encoding.Fusion.Common Data.Text.Internal.Encoding.Utf16 Data.Text.Internal.Encoding.Utf32 Data.Text.Internal.Encoding.Utf8 - Data.Text.Foreign + Data.Text.Internal.Functions Data.Text.Internal.Fusion Data.Text.Internal.Fusion.CaseMapping Data.Text.Internal.Fusion.Common Data.Text.Internal.Fusion.Size Data.Text.Internal.Fusion.Types - Data.Text.IO Data.Text.Internal.IO - Data.Text.Internal - Data.Text.Lazy - Data.Text.Lazy.Builder - Data.Text.Internal.Builder.Functions - Data.Text.Lazy.Builder.Int - Data.Text.Internal.Builder.Int.Digits - Data.Text.Internal.Builder - Data.Text.Lazy.Builder.RealFloat - Data.Text.Internal.Builder.RealFloat.Functions - Data.Text.Lazy.Encoding + Data.Text.Internal.Lazy Data.Text.Internal.Lazy.Encoding.Fusion Data.Text.Internal.Lazy.Fusion - Data.Text.Lazy.IO - Data.Text.Internal.Lazy - Data.Text.Lazy.Read Data.Text.Internal.Lazy.Search Data.Text.Internal.Private - Data.Text.Read - Data.Text.Show Data.Text.Internal.Read Data.Text.Internal.Search - Data.Text.Unsafe Data.Text.Internal.Unsafe Data.Text.Internal.Unsafe.Char Data.Text.Internal.Unsafe.Shift - Data.Text.Internal.Functions - - if flag(hpc) - ghc-options: - -fhpc - - cpp-options: - -DTEST_SUITE - -DASSERTS - -DINTEGER_GMP + Data.Text.Lazy + Data.Text.Lazy.Builder + Data.Text.Lazy.Builder.Int + Data.Text.Lazy.Builder.RealFloat + Data.Text.Lazy.Encoding + Data.Text.Lazy.IO + Data.Text.Lazy.Internal + Data.Text.Lazy.Read + Data.Text.Read + Data.Text.Unsafe + Data.Text.Show build-depends: + HUnit >= 1.2, + QuickCheck >= 2.14.1 && < 2.15, array, - base == 4.*, + base <5, binary, deepseq, + directory, ghc-prim, - integer-gmp, - 
template-haskell + quickcheck-unicode >= 1.0.1.0, + random, + template-haskell, + test-framework >= 0.4, + test-framework-hunit >= 0.2, + test-framework-quickcheck2 >= 0.2 if flag(bytestring-builder) build-depends: bytestring >= 0.9 && < 0.10.4, @@ -159,5 +177,24 @@ library else build-depends: bytestring >= 0.10.4 + if impl(ghc >= 8.11) + build-depends: ghc-bignum + + if impl(ghc < 8.11) + if flag(integer-simple) + cpp-options: -DINTEGER_SIMPLE + build-depends: integer-simple >= 0.1 && < 0.5 + else + cpp-options: -DINTEGER_GMP + build-depends: integer-gmp >= 0.2 + default-language: Haskell2010 default-extensions: NondecreasingIndentation + +source-repository head + type: git + location: https://github.com/haskell/text + +source-repository head + type: mercurial + location: https://bitbucket.org/bos/text diff --git a/text.cabal b/text.cabal index bb297749..3991b30f 100644 --- a/text.cabal +++ b/text.cabal @@ -64,7 +64,7 @@ extra-source-files: -- scripts/SpecialCasing.txt README.markdown benchmarks/Setup.hs - benchmarks/cbits/*.c + benchmarks/cbits-bench/*.c benchmarks/haskell/*.hs benchmarks/haskell/Benchmarks/*.hs benchmarks/haskell/Benchmarks/Programs/*.hs @@ -106,6 +106,7 @@ flag integer-simple library c-sources: cbits/cbits.c include-dirs: include + hs-source-dirs: src exposed-modules: Data.Text @@ -216,118 +217,6 @@ library else other-extensions: TemplateHaskell -test-suite tests - type: exitcode-stdio-1.0 - c-sources: cbits/cbits.c - include-dirs: include - - ghc-options: - -Wall -threaded -rtsopts - - cpp-options: - -DASSERTS -DTEST_SUITE - - -- modules specific to test-suite - hs-source-dirs: tests - main-is: Tests.hs - other-modules: - Tests.Properties - Tests.Properties.Mul - Tests.QuickCheckUtils - Tests.Regressions - Tests.SlowFunctions - Tests.Utils - - -- Same as in `library` stanza; this is needed by cabal for accurate - -- file-monitoring as well as to avoid `-Wmissing-home-modules` - -- warnings We can't use an inter-package library dependency 
because - -- of different `ghc-options`/`cpp-options` (as a side-benefitt, - -- this enables per-component build parallelism in `cabal - -- new-build`!); We could, however, use cabal-version:2.2's `common` - -- blocks at some point in the future to reduce the duplication. - hs-source-dirs: . - other-modules: - Data.Text - Data.Text.Array - Data.Text.Encoding - Data.Text.Encoding.Error - Data.Text.Foreign - Data.Text.IO - Data.Text.Internal - Data.Text.Internal.Builder - Data.Text.Internal.Builder.Functions - Data.Text.Internal.Builder.Int.Digits - Data.Text.Internal.Builder.RealFloat.Functions - Data.Text.Internal.Encoding.Fusion - Data.Text.Internal.Encoding.Fusion.Common - Data.Text.Internal.Encoding.Utf16 - Data.Text.Internal.Encoding.Utf32 - Data.Text.Internal.Encoding.Utf8 - Data.Text.Internal.Functions - Data.Text.Internal.Fusion - Data.Text.Internal.Fusion.CaseMapping - Data.Text.Internal.Fusion.Common - Data.Text.Internal.Fusion.Size - Data.Text.Internal.Fusion.Types - Data.Text.Internal.IO - Data.Text.Internal.Lazy - Data.Text.Internal.Lazy.Encoding.Fusion - Data.Text.Internal.Lazy.Fusion - Data.Text.Internal.Lazy.Search - Data.Text.Internal.Private - Data.Text.Internal.Read - Data.Text.Internal.Search - Data.Text.Internal.Unsafe - Data.Text.Internal.Unsafe.Char - Data.Text.Internal.Unsafe.Shift - Data.Text.Lazy - Data.Text.Lazy.Builder - Data.Text.Lazy.Builder.Int - Data.Text.Lazy.Builder.RealFloat - Data.Text.Lazy.Encoding - Data.Text.Lazy.IO - Data.Text.Lazy.Internal - Data.Text.Lazy.Read - Data.Text.Read - Data.Text.Unsafe - Data.Text.Show - - build-depends: - HUnit >= 1.2, - QuickCheck >= 2.7 && < 2.11, - array, - base, - binary, - deepseq, - directory, - ghc-prim, - quickcheck-unicode >= 1.0.1.0, - random, - template-haskell, - test-framework >= 0.4, - test-framework-hunit >= 0.2, - test-framework-quickcheck2 >= 0.2 - - if flag(bytestring-builder) - build-depends: bytestring >= 0.9 && < 0.10.4, - bytestring-builder >= 0.10.4 - else - build-depends: 
bytestring >= 0.10.4 - - if impl(ghc >= 8.11) - build-depends: ghc-bignum - - if impl(ghc < 8.11) - if flag(integer-simple) - cpp-options: -DINTEGER_SIMPLE - build-depends: integer-simple >= 0.1 && < 0.5 - else - cpp-options: -DINTEGER_GMP - build-depends: integer-gmp >= 0.2 - - default-language: Haskell2010 - default-extensions: NondecreasingIndentation - source-repository head type: git location: https://github.com/haskell/text diff --git a/th-tests/th-tests.cabal b/th-tests/th-tests.cabal index 10819451..a65b7c3e 100644 --- a/th-tests/th-tests.cabal +++ b/th-tests/th-tests.cabal @@ -1,12 +1,18 @@ cabal-version: 2.2 name: th-tests version: 0 +synopsis: TH text tests +maintainer: text maintainers description: Tests that use 'Text' functions during compile time. . These are in a separate package because of https://github.com/haskell/cabal/issues/5623 license: BSD-2-Clause license-file: LICENSE +category: Text +tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4, + GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4, + GHC==7.6.3, GHC==7.4.2, GHC==7.2.2, GHC==7.0.4 test-suite th-tests default-language: Haskell2010 @@ -18,7 +24,7 @@ test-suite th-tests Lift build-depends: HUnit >= 1.2, - base, + base <5, template-haskell, text, test-framework >= 0.4, From 21281e7a0284ddcbfeca4b344be1bc88166d0df0 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Thu, 17 Sep 2020 20:45:09 +0300 Subject: [PATCH 04/10] Support for bytestring-0.11 --- .gitignore | 1 + benchmarks/text-benchmarks.cabal | 1 + cabal.tests.project | 5 + scripts/tests.sh | 30 ++++ src/Data/Text/Encoding.hs | 164 +++++++++--------- src/Data/Text/Internal/ByteStringCompat.hs | 55 ++++++ src/Data/Text/Internal/Encoding/Fusion.hs | 3 +- .../Text/Internal/Lazy/Encoding/Fusion.hs | 4 +- tests/text-tests.cabal | 1 + text.cabal | 3 +- 10 files changed, 183 insertions(+), 84 deletions(-) create mode 100644 cabal.tests.project create mode 100644 scripts/tests.sh create mode 100644 
src/Data/Text/Internal/ByteStringCompat.hs diff --git a/.gitignore b/.gitignore index 44e8ce9b..f26c6f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ /GNUmakefile /.ghc.environment.* /cabal.project.local +/cabal.tests.project.local # Test data repo ignored. Please see instruction in tests-and-benchmarks.markdown /tests/text-test-data/ diff --git a/benchmarks/text-benchmarks.cabal b/benchmarks/text-benchmarks.cabal index 7f606f3c..c31f82b2 100644 --- a/benchmarks/text-benchmarks.cabal +++ b/benchmarks/text-benchmarks.cabal @@ -101,6 +101,7 @@ executable text-benchmarks Data.Text.Internal.Builder.Functions Data.Text.Internal.Builder.Int.Digits Data.Text.Internal.Builder.RealFloat.Functions + Data.Text.Internal.ByteStringCompat Data.Text.Internal.Encoding.Fusion Data.Text.Internal.Encoding.Fusion.Common Data.Text.Internal.Encoding.Utf16 diff --git a/cabal.tests.project b/cabal.tests.project new file mode 100644 index 00000000..f6e61142 --- /dev/null +++ b/cabal.tests.project @@ -0,0 +1,5 @@ +-- this project doesn't have local 'text' package, +-- so tests build faster.
+ +packages: tests +tests: True diff --git a/scripts/tests.sh b/scripts/tests.sh new file mode 100644 index 00000000..b3d6e87f --- /dev/null +++ b/scripts/tests.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -ex + +runtest() { + HC=$1 + shift + + # EDIT last line to pass arguments + + cabal run text-tests:test:tests \ + --project-file=cabal.tests.project \ + --builddir="dist-newstyle/$HC" \ + --with-compiler="$HC" \ + -- "$@" +} + +runtest ghc-8.10.2 "$@" +runtest ghc-8.8.4 "$@" +runtest ghc-8.6.5 "$@" +runtest ghc-8.4.4 "$@" +runtest ghc-8.2.2 "$@" +runtest ghc-8.0.2 "$@" + +runtest ghc-7.10.3 "$@" +runtest ghc-7.8.4 "$@" +runtest ghc-7.6.3 "$@" +runtest ghc-7.4.2 "$@" +runtest ghc-7.2.2 "$@" +runtest ghc-7.0.4 "$@" diff --git a/src/Data/Text/Encoding.hs b/src/Data/Text/Encoding.hs index d8936796..239e15e3 100644 --- a/src/Data/Text/Encoding.hs +++ b/src/Data/Text/Encoding.hs @@ -96,6 +96,7 @@ import qualified Data.Text.Array as A import qualified Data.Text.Internal.Encoding.Fusion as E import qualified Data.Text.Internal.Encoding.Utf16 as U16 import qualified Data.Text.Internal.Fusion as F +import Data.Text.Internal.ByteStringCompat #include "text_cbits.h" @@ -123,12 +124,13 @@ decodeASCII = decodeUtf8 -- 'decodeLatin1' is semantically equivalent to -- @Data.Text.pack . Data.ByteString.Char8.unpack@ decodeLatin1 :: ByteString -> Text -decodeLatin1 (PS fp off len) = text a 0 len - where - a = A.run (A.new len >>= unsafeIOToST . go) - go dest = withForeignPtr fp $ \ptr -> do - c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len)) - return dest +decodeLatin1 bs = withBS bs aux where + aux fp len = text a 0 len + where + a = A.run (A.new len >>= unsafeIOToST . go) + go dest = withForeignPtr fp $ \ptr -> do + c_decode_latin1 (A.maBA dest) ptr (ptr `plusPtr` len) + return dest -- | Decode a 'ByteString' containing UTF-8 encoded text. 
-- @@ -139,36 +141,38 @@ decodeLatin1 (PS fp off len) = text a 0 len -- 'error' (/since 1.2.3.1/); For earlier versions of @text@ using -- those unsupported code points would result in undefined behavior. decodeUtf8With :: OnDecodeError -> ByteString -> Text -decodeUtf8With onErr (PS fp off len) = runText $ \done -> do - let go dest = withForeignPtr fp $ \ptr -> - with (0::CSize) $ \destOffPtr -> do - let end = ptr `plusPtr` (off + len) - loop curPtr = do - curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end - if curPtr' == end - then do - n <- peek destOffPtr - unsafeSTToIO (done dest (fromIntegral n)) - else do - x <- peek curPtr' - case onErr desc (Just x) of - Nothing -> loop $ curPtr' `plusPtr` 1 - Just c - | c > '\xFFFF' -> throwUnsupportedReplChar - | otherwise -> do - destOff <- peek destOffPtr - w <- unsafeSTToIO $ - unsafeWrite dest (fromIntegral destOff) - (safe c) - poke destOffPtr (destOff + fromIntegral w) - loop $ curPtr' `plusPtr` 1 - loop (ptr `plusPtr` off) - (unsafeIOToST . go) =<< A.new len +decodeUtf8With onErr bs = withBS bs aux where - desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream" + aux fp len = runText $ \done -> do + let go dest = withForeignPtr fp $ \ptr -> + with (0::CSize) $ \destOffPtr -> do + let end = ptr `plusPtr` len + loop curPtr = do + curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end + if curPtr' == end + then do + n <- peek destOffPtr + unsafeSTToIO (done dest (fromIntegral n)) + else do + x <- peek curPtr' + case onErr desc (Just x) of + Nothing -> loop $ curPtr' `plusPtr` 1 + Just c + | c > '\xFFFF' -> throwUnsupportedReplChar + | otherwise -> do + destOff <- peek destOffPtr + w <- unsafeSTToIO $ + unsafeWrite dest (fromIntegral destOff) + (safe c) + poke destOffPtr (destOff + fromIntegral w) + loop $ curPtr' `plusPtr` 1 + loop ptr + (unsafeIOToST . 
go) =<< A.new len + where + desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream" - throwUnsupportedReplChar = throwIO $ - ErrorCall "decodeUtf8With: non-BMP replacement characters not supported" + throwUnsupportedReplChar = throwIO $ + ErrorCall "decodeUtf8With: non-BMP replacement characters not supported" -- TODO: The code currently assumes that the transcoded UTF-16 -- stream is at most twice as long (in bytes) as the input UTF-8 -- stream. To justify this assumption one has to assume that the @@ -292,50 +296,50 @@ streamDecodeUtf8With onErr = decodeChunk B.empty 0 0 -- potential surrogate pair started in the last buffer decodeChunk :: ByteString -> CodePoint -> DecoderState -> ByteString -> Decoding - decodeChunk undecoded0 codepoint0 state0 bs@(PS fp off len) = - runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1) - where - decodeChunkToBuffer :: A.MArray s -> IO Decoding - decodeChunkToBuffer dest = withForeignPtr fp $ \ptr -> - with (0::CSize) $ \destOffPtr -> - with codepoint0 $ \codepointPtr -> - with state0 $ \statePtr -> - with nullPtr $ \curPtrPtr -> - let end = ptr `plusPtr` (off + len) - loop curPtr = do - poke curPtrPtr curPtr - curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr - curPtrPtr end codepointPtr statePtr - state <- peek statePtr - case state of - UTF8_REJECT -> do - -- We encountered an encoding error - x <- peek curPtr' - poke statePtr 0 - case onErr desc (Just x) of - Nothing -> loop $ curPtr' `plusPtr` 1 - Just c -> do - destOff <- peek destOffPtr - w <- unsafeSTToIO $ - unsafeWrite dest (fromIntegral destOff) (safe c) - poke destOffPtr (destOff + fromIntegral w) - loop $ curPtr' `plusPtr` 1 - - _ -> do - -- We encountered the end of the buffer while decoding - n <- peek destOffPtr - codepoint <- peek codepointPtr - chunkText <- unsafeSTToIO $ do - arr <- A.unsafeFreeze dest - return $! 
text arr 0 (fromIntegral n) - lastPtr <- peek curPtrPtr - let left = lastPtr `minusPtr` curPtr - !undecoded = case state of - UTF8_ACCEPT -> B.empty - _ -> B.append undecoded0 (B.drop left bs) - return $ Some chunkText undecoded - (decodeChunk undecoded codepoint state) - in loop (ptr `plusPtr` off) + decodeChunk undecoded0 codepoint0 state0 bs = withBS bs aux where + aux fp len = runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1) + where + decodeChunkToBuffer :: A.MArray s -> IO Decoding + decodeChunkToBuffer dest = withForeignPtr fp $ \ptr -> + with (0::CSize) $ \destOffPtr -> + with codepoint0 $ \codepointPtr -> + with state0 $ \statePtr -> + with nullPtr $ \curPtrPtr -> + let end = ptr `plusPtr` len + loop curPtr = do + poke curPtrPtr curPtr + curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr + curPtrPtr end codepointPtr statePtr + state <- peek statePtr + case state of + UTF8_REJECT -> do + -- We encountered an encoding error + x <- peek curPtr' + poke statePtr 0 + case onErr desc (Just x) of + Nothing -> loop $ curPtr' `plusPtr` 1 + Just c -> do + destOff <- peek destOffPtr + w <- unsafeSTToIO $ + unsafeWrite dest (fromIntegral destOff) (safe c) + poke destOffPtr (destOff + fromIntegral w) + loop $ curPtr' `plusPtr` 1 + + _ -> do + -- We encountered the end of the buffer while decoding + n <- peek destOffPtr + codepoint <- peek codepointPtr + chunkText <- unsafeSTToIO $ do + arr <- A.unsafeFreeze dest + return $! 
text arr 0 (fromIntegral n) + lastPtr <- peek curPtrPtr + let left = lastPtr `minusPtr` curPtr + !undecoded = case state of + UTF8_ACCEPT -> B.empty + _ -> B.append undecoded0 (B.drop left bs) + return $ Some chunkText undecoded + (decodeChunk undecoded codepoint state) + in loop ptr desc = "Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream" -- | Decode a 'ByteString' containing UTF-8 encoded text that is known @@ -436,12 +440,12 @@ encodeUtf8 (Text arr off len) newDest <- peek destPtr let utf8len = newDest `minusPtr` ptr if utf8len >= len `shiftR` 1 - then return (PS fp 0 utf8len) + then return (mkBS fp utf8len) else do fp' <- mallocByteString utf8len withForeignPtr fp' $ \ptr' -> do memcpy ptr' ptr (fromIntegral utf8len) - return (PS fp' 0 utf8len) + return (mkBS fp' utf8len) -- | Decode text from little endian UTF-16 encoding. decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text diff --git a/src/Data/Text/Internal/ByteStringCompat.hs b/src/Data/Text/Internal/ByteStringCompat.hs new file mode 100644 index 00000000..ee6dc18d --- /dev/null +++ b/src/Data/Text/Internal/ByteStringCompat.hs @@ -0,0 +1,55 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +module Data.Text.Internal.ByteStringCompat (mkBS, withBS) where + +import Data.ByteString.Internal (ByteString (..)) +import Data.Word (Word8) +import Foreign.ForeignPtr (ForeignPtr) + +#if !MIN_VERSION_bytestring(0,11,0) +#if MIN_VERSION_base(4,10,0) +import GHC.ForeignPtr (plusForeignPtr) +#else +import GHC.ForeignPtr (ForeignPtr(ForeignPtr)) +import GHC.Types (Int (..)) +import GHC.Prim (plusAddr#) +#endif +#endif + +mkBS :: ForeignPtr Word8 -> Int -> ByteString +#if MIN_VERSION_bytestring(0,11,0) +mkBS dfp n = BS dfp n +#else +mkBS dfp n = PS dfp 0 n +#endif +{-# INLINE mkBS #-} + +withBS :: ByteString -> (ForeignPtr Word8 -> Int -> r) -> r +#if MIN_VERSION_bytestring(0,11,0) +withBS (BS !sfp !slen) kont = kont sfp slen +#else +withBS (PS !sfp !soff !slen) 
kont = kont (plusForeignPtr sfp soff) slen +#endif +{-# INLINE withBS #-} + +#if !MIN_VERSION_bytestring(0,11,0) +#if !MIN_VERSION_base(4,10,0) +-- |Advances the given address by the given offset in bytes. +-- +-- The new 'ForeignPtr' shares the finalizer of the original, +-- equivalent from a finalization standpoint to just creating another +-- reference to the original. That is, the finalizer will not be +-- called before the new 'ForeignPtr' is unreachable, nor will it be +-- called an additional time due to this call, and the finalizer will +-- be called with the same address that it would have had this call +-- not happened, *not* the new address. +plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b +plusForeignPtr (ForeignPtr addr guts) (I# offset) = ForeignPtr (plusAddr# addr offset) guts +{-# INLINE [0] plusForeignPtr #-} +{-# RULES +"ByteString plusForeignPtr/0" forall fp . + plusForeignPtr fp 0 = fp + #-} +#endif +#endif diff --git a/src/Data/Text/Internal/Encoding/Fusion.hs b/src/Data/Text/Internal/Encoding/Fusion.hs index 66b3d0bc..41e0926f 100644 --- a/src/Data/Text/Internal/Encoding/Fusion.hs +++ b/src/Data/Text/Internal/Encoding/Fusion.hs @@ -53,6 +53,7 @@ import qualified Data.Text.Internal.Encoding.Utf8 as U8 import qualified Data.Text.Internal.Encoding.Utf16 as U16 import qualified Data.Text.Internal.Encoding.Utf32 as U32 import Data.Text.Unsafe (unsafeDupablePerformIO) +import Data.Text.Internal.ByteStringCompat streamASCII :: ByteString -> Stream Char streamASCII bs = Stream next 0 (maxSize l) @@ -185,7 +186,7 @@ unstream (Stream next s0 len) = unsafeDupablePerformIO $ do withForeignPtr fp' $ \p -> pokeByteOff p off x loop n' (off+1) s fp' {-# NOINLINE trimUp #-} - trimUp fp _ off = return $! PS fp 0 off + trimUp fp _ off = return $! 
mkBS fp off copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8) copy0 !src !srcLen !destLen = #if defined(ASSERTS) diff --git a/src/Data/Text/Internal/Lazy/Encoding/Fusion.hs b/src/Data/Text/Internal/Lazy/Encoding/Fusion.hs index 7dafc0a2..eff06071 100644 --- a/src/Data/Text/Internal/Lazy/Encoding/Fusion.hs +++ b/src/Data/Text/Internal/Lazy/Encoding/Fusion.hs @@ -52,7 +52,7 @@ import Data.ByteString.Internal (mallocByteString, memcpy) #if defined(ASSERTS) import Control.Exception (assert) #endif -import qualified Data.ByteString.Internal as B +import Data.Text.Internal.ByteStringCompat data S = S0 | S1 {-# UNPACK #-} !Word8 @@ -297,7 +297,7 @@ unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 (upperBound 4 len0) fp' <- copy0 fp n n' withForeignPtr fp' $ \p -> pokeByteOff p off x loop n' (off+1) s fp' - trimUp fp off = B.PS fp 0 off + trimUp fp off = mkBS fp off copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8) copy0 !src !srcLen !destLen = #if defined(ASSERTS) diff --git a/tests/text-tests.cabal b/tests/text-tests.cabal index 7a84c901..e0041154 100644 --- a/tests/text-tests.cabal +++ b/tests/text-tests.cabal @@ -121,6 +121,7 @@ test-suite tests Data.Text.Internal.Builder.Functions Data.Text.Internal.Builder.Int.Digits Data.Text.Internal.Builder.RealFloat.Functions + Data.Text.Internal.ByteStringCompat Data.Text.Internal.Encoding.Fusion Data.Text.Internal.Encoding.Fusion.Common Data.Text.Internal.Encoding.Utf16 diff --git a/text.cabal b/text.cabal index 3991b30f..b7060c2e 100644 --- a/text.cabal +++ b/text.cabal @@ -120,6 +120,7 @@ library Data.Text.Internal.Builder.Functions Data.Text.Internal.Builder.Int.Digits Data.Text.Internal.Builder.RealFloat.Functions + Data.Text.Internal.ByteStringCompat Data.Text.Internal.Encoding.Fusion Data.Text.Internal.Encoding.Fusion.Common Data.Text.Internal.Encoding.Utf16 @@ -168,7 +169,7 @@ library build-depends: bytestring >= 0.9 && < 0.10.4, bytestring-builder >= 0.10.4.0.2 && < 0.11 else - 
build-depends: bytestring >= 0.10.4 && < 0.11 + build-depends: bytestring >= 0.10.4 && < 0.12 ghc-options: -Wall -fwarn-tabs -funbox-strict-fields -O2 if flag(developer) From 3a6127d3b885b530ca6b360b0260926bc9be2000 Mon Sep 17 00:00:00 2001 From: Falco Peijnenburg Date: Mon, 12 Oct 2020 13:20:07 +0200 Subject: [PATCH 05/10] Add rewrite rule to unfuse (take . drop) The combination of take . drop is commonly used as a substring operation. The original take and drop functions both work by calculating just a new offset and length. When fused, the streaming implementation unnecessarily copies the data. This makes it much slower and more memory consuming. --- src/Data/Text.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 972a8e09..81f97430 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -1224,6 +1224,8 @@ drop n t@(Text arr off len) drop n t = unstream (S.drop n (stream t)) "TEXT drop -> unfused" [1] forall n t. unstream (S.drop n (stream t)) = drop n t +"TEXT take . drop -> unfused" [1] forall len off t. + unstream (S.take len (S.drop off (stream t))) = take len (drop off t) #-} -- | /O(n)/ 'dropEnd' @n@ @t@ returns the prefix remaining after From 8c0a8dad0f60c68826869930ecc68860d383bb39 Mon Sep 17 00:00:00 2001 From: Falco Peijnenburg Date: Sat, 17 Oct 2020 15:37:46 +0200 Subject: [PATCH 06/10] Add regression test for (take . drop) fusion The combination of (take . drop) should not be fused. This test makes sure they are not by comparing the underlying array before and after applying those functions. See #301. 
--- tests/Tests/Regressions.hs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/Tests/Regressions.hs b/tests/Tests/Regressions.hs index 8ff45b3c..a5610a43 100644 --- a/tests/Tests/Regressions.hs +++ b/tests/Tests/Regressions.hs @@ -14,7 +14,9 @@ import qualified Data.ByteString as B import Data.ByteString.Char8 () import qualified Data.ByteString.Lazy as LB import qualified Data.Text as T +import qualified Data.Text.Array as TA import qualified Data.Text.Encoding as TE +import qualified Data.Text.Internal as T import qualified Data.Text.IO as T import qualified Data.Text.Lazy as LT import qualified Data.Text.Lazy.Encoding as LE @@ -95,6 +97,24 @@ t227 = (T.length $ T.filter isLetter $ T.take (-3) "Hello! How are you doing today?") 0 +-- See GitHub issue #301 +-- This tests whether the "TEXT take . drop -> unfused" rule is applied to the +-- slice function. When the slice function is fused, a new array will be +-- constructed that is shorter than the original array. Without fusion the +-- array remains unmodified. 
+t301 :: IO () +t301 = do + assertEqual "The length of the array remains the same despite slicing" + (TA.length originalArr) + (TA.length newArr) + + assertEqual "The new array still contains the original value" + (T.Text newArr originalOff originalLen) + original + where + original@(T.Text originalArr originalOff originalLen) = T.pack "1234567890" + T.Text newArr _off _len = T.take 1 $ T.drop 1 original + tests :: F.Test tests = F.testGroup "Regressions" [ F.testCase "hGetContents_crash" hGetContents_crash @@ -105,4 +125,5 @@ tests = F.testGroup "Regressions" , F.testCase "t197" t197 , F.testCase "t221" t221 , F.testCase "t227" t227 + , F.testCase "t301" t301 ] From be54b46175db603aafea3e3f19a6a75e87a29828 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Sun, 18 Oct 2020 02:12:24 +0300 Subject: [PATCH 07/10] Update changelog --- changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog.md b/changelog.md index 3522ba09..70e819dd 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,8 @@ ### 1.2.4.1 * Support `template-haskell-2.17.0.0` +* Support `bytestring-0.11` +* Add `take . 
drop` related RULE ### 1.2.4.0 From fafa63ad45cb94127fc5fdcbce19fdf8c59a50d0 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Sun, 6 Dec 2020 00:50:16 +0200 Subject: [PATCH 08/10] Run doctest on CI, test only master branch (and PR to it) --- .travis.yml | 40 ++++++++++++++++++-------------- benchmarks/text-benchmarks.cabal | 2 +- cabal.haskell-ci | 3 +++ src/Data/Text.hs | 5 ++++ tests/text-tests.cabal | 2 +- text.cabal | 2 +- th-tests/th-tests.cabal | 2 +- 7 files changed, 35 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index c9f3948a..ca413a3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ # # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.10.1 +# version: 0.11.20201125 # version: ~> 1.0 language: c @@ -17,6 +17,9 @@ dist: xenial git: # whether to recursively clone submodules submodules: false +branches: + only: + - master cache: directories: - $HOME/.cabal/packages @@ -33,41 +36,41 @@ before_cache: - rm -rfv $CABALHOME/packages/head.hackage jobs: include: - - compiler: ghc-8.10.1 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.10.1","cabal-install-3.2"]}} + - compiler: ghc-8.10.2 + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.10.2","cabal-install-3.4"]}} os: linux - - compiler: ghc-8.8.3 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.3","cabal-install-3.2"]}} + - compiler: ghc-8.8.4 + addons: {"apt":{"sources":[{"sourceline":"deb 
http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.8.4","cabal-install-3.4"]}} os: linux - compiler: ghc-8.6.5 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.6.5","cabal-install-3.4"]}} os: linux - compiler: ghc-8.4.4 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.4.4","cabal-install-3.4"]}} os: linux - compiler: ghc-8.2.2 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.2.2","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.2.2","cabal-install-3.4"]}} os: linux - compiler: ghc-8.0.2 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial 
main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.0.2","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-8.0.2","cabal-install-3.4"]}} os: linux - compiler: ghc-7.10.3 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.10.3","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.10.3","cabal-install-3.4"]}} os: linux - compiler: ghc-7.8.4 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.8.4","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.8.4","cabal-install-3.4"]}} os: linux - compiler: ghc-7.6.3 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.6.3","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial 
main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.6.3","cabal-install-3.4"]}} os: linux - compiler: ghc-7.4.2 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.4.2","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.4.2","cabal-install-3.4"]}} os: linux - compiler: ghc-7.2.2 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.2.2","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.2.2","cabal-install-3.4"]}} os: linux - compiler: ghc-7.0.4 - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.0.4","cabal-install-3.2"]}} + addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.0.4","cabal-install-3.4"]}} os: linux before_install: - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//') @@ -113,6 +116,7 @@ install: - cat $CABALHOME/config - rm -fv cabal.project cabal.project.local 
cabal.project.freeze - travis_retry ${CABAL} v2-update -v + - if [ $HCNUMVER -ge 80000 ] ; then ${CABAL} v2-install $WITHCOMPILER --ignore-project -j2 doctest --constraint='doctest ^>=0.17' ; fi # Generate cabal.project - rm -rf cabal.project cabal.project.local cabal.project.freeze - touch cabal.project @@ -180,7 +184,9 @@ script: # build & run tests, build benchmarks - ${CABAL} v2-build $WITHCOMPILER ${TEST} ${BENCH} all # Testing... - - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all + - ${CABAL} v2-test $WITHCOMPILER ${TEST} ${BENCH} all --test-show-details=direct + # Doctest... + - if [ $HCNUMVER -ge 80000 ] ; then (cd ${PKGDIR_text} && doctest -fobject-code -DINTEGER_GMP -XNondecreasingIndentation src) ; fi # cabal check... - (cd ${PKGDIR_text} && ${CABAL} -vnormal check) - (cd ${PKGDIR_text_tests} && ${CABAL} -vnormal check) @@ -192,5 +198,5 @@ script: - rm -f cabal.project.local - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ("0.10.1",["--config=cabal.haskell-ci","cabal.project"]) +# REGENDATA ("0.11.20201125",["--config=cabal.haskell-ci","cabal.project"]) # EOF diff --git a/benchmarks/text-benchmarks.cabal b/benchmarks/text-benchmarks.cabal index c31f82b2..67378715 100644 --- a/benchmarks/text-benchmarks.cabal +++ b/benchmarks/text-benchmarks.cabal @@ -13,7 +13,7 @@ author: Jasper Van der Jeugt , maintainer: jaspervdj@gmail.com category: Text build-type: Simple -tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4, +tested-with: GHC==8.10.2, GHC==8.8.4, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2, GHC==7.10.3 extra-source-files: diff --git a/cabal.haskell-ci b/cabal.haskell-ci index e8fd8e5d..0a4b84e7 100644 --- a/cabal.haskell-ci +++ b/cabal.haskell-ci @@ -1,5 +1,8 @@ benchmarks: >=7.4 jobs-selection: any +doctest: True +doctest-options: -fobject-code -DINTEGER_GMP +branches: master installed: +all -text -parsec install-dependencies: False diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 
81f97430..ea0eb0b4 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -259,6 +259,11 @@ import qualified Language.Haskell.TH.Syntax as TH import Text.Printf (PrintfArg, formatArg, formatString) #endif +-- $setup +-- >>> import Data.Text +-- >>> import qualified Data.Text as T +-- >>> :seti -XOverloadedStrings + -- $character_definition -- -- This package uses the term /character/ to denote Unicode /code points/. diff --git a/tests/text-tests.cabal b/tests/text-tests.cabal index e0041154..1c04cc7d 100644 --- a/tests/text-tests.cabal +++ b/tests/text-tests.cabal @@ -56,7 +56,7 @@ maintainer: Bryan O'Sullivan , Herbert Valerio Riedel , Herbert Valerio Riedel Date: Sat, 5 Dec 2020 20:55:38 +0100 Subject: [PATCH 09/10] Add examples to 'span' and 'break' --- src/Data/Text.hs | 6 ++++++ src/Data/Text/Lazy.hs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index ea0eb0b4..0734cbaf 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -1359,6 +1359,9 @@ splitAt n t@(Text arr off len) -- a pair whose first element is the longest prefix (possibly empty) -- of @t@ of elements that satisfy @p@, and whose second is the -- remainder of the list. +-- +-- >>> T.span (=='0') "000AB" +-- ("000","AB") span :: (Char -> Bool) -> Text -> (Text, Text) span p t = case span_ p t of (# hd,tl #) -> (hd,tl) @@ -1366,6 +1369,9 @@ span p t = case span_ p t of -- | /O(n)/ 'break' is like 'span', but the prefix returned is -- over elements that fail the predicate @p@. +-- +-- >>> T.break (=='c') "180cm" +-- ("180","cm") break :: (Char -> Bool) -> Text -> (Text, Text) break p = span (not . p) {-# INLINE break #-} diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index e1aa1309..c974c979 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -307,6 +307,11 @@ import Text.Printf (PrintfArg, formatArg, formatString) -- measure. For details, see -- .) 
+-- $setup +-- >>> import Data.Text +-- >>> import qualified Data.Text as T +-- >>> :seti -XOverloadedStrings + equal :: Text -> Text -> Bool equal Empty Empty = True equal Empty _ = False @@ -1385,6 +1390,9 @@ breakOnAll pat src -- | /O(n)/ 'break' is like 'span', but the prefix returned is over -- elements that fail the predicate @p@. +-- +-- >>> T.break (=='c') "180cm" +-- ("180","cm") break :: (Char -> Bool) -> Text -> (Text, Text) break p t0 = break' t0 where break' Empty = (empty, empty) @@ -1400,6 +1408,9 @@ break p t0 = break' t0 -- a pair whose first element is the longest prefix (possibly empty) -- of @t@ of elements that satisfy @p@, and whose second is the -- remainder of the list. +-- +-- >>> T.span (=='0') "000AB" +-- ("000","AB") span :: (Char -> Bool) -> Text -> (Text, Text) span p = break (not . p) {-# INLINE span #-} From fe7ba1dc0aa40ffec0daf78158069266af4e8e27 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 30 Jan 2021 22:33:07 +0000 Subject: [PATCH 10/10] Use base-provided unsafe shifts This makes the 9.2 prim changes less invasive. If we don't care about de-optimizing ancient GHCs we can remove this module altogether. --- src/Data/Text/Internal/Unsafe/Shift.hs | 30 +++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/Data/Text/Internal/Unsafe/Shift.hs b/src/Data/Text/Internal/Unsafe/Shift.hs index b2fef9b6..21246db4 100644 --- a/src/Data/Text/Internal/Unsafe/Shift.hs +++ b/src/Data/Text/Internal/Unsafe/Shift.hs @@ -1,3 +1,8 @@ +{-# LANGUAGE CPP #-} +#if MIN_VERSION_base(4,5,0) +-- base-4.5.0 is 7.4, default sigs introduced in 7.2 +{-# LANGUAGE DefaultSignatures #-} +#endif {-# LANGUAGE MagicHash #-} -- | @@ -20,9 +25,13 @@ module Data.Text.Internal.Unsafe.Shift UnsafeShift(..) 
) where --- import qualified Data.Bits as Bits +#if MIN_VERSION_base(4,5,0) +import qualified Data.Bits as Bits +import Data.Word +#else import GHC.Base import GHC.Word +#endif -- | This is a workaround for poor optimisation in GHC 6.8.2. It -- fails to notice constant-width shifts, and adds a test and branch @@ -32,35 +41,54 @@ import GHC.Word -- greater than the size in bits of a machine Int#. class UnsafeShift a where shiftL :: a -> Int -> a +#if MIN_VERSION_base(4,5,0) + {-# INLINE shiftL #-} + default shiftL :: Bits.Bits a => a -> Int -> a + shiftL = Bits.unsafeShiftL +#endif + shiftR :: a -> Int -> a +#if MIN_VERSION_base(4,5,0) + {-# INLINE shiftR #-} + default shiftR :: Bits.Bits a => a -> Int -> a + shiftR = Bits.unsafeShiftR +#endif instance UnsafeShift Word16 where +#if !MIN_VERSION_base(4,5,0) {-# INLINE shiftL #-} shiftL (W16# x#) (I# i#) = W16# (narrow16Word# (x# `uncheckedShiftL#` i#)) {-# INLINE shiftR #-} shiftR (W16# x#) (I# i#) = W16# (x# `uncheckedShiftRL#` i#) +#endif instance UnsafeShift Word32 where +#if !MIN_VERSION_base(4,5,0) {-# INLINE shiftL #-} shiftL (W32# x#) (I# i#) = W32# (narrow32Word# (x# `uncheckedShiftL#` i#)) {-# INLINE shiftR #-} shiftR (W32# x#) (I# i#) = W32# (x# `uncheckedShiftRL#` i#) +#endif instance UnsafeShift Word64 where +#if !MIN_VERSION_base(4,5,0) {-# INLINE shiftL #-} shiftL (W64# x#) (I# i#) = W64# (x# `uncheckedShiftL64#` i#) {-# INLINE shiftR #-} shiftR (W64# x#) (I# i#) = W64# (x# `uncheckedShiftRL64#` i#) +#endif instance UnsafeShift Int where +#if !MIN_VERSION_base(4,5,0) {-# INLINE shiftL #-} shiftL (I# x#) (I# i#) = I# (x# `iShiftL#` i#) {-# INLINE shiftR #-} shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) +#endif {- instance UnsafeShift Integer where