From adfa23bdb9a61063a33ed9b6836b1d0e57a792dc Mon Sep 17 00:00:00 2001 From: Xun Li Date: Wed, 25 Oct 2023 23:25:57 -0700 Subject: [PATCH] init Signed-off-by: Xun Li --- modules/arrow/src/index.ts | 10 + modules/arrow/src/schema/geoarrow-utils.ts | 522 +++++++++++++++++++++ modules/arrow/test/data/line.arrow | Bin 0 -> 3146 bytes modules/arrow/test/data/multiline.arrow | Bin 0 -> 3370 bytes modules/arrow/test/data/multipoint.arrow | Bin 0 -> 3146 bytes modules/arrow/test/data/multipolygon.arrow | Bin 0 -> 3602 bytes modules/arrow/test/data/point.arrow | Bin 0 -> 2954 bytes modules/arrow/test/data/polygon.arrow | Bin 0 -> 3306 bytes modules/arrow/test/geoarrow-utils.spec.ts | 231 +++++++++ 9 files changed, 763 insertions(+) create mode 100644 modules/arrow/src/schema/geoarrow-utils.ts create mode 100644 modules/arrow/test/data/line.arrow create mode 100644 modules/arrow/test/data/multiline.arrow create mode 100644 modules/arrow/test/data/multipoint.arrow create mode 100644 modules/arrow/test/data/multipolygon.arrow create mode 100644 modules/arrow/test/data/point.arrow create mode 100644 modules/arrow/test/data/polygon.arrow create mode 100644 modules/arrow/test/geoarrow-utils.spec.ts diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index b9d95915df..6e25056256 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -44,3 +44,13 @@ export const ArrowLoader: LoaderWithParser< parseSync, parseInBatches: parseArrowInBatches }; + +// Arrow Utils +export { + GEOARROW_ENCODINGS, + parseGeometryFromArrow, + getGeometryColumnsFromArrowTable, + getBinaryGeometriesFromArrow, + BinaryDataFromGeoArrow, + getGeoArrowEncoding +} from './schema/geoarrow-utils'; \ No newline at end of file diff --git a/modules/arrow/src/schema/geoarrow-utils.ts b/modules/arrow/src/schema/geoarrow-utils.ts new file mode 100644 index 0000000000..8e774cd238 --- /dev/null +++ b/modules/arrow/src/schema/geoarrow-utils.ts @@ -0,0 +1,522 @@ +// loaders.gl, MIT 
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import type {Data, Vector, Schema, Table} from 'apache-arrow';
import type {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema';
import type {
  Feature,
  MultiPolygon,
  Position,
  Polygon,
  MultiPoint,
  Point,
  MultiLineString,
  LineString
} from 'geojson';

/** One raw geometry value read from a GeoArrow column, tagged with the column's encoding. */
type RawArrowFeature = {
  encoding?: string;
  // The value is whatever the arrow column yields for one row (nested list vectors / coordinate rows).
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  data: any;
};

/** GeoArrow extension names, as stored under the `ARROW:extension:name` field metadata key. */
export enum GEOARROW_ENCODINGS {
  MULTI_POLYGON = 'geoarrow.multipolygon',
  POLYGON = 'geoarrow.polygon',
  MULTI_LINESTRING = 'geoarrow.multilinestring',
  LINESTRING = 'geoarrow.linestring',
  MULTI_POINT = 'geoarrow.multipoint',
  POINT = 'geoarrow.point',
  WKB = 'geoarrow.wkb',
  WKT = 'geoarrow.wkt'
}

const GEOARROW_COLUMN_METADATA_KEY = 'ARROW:extension:name';

/**
 * Collect the geometry columns of an arrow table.
 *
 * A field counts as a geometry column when its metadata carries the
 * `ARROW:extension:name` key; the value of that key is reported as the encoding.
 *
 * @param arrowTable the arrow table to inspect
 * @returns a Map from column name to `{encoding}`
 */
export function getGeometryColumnsFromArrowTable(
  arrowTable: Table
): Map<string, {encoding?: string}> {
  const geometryColumns = new Map<string, {encoding?: string}>();
  for (const field of arrowTable.schema.fields) {
    const metadata = field.metadata;
    if (metadata && metadata.has(GEOARROW_COLUMN_METADATA_KEY)) {
      geometryColumns.set(field.name, {encoding: metadata.get(GEOARROW_COLUMN_METADATA_KEY)});
    }
  }
  return geometryColumns;
}

/**
 * Get the GeoArrow encoding of a named column.
 *
 * @param schema the arrow table schema
 * @param geometryColumnName name of the geometry column
 * @returns the encoding string, or '' when the column or its metadata entry is missing
 */
export function getGeoArrowEncoding(schema: Schema, geometryColumnName: string): string {
  const field = schema.fields.find((candidate) => candidate.name === geometryColumnName);
  return field?.metadata?.get(GEOARROW_COLUMN_METADATA_KEY) ?? '';
}

/**
 * Grow `bounds` (in place) so that it covers a sample of the given coordinates.
 *
 * Only every `floor(numberOfVertices / sampleSize)`-th vertex is inspected, so for
 * chunks with more than `sampleSize` vertices the result is an approximation.
 *
 * @param flatCoords the flattened coordinates array from one chunk of a geoarrow column
 * @param nDim the number of dimensions of the coordinates
 * @param bounds [minLng, minLat, maxLng, maxLat], updated in place
 * @param sampleSize approximate number of vertices to inspect per chunk, default is 100
 */
function updateBoundsFromGeoArrowSamples(
  flatCoords: Float64Array,
  nDim: number,
  bounds: [number, number, number, number],
  sampleSize: number = 100
): void {
  const numberOfFeatures = flatCoords.length / nDim;
  const sampleStep = Math.max(Math.floor(numberOfFeatures / sampleSize), 1);

  for (let i = 0; i < numberOfFeatures; i += sampleStep) {
    const lng = flatCoords[i * nDim];
    const lat = flatCoords[i * nDim + 1];
    if (lng < bounds[0]) {
      bounds[0] = lng;
    }
    if (lat < bounds[1]) {
      bounds[1] = lat;
    }
    if (lng > bounds[2]) {
      bounds[2] = lng;
    }
    if (lat > bounds[3]) {
      bounds[3] = lat;
    }
  }
}

/**
 * Create an empty binary geometry, see deck.gl BinaryGeometry.
 * A fresh object is built on every call so that the emitted geometries never share
 * mutable arrays/objects with each other.
 */
function createEmptyBinaryGeometry() {
  return {
    globalFeatureIds: {value: new Uint32Array(0), size: 1},
    positions: {value: new Float32Array(0), size: 2},
    properties: [],
    numericProps: {},
    featureIds: {value: new Uint32Array(0), size: 1}
  };
}

/** Intermediate result of flattening one chunk of a geoarrow column. */
type BinaryGeometryContent = {
  /** for every vertex, the chunk-local index of the feature (row) it belongs to */
  featureIds: Uint32Array;
  /** the interleaved coordinate values of the chunk */
  flatCoordinateArray: Float64Array;
  /** number of values per vertex */
  nDim: number;
  /** start vertex of every ring (polygons) or line part (lines); empty for points */
  geomOffset: Int32Array;
  /** start vertex of every polygon plus the total vertex count; empty for lines and points */
  geometryIndicies: Int32Array;
};

/**
 * get binary polygons from geoarrow polygon column
 *
 * Nesting read here: (features ->) polygons -> rings -> points -> coordinate values,
 * where the outer "features" level only exists for the multipolygon encoding.
 *
 * @param chunk one chunk of geoarrow polygon column
 * @param geoEncoding the geo encoding of the geoarrow polygon column
 * @returns BinaryGeometryContent
 */
function getBinaryPolygonsFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiPolygon = geoEncoding === GEOARROW_ENCODINGS.MULTI_POLYGON;

  const polygonData = isMultiPolygon ? chunk.children[0] : chunk;
  const ringData = polygonData.children[0];
  const pointData = ringData.children[0];
  const coordData = pointData.children[0];

  const nDim = pointData.stride;
  const geomOffset = ringData.valueOffsets;
  const flatCoordinateArray = coordData.values;
  const numOfVertices = flatCoordinateArray.length / nDim;

  // polygon index -> first ring index
  const polygonOffsets = polygonData.valueOffsets;

  // Int32Array (not Uint16Array): these are vertex indices and routinely exceed 65535
  const geometryIndicies = new Int32Array(polygonData.length + 1);
  for (let p = 0; p < polygonData.length; p++) {
    geometryIndicies[p] = geomOffset[polygonOffsets[p]];
  }
  geometryIndicies[polygonData.length] = numOfVertices;

  const featureIds = new Uint32Array(numOfVertices);
  for (let i = 0; i < chunk.length; i++) {
    // a multipolygon row covers a range of polygons, a polygon row is exactly one polygon
    const firstPolygon = isMultiPolygon ? chunk.valueOffsets[i] : i;
    const endPolygon = isMultiPolygon ? chunk.valueOffsets[i + 1] : i + 1;
    const startIdx = geomOffset[polygonOffsets[firstPolygon]];
    const endIdx = geomOffset[polygonOffsets[endPolygon]];
    for (let j = startIdx; j < endIdx; j++) {
      featureIds[j] = i;
    }
  }

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}

/**
 * get binary lines from geoarrow line column
 *
 * Nesting read here: (features ->) lines -> points -> coordinate values,
 * where the outer "features" level only exists for the multilinestring encoding.
 *
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding the geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 */
function getBinaryLinesFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiLineString = geoEncoding === GEOARROW_ENCODINGS.MULTI_LINESTRING;

  const lineData = isMultiLineString ? chunk.children[0] : chunk;
  const pointData = lineData.children[0];
  const coordData = pointData.children[0];

  const nDim = pointData.stride;
  const geomOffset = lineData.valueOffsets;
  const flatCoordinateArray = coordData.values;

  // geometryIndicies is not needed for line string
  const geometryIndicies = new Int32Array(0);

  const numOfVertices = flatCoordinateArray.length / nDim;
  const featureIds = new Uint32Array(numOfVertices);
  for (let i = 0; i < chunk.length; i++) {
    // a multilinestring row covers a range of line parts, a linestring row is exactly one line
    const firstLine = isMultiLineString ? chunk.valueOffsets[i] : i;
    const endLine = isMultiLineString ? chunk.valueOffsets[i + 1] : i + 1;
    const startIdx = geomOffset[firstLine];
    const endIdx = geomOffset[endLine];
    for (let j = startIdx; j < endIdx; j++) {
      featureIds[j] = i;
    }
  }

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}

/**
 * get binary points from geoarrow point column
 *
 * Nesting read here: (features ->) points -> coordinate values,
 * where the outer "features" level only exists for the multipoint encoding.
 *
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 */
function getBinaryPointsFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  const isMultiPoint = geoEncoding === GEOARROW_ENCODINGS.MULTI_POINT;

  const pointData = isMultiPoint ? chunk.children[0] : chunk;
  const coordData = pointData.children[0];

  const nDim = pointData.stride;
  const flatCoordinateArray = coordData.values;

  // geometryIndices is not needed for point
  const geometryIndicies = new Int32Array(0);
  // geomOffset is not needed for point
  const geomOffset = new Int32Array(0);

  const numOfVertices = flatCoordinateArray.length / nDim;
  const featureIds = new Uint32Array(numOfVertices);
  for (let i = 0; i < chunk.length; i++) {
    if (isMultiPoint) {
      // every point of a multipoint row belongs to that row
      const startIdx = chunk.valueOffsets[i];
      const endIdx = chunk.valueOffsets[i + 1];
      for (let j = startIdx; j < endIdx; j++) {
        featureIds[j] = i;
      }
    } else {
      featureIds[i] = i;
    }
  }

  return {
    featureIds,
    flatCoordinateArray,
    nDim,
    geomOffset,
    geometryIndicies
  };
}

/**
 * get binary geometries from geoarrow column
 * @param chunk one chunk/batch of geoarrow column
 * @param geoEncoding geo encoding of the geoarrow column
 * @returns BinaryGeometryContent
 * @throws Error when the encoding is not a point, line or polygon encoding (e.g. WKB/WKT)
 */
function getBinaryGeometriesFromChunk(chunk: Data, geoEncoding: string): BinaryGeometryContent {
  switch (geoEncoding) {
    case GEOARROW_ENCODINGS.POINT:
    case GEOARROW_ENCODINGS.MULTI_POINT:
      return getBinaryPointsFromChunk(chunk, geoEncoding);
    case GEOARROW_ENCODINGS.LINESTRING:
    case GEOARROW_ENCODINGS.MULTI_LINESTRING:
      return getBinaryLinesFromChunk(chunk, geoEncoding);
    case GEOARROW_ENCODINGS.POLYGON:
    case GEOARROW_ENCODINGS.MULTI_POLYGON:
      return getBinaryPolygonsFromChunk(chunk, geoEncoding);
    default:
      throw new Error('invalid geoarrow encoding');
  }
}

/**
 * Binary data from geoarrow column and can be used by e.g. deck.gl GeojsonLayer
 */
export type BinaryDataFromGeoArrow = {
  binaryGeometries: BinaryFeatures[];
  bounds: [number, number, number, number];
  featureTypes: {polygon: boolean; point: boolean; line: boolean};
};

/**
 * get binary geometries from geoarrow column
 *
 * One BinaryFeatures object is produced per chunk of the column. `featureIds` are
 * chunk-local row indices; `globalFeatureIds` and `properties[].index` are offset by the
 * number of rows in the preceding chunks.
 *
 * @param geoColumn the geoarrow column, e.g. arrowTable.getChildAt(geoColumnIndex)
 * @param geoEncoding the geo encoding of the geoarrow column, e.g. getGeoArrowEncoding(arrowTable.schema, geoColumnName)
 * @returns BinaryDataFromGeoArrow; `bounds` is sampled (see updateBoundsFromGeoArrowSamples)
 *   and stays [Infinity, Infinity, -Infinity, -Infinity] when the column has no vertices
 * @throws Error when the column has chunks and the encoding is not supported
 */
export function getBinaryGeometriesFromArrow(
  geoColumn: Vector,
  geoEncoding: string
): BinaryDataFromGeoArrow {
  const featureTypes = {
    polygon:
      geoEncoding === GEOARROW_ENCODINGS.MULTI_POLYGON ||
      geoEncoding === GEOARROW_ENCODINGS.POLYGON,
    point:
      geoEncoding === GEOARROW_ENCODINGS.MULTI_POINT || geoEncoding === GEOARROW_ENCODINGS.POINT,
    line:
      geoEncoding === GEOARROW_ENCODINGS.MULTI_LINESTRING ||
      geoEncoding === GEOARROW_ENCODINGS.LINESTRING
  };

  const chunks = geoColumn.data;
  const bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
  let globalFeatureIdOffset = 0;
  const binaryGeometries: BinaryFeatures[] = [];

  for (const geometries of chunks) {
    const {featureIds, flatCoordinateArray, nDim, geomOffset} = getBinaryGeometriesFromChunk(
      geometries,
      geoEncoding
    );

    const numOfVertices = flatCoordinateArray.length / nDim;
    const globalFeatureIds = new Uint32Array(numOfVertices);
    for (let i = 0; i < numOfVertices; i++) {
      globalFeatureIds[i] = featureIds[i] + globalFeatureIdOffset;
    }

    const rowOffset = globalFeatureIdOffset;
    const binaryContent = {
      globalFeatureIds: {value: globalFeatureIds, size: 1},
      positions: {
        value: flatCoordinateArray,
        size: nDim
      },
      featureIds: {value: featureIds, size: 1},
      properties: Array.from({length: geometries.length}, (_, i) => ({index: i + rowOffset}))
    };

    // TODO: check if chunks are sequentially accessed
    globalFeatureIdOffset += geometries.length;

    // NOTE: deck.gl defines the BinaryFeatures structure must have points, lines, polygons even if they are empty
    binaryGeometries.push({
      shape: 'binary-feature-collection',
      points: {
        type: 'Point',
        ...createEmptyBinaryGeometry(),
        ...(featureTypes.point ? binaryContent : {})
      },
      lines: {
        type: 'LineString',
        ...createEmptyBinaryGeometry(),
        ...(featureTypes.line ? binaryContent : {}),
        pathIndices: {value: featureTypes.line ? geomOffset : new Uint16Array(0), size: 1}
      },
      polygons: {
        type: 'Polygon',
        ...createEmptyBinaryGeometry(),
        ...(featureTypes.polygon ? binaryContent : {}),
        polygonIndices: {
          // TODO why deck.gl's tessellatePolygon performance is not good when using geometryIndicies
          // even when there is no hole in any polygon
          value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
          size: 1
        },
        primitivePolygonIndices: {
          value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
          size: 1
        }
      }
    });

    updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds);
  }

  return {binaryGeometries, bounds, featureTypes};
}

/**
 * parse geometry from arrow data that is returned from processArrowData()
 * NOTE: this function could be duplicated with the binaryToFeature() in deck.gl,
 * it is currently only used for picking because currently deck.gl returns only the index of the feature
 * So the following functions could be deprecated once deck.gl returns the feature directly for binary geojson layer
 *
 * @param rawData the raw geometry data returned from processArrowData, which is an object with two properties: encoding and data
 * @see processArrowData
 * @returns a Feature with empty properties, or null when encoding or data is missing
 * @throws Error when the (lower-cased) encoding is not one of the point/line/polygon encodings
 */
export function parseGeometryFromArrow(rawData: RawArrowFeature): Feature | null {
  const encoding = rawData.encoding?.toLowerCase();
  const data = rawData.data;
  if (!encoding || !data) {
    return null;
  }

  let geometry;

  switch (encoding) {
    case GEOARROW_ENCODINGS.MULTI_POLYGON:
      geometry = arrowMultiPolygonToFeature(data);
      break;
    case GEOARROW_ENCODINGS.POLYGON:
      geometry = arrowPolygonToFeature(data);
      break;
    case GEOARROW_ENCODINGS.MULTI_POINT:
      geometry = arrowMultiPointToFeature(data);
      break;
    case GEOARROW_ENCODINGS.POINT:
      geometry = arrowPointToFeature(data);
      break;
    case GEOARROW_ENCODINGS.MULTI_LINESTRING:
      geometry = arrowMultiLineStringToFeature(data);
      break;
    case GEOARROW_ENCODINGS.LINESTRING:
      geometry = arrowLineStringToFeature(data);
      break;
    default: {
      throw new Error('GeoArrow encoding not supported');
    }
  }
  return {
    type: 'Feature',
    geometry,
    properties: {}
  };
}

/** Copy a list of arrow coordinates into geojson positions, skipping null entries. */
function arrowCoordsToPositions(arrowCoords: Vector | null | undefined): Position[] {
  const positions: Position[] = [];
  for (let i = 0; arrowCoords && i < arrowCoords.length; i++) {
    const arrowCoord = arrowCoords.get(i);
    if (arrowCoord) {
      const position: Position = Array.from(arrowCoord);
      positions.push(position);
    }
  }
  return positions;
}

/** Copy a list of coordinate lists (rings or line parts); a null entry becomes an empty list. */
function arrowCoordListsToPositions(arrowLists: Vector | null | undefined): Position[][] {
  const lists: Position[][] = [];
  for (let i = 0; arrowLists && i < arrowLists.length; i++) {
    lists.push(arrowCoordsToPositions(arrowLists.get(i)));
  }
  return lists;
}

/**
 * convert Arrow MultiPolygon to geojson MultiPolygon geometry
 */
function arrowMultiPolygonToFeature(arrowMultiPolygon: Vector): MultiPolygon {
  const multiPolygon: Position[][][] = [];
  for (let m = 0; m < arrowMultiPolygon.length; m++) {
    multiPolygon.push(arrowCoordListsToPositions(arrowMultiPolygon.get(m)));
  }
  return {
    type: 'MultiPolygon',
    coordinates: multiPolygon
  };
}

/**
 * convert Arrow Polygon to geojson Polygon geometry
 */
function arrowPolygonToFeature(arrowPolygon: Vector): Polygon {
  return {
    type: 'Polygon',
    coordinates: arrowCoordListsToPositions(arrowPolygon)
  };
}

/**
 * convert Arrow MultiPoint to geojson MultiPoint geometry
 */
function arrowMultiPointToFeature(arrowMultiPoint: Vector): MultiPoint {
  return {
    type: 'MultiPoint',
    coordinates: arrowCoordsToPositions(arrowMultiPoint)
  };
}

/**
 * convert Arrow Point to geojson Point geometry
 */
function arrowPointToFeature(arrowPoint: Vector): Point {
  const point: Position = Array.from(arrowPoint);
  return {
    type: 'Point',
    coordinates: point
  };
}

/**
 * convert Arrow MultiLineString to geojson MultiLineString geometry
 */
function arrowMultiLineStringToFeature(arrowMultiLineString: Vector): MultiLineString {
  return {
    type: 'MultiLineString',
    coordinates: arrowCoordListsToPositions(arrowMultiLineString)
  };
}

/**
 * convert Arrow LineString to geojson LineString geometry
 */
function arrowLineStringToFeature(arrowLineString: Vector): LineString {
  return {
    type: 'LineString',
    coordinates: arrowCoordsToPositions(arrowLineString)
  };
}

// NOTE(review): the line below closed this span in the original (newline-collapsed) patch. It is the
// header of the binary fixture hunk that follows and is kept verbatim so the payload stays labelled.
// diff --git a/modules/arrow/test/data/line.arrow b/modules/arrow/test/data/line.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2db7920191fba9c86d6d3b9d9428184ba51b2c2a GIT binary patch literal 3146
zcmeHJUrQT75Z`FDL4uW5iVr><_hjJ+xx4%^c`-r58Hgq%5rr7+nOl;BwKcc*DeTc6tu%|BRx~ot|siPCrmKJyeUDJg^Dsv?Ds|(q z+3H-%=SEA~&s@sNQMr9~LV)WHEy>`n#eKUcwWedAk0iAqlq$7awb80qkLG9Q^!&c6 z=M}B6pVG22at>!aXT`Oo=wNS|DIt(1PI(#^^Z^o`|nL|bED&>nREe87&L_IWp+rPEA zv{y6z_psx*oyT+~j{VRwMe<_HViPNkmH`_XUCVX5 zNKYRNDn@C(@~1Z(*{UIdtssF94-crLRWOu9+eTq+gLi;2_6BVNRr!ih4Q{b*AVjew z$$fAFyh{jp5RM~<7z&)`CSZYTnDiwu)$m&|bQjACUKlIW87cCK&mAh`qGOIYU}oXW zd;r7p7U@ag=sYN4-$scsM4fWbY6U-4IEU z54IW?c`6AheKs%-_L=UG&tURNxrs>JEzw}1jrJgn<4GEgqc&I%LnHwOweqAM)=p3x zgi6mM(fi$a9w*!Lanr=VjcGsUDOyj2&Ce(Mls8<%f)?k7Fq+4Mp#OM4`JjG~k4>zT zY9uN0y(yI9Yyv|TxCAEuJQ!sb;AGv literal 0 HcmV?d00001 diff --git a/modules/arrow/test/data/multiline.arrow b/modules/arrow/test/data/multiline.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2d5728a4653fc05e0f2880a3b8149fc0c1ec1c9e GIT binary patch literal 3370 zcmeHJ&1(}u6rb8yLkT5_hzAd0@lat2*-iS9c(9=jX&`MwQmIfwjoBex*zAVQRvT); zg9raKj~+bwM|c(_{(d_6+HUV1SVBwzt&|X1L0A+^VnN&% zY0Lm~sSADvWGdl1w>XD5<_XdIKAmnSyeAmXjB)>14tqzhWjJ;N->4Xcyp=B)rZEcL zVK1B<^#ku@NWnk`Lm7n=$P{mERL(BBm`lGOIIcfX%DpfQ-sFcTUgUbNFUL_x5f+D7 zXw++a_04wY@P%%GZ{y6n6t{UQmxr+v^sl@ zt=W~U#kF;-xMter^}M~ZX4}P;)uP#~?bi3VpY1mGq07)EoLJp%*KD;LJ2kqhhf?}) z-2O2n1+%(aZ`KlhM-Ct(J72KYN-IUXY_C}5a;aE^fz8gL^gMSo4qT__nReH#K5sA? 
zY?6xF>qTyK;>g@`vll&v9X|-8V{@;)Rcjww(En#&zgQ0Zfi@lZG%b7MD6z~){@HT8 ziM3Xz4jZMSz1r=fJOdo`LtAY-8(W6E`eQ9Muy)r0 z<0d3AC}M`ze3gqpBnnh-U8u(~k<$9POHn-2w2q~4MSdN?Vy#XJY*B`hyv0X(+`wQ&vWlmquZfxk6qjGX`Y8UvQeoU zN6l8}O1>~!(qZOGR*uW<^HTy`Z)iydcP;MQJ*hRD_6NxjFGe%M{78EsIU8G+G91WOOap z?IJyWET|Zz`P!e2IkI(132cZ2K0G|2j@H3Y5^WoWwF}+@#@H*g2~_0^Mm4y@c7YJZ zk|g)R3GhB4;6XT!9AYSNnwx+Hs$tTXz*NJp!O&eUD|lh7OlPFXr#^S6jEjyr;((cj zGxHt{%R8hefur-Fgnb(&#t?M^l1*SZ79w`?9jzodb8MYjil1x{k?wRxJ(|BM<`eok z@VT+9PxGSK;;=$v+NZ@`3w*`amgkZ$+Y0t3fygL6P)x#y2rlO**jQR8QIfquJaj`O zMLyViT;!=Fr1aUsIM`>pLq37YC*>w0akoT+r8e4wFpfuQG>+O}Jq(cq6x7P2dRRL_ zZ4fFwiA3*r<3*foFUCz1|2C%mT%>3{5w<8N`;<2=6h6tmz6+qJWu;K1MERh8kdJMw zlWHU>^1UsT;%o{-7Ptf^{}eN|U%t)f`yuuc?fvRH{{ODyn;Um37d!YJE5usBUAl$o wY(=*`WIs=Mlu}BIGRJJWmCI$c_RfagF!g-f{`}vM^X4bKfdr|ZxZ_R z5xiq$gi&_0anyX?>7Kr}Up2edUir1{?AK3T9#9ZQ{?Ka5a42JMV0GuySdOh?RT(wf z?bdPksI{LSx#RBc72RE@RNc#$D!ZkUTiJ1)cH^*l^5W%T>jbupTVj%8uh(;qJFSBT zr)ox02JgMWIWz^Qe%Nd`68oVXK}T`EP~0t7+)}kvDORgx*M)=U-BaoN-eekj!@lp7 zdQSbP78ipZ(oy^U*o!ZQ()zyLkAHxlAdKR3=cu#a=$sZ|fA6`LJcq%^xDErlmi=j* zcxEK`_N+FsaolafN7*gy^m<6o2n(v2M4!d;iA1&zn8M>Eg;!TssHJrv%vAp?ifaOV z3YidJqtBu;9|_jsws;H)VJ%1f444IeO9{|VcrztfCFj^IJg|;w{~eHZ`~(Qw<u^PMGj{t3uGL$fH`I7af75e5ylv09&6+cqk34gb z&WN6C?ZNG_jLRG9ea|8?#s^rl&=4@?;uj>1>m=mFH;9KfP)_AqtY=gWd{SCAo8Wa0 zc}M*P5`DFc%iBTD<{-mot4j3_X^Xk%z9Xm{H$<;(EL zk6LzDzdRz5Jo|E=P2_j=Vrd-fk-l#7y#<=*B&TX#*H%XSj0K(p&AZ1nv$lT!bFr`> zC;b)r%hi+je|+-Zz71z~?G!IPUwg;Z*wgPE?-=tYf>dYrLvk_k-6MQbZMxZ1_u=Xf kw)%tF|NfI){lRc-c#KwmFnxdE8M^Zi_8Ff?^}j9t16ehxTL1t6 literal 0 HcmV?d00001 diff --git a/modules/arrow/test/data/point.arrow b/modules/arrow/test/data/point.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0b1835023b23a8a0a838b73c2faf5c9ed3fe1f42 GIT binary patch literal 2954 zcmeHJ&1%~~5MJl6#+VRFDLM2YM4>4NV@Y-#n?f*&Pz+9N?1U5?kt){W2yDs7N@J7e zP5M|p^Z|P4G3P=_zgbDK>k^t{p&m2Zou8eVotfQrwbg2zs*K$OzAiJi!3Z*JovpD| zmc|T_T6y4SL9WF7<`T^z7CgqBpZE8LnD++bjXdgJ@_~K+kq0B!_vDJ4&#Ae*tjNQ_ 
z9oWI-yzBR`1|;Zlf55|F0$Icx$(5Ta7jy1)eaH2Bk+K~G{&jBXyIx330;tQT(J=Q~ z?arBeYPO}F!kMh>SC3B)iQxx*M>4tZ@X+l_?a3(Q14%6lC8J)iHQSBa{_M(ay|kn1 zB}FUmS? ztnct1WW>(p)l#9TYh|sdmdgcQhk?CzhxhyLaOAs=-B&bAslKn#W{{dF>b@Pi;g!Rs zt-2k)g&ofi!b_#m+BaGq75Y#1^@nBO>m{ark4$rW6vmb*k|$dhNi>>m6E+IEwryD` zPY)ZaBo#gnCqs^EHAU@iZ*LjejzKEM*J!41pf`bxJ;#_vCq84;cN^>lI0UH;l83-F z@D*`@pJF=k1XA#6Z5kG+Zxem$+jSsxXIhED17lS>6BRxVxkqhV@$3O-h$0OOYmlS0 zMJ!}1i7YxdY?(%ihwKxO?s!5oTE8aN6ZZ?w8=_mC*Qa%nKe9o;#Md+nk+0O!{95wm zI?-O!h>YR`q$GUkd~YI2v_FC@dx==+gl9!=+473W`Bo&xHv>A#8QlS&f#i?!oIq?p zA%7c_{m<(pbTUSJVTdGxsP0o!kI~6G(j;HAaS`utH05d*W@(CrYM?zR-0eowyIsm7 z&8Km#V@|crik_gJqKL*Y_yPD1n8Zip-4E0u=4CM6EUv=;>MFdr@W*ncgI7?QL5r?I yabA?x&-xU0sGOO-@6f{CJr6pSR+9dto=W!*GN8pxm1 zKd8t40X_CyDE9Yub`om{r04kH?acd__c8Cy&XgLB+L_MS9FU7Lw!;Xv*f!f_8!Uzy zAQHXj$3aFzert{95C=TQF8{zr$oqitMxFEqeAK=8!o7*@I%+{pCiFy7)zqaC?sa194rhHB#v%_3RI^+&4;sz( zxq4<|-q2D^?poZpdrEUQ@%c#6^MO*iTCLQZwaVe*%KcO}r>C-- zk1g#v<+x0)OpkNt+U^Y?Nou7Nvsw=I zE#8NW*omZ`O=nU@-pJ_rd^(kafrDn74~O=6;@Vbss2Lrt^r=G0U~@#&!>(`pR~A?H zs$Ks*>^QFH542k2u-s_t(0{hCN0wcuFHO4+ndaSzA6lkJo^3hU#B#l9!bUn}>~}gS zPag>tWQ{NU*_fkR^{{%kx3_E^Jp_3$-l3t2z<1y=_8Mah-S~=8@9nTR;1EROB=>Kz=9XXN^~{~eCl(D+PC0zN1S9ioRu_? 
zVjw;8EzbebG`G=R7?=gUOQE|%G6g7gwQw$UHxXA9c`}Bh6XZ#8%B;n?Wa~6af0zck z(;3ZZ{bsP9xZj1(jb(lEE%S)NAJMd@i#sfQMb?(rQe3tr_A!RYC_X?$!3XZLXwaw5SLDV}1b1AJtF-)$A#txCz1-1hGI%zQ~0k5&@#8 z?$yH{d$6NCh=Q4m=}rumzMcTvoan@FWRGAsmA!N2Owq3HbOCm zUmwL>j;t8U+#l zg { + async function testParseFromArrow(t, arrowFile, expectedGeojson) { + // TODO: use the following code instead of apache-arrow to load arrow table + // const arrowTable = await parse(fetchFile(arrowFile), ArrowLoader, {worker: false}); + const response = await fetch(arrowFile); + const arrayBuffer = await response.arrayBuffer(); + const arrowTable = tableFromIPC(new Uint8Array(arrayBuffer)); + + // check if the arrow table is loaded correctly + t.equal( + arrowTable.numRows, + expectedGeojson.features.length, + `arrow table has ${expectedGeojson.features.length} row` + ); + + const colNames = [...Object.keys(expectedGeojson.features[0].properties), 'geometry']; + t.equal(arrowTable.numCols, colNames.length, `arrow table has ${colNames.length} columns`); + + // check fields exist in arrow table schema + arrowTable.schema.fields.map(field => + t.equal(colNames.includes(field.name), true, `arrow table has ${field.name} column`) + ); + + const geometryColumns = getGeometryColumnsFromArrowTable(arrowTable); + + // check 'geometry' is in geometryColumns (geometryColumns is a Map object) + t.equal(Boolean(geometryColumns['geometry']), true, 'geometryColumns has geometry column'); + + // get encoding from geometryColumns['geometry'] + const encoding = geometryColumns['geometry'].encoding; + + // check encoding is one of GEOARROW_ENCODINGS + t.ok( + Object.values(GEOARROW_ENCODINGS).includes(encoding), + 'encoding is one of GEOARROW_ENCODINGS' + ); + + // get first geometry from arrow geometry column + const firstArrowGeometry = arrowTable.getChild('geometry')?.get(0); + const firstArrowGeometryObject = { + encoding, + data: firstArrowGeometry + }; + + // parse arrow geometry to geojson feature + const firstFeature = 
parseGeometryFromArrow(firstArrowGeometryObject); + + // check if geometry in firstFeature is equal to the original geometry in expectedPointGeojson + t.deepEqual( + firstFeature?.geometry, + expectedGeojson.features[0].geometry, + 'firstFeature.geometry is equal to expectedGeojson.features[0].geometry' + ); + } + + const testCases = [ + [POINT_ARROW_FILE, expectedPointGeojson], + [MULTIPOINT_ARROW_FILE, expectedMultiPointGeoJson], + [LINE_ARROW_FILE, expectedLineStringGeoJson], + [MULTILINE_ARROW_FILE, expectedMultiLineStringGeoJson], + [POLYGON_ARROW_FILE, expectedPolygonGeojson], + [MULTIPOLYGON_ARROW_FILE, expectedMultiPolygonGeojson] + ]; + + testCases.forEach(async testCase => { + await testParseFromArrow(t, testCase[0], testCase[1]); + }); + + t.end(); +});