-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enhancement 2 (read EAN products from HTML file) (#3)
* Add new dependency (intl) & use it with csv file name * Add new gitignored file (*.html) * Add 'assets/files/' directory where to get .txt & save .csv * Add 'html' dependency & start 'web scraping' with local html file * Update '_loadHtmlFromAssets' method to get EAN codes * Move all methods from 'main' file to own files * Some refactoring (e.g. move receipt read -> save as csv logic into own file) * Save parsed html into eanProduct list * Fine-tune ean product name & add quantity * Rename 'product.dart' to 'receipt_product.dart' * Add price to EANProduct & parse it from html * Refactoring: define filePaths in main method * Save eanProducts as csv & some refactoring * Refactor 'product' -> 'receiptProduct' * Move eanProduct csv generating into own file * Refactor html to csv generating into own file
- Loading branch information
Showing
14 changed files
with
265 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,4 +7,5 @@ build/ | |
|
||
# For privacy reasons, do not save cash receipt (txt and final csv) into Git. | ||
*.txt | ||
*.csv | ||
*.csv | ||
*.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,87 +1,9 @@ | ||
import 'dart:io'; | ||
import 'helper.dart'; | ||
import 'product.dart'; | ||
import 'read_html_and_save_as_csv.dart'; | ||
import 'read_receipt_and_save_as_csv.dart'; | ||
|
||
/// Program entry point; [arguments] is accepted but not read.
void main(List<String> arguments) {
  // Parse the plain-text receipt first; a `null` result from [readFile] is
  // replaced by an empty list of lines.
  final parsedProducts = strings2Products(readFile('cashReceipt.txt') ?? []);

  readReceiptAndSaveAsCSV('assets/files/_cashReceipt.txt');
  readHtmlAndSaveAsCSV('assets/files/_orderedProducts.html');

  // Save products into CSV file
  saveProducts(parsedProducts);
}
|
||
/// Reads the text file called [fileName] and returns its lines.
///
/// Returns `null` when reading raises a [FileSystemException]; the exception
/// is printed in that case.
List<String>? readFile(String fileName) {
  final source = File(fileName);
  List<String>? content;
  try {
    content = source.readAsLinesSync();
  } on FileSystemException catch (error) {
    print("File not found: $error");
  }
  return content;
}
|
||
/// Goes through the list of lines and returns a list of products.
///
/// Each line is trimmed and lower-cased, then classified in this order:
///
/// * a line containing a row of strokes (`----------`) ends the parse, so the
///   sum lines after it are never read;
/// * a line containing `palautus` (a refund line) is dropped together with
///   the two lines that follow it;
/// * a line starting with a digit supplies the quantity and the price per
///   unit of the most recently added product;
/// * any other line is split into a product name and its total price.
///
/// Lines that do not fit the expected layout (blank lines, lines without a
/// price column, a quantity that is not an integer, or a quantity row that
/// appears before any product) are skipped instead of aborting the whole
/// parse with a [RangeError], [StateError] or [FormatException].
List<Product> strings2Products(List<String> lines) {
  var helper = Helper();
  List<Product> products = [];

  // Trimmed prefix of [text], at most [length] characters long; unlike a bare
  // `substring(0, length)` it does not throw on shorter input.
  String prefix(String text, int length) =>
      (text.length > length ? text.substring(0, length) : text).trim();

  for (var item in lines) {
    item = item.trim().toLowerCase();

    // Do not handle sum lines (after a row of strokes):
    if (item.contains('----------')) {
      break;
    }
    // Refund line:
    else if (item.contains('palautus')) {
      helper.previousLine = PreviousLine.refund;
    }
    // When previous line was a refund line, skip next two lines:
    else if (helper.previousLine == PreviousLine.refund) {
      if (helper.calcLines != 1) {
        // First line after the refund line: only count it.
        helper.calcLines++;
      } else {
        // Second line after the refund line: reset and resume parsing.
        helper.calcLines = 0;
        helper.previousLine = PreviousLine.notSet;
      }
    }
    // If a line starts with a digit, it is a quantity and price per unit row:
    else if (item.contains(RegExp(r'^\d'))) {
      var items = item.split(RegExp(r'\s{6,7}'));

      // Needs both columns and a product to attach the values to.
      if (items.length < 2 || products.isEmpty) continue;

      var quantity = int.tryParse(prefix(items[0], 2));
      if (quantity == null) continue;

      products.last
        ..quantity = quantity
        ..pricePerUnit = prefix(items[1], 5);
    }
    // A "normal" line:
    else {
      var items = item.split(RegExp(r'\s{12,33}'));

      // Blank lines and lines without a price column yield a single part.
      if (items.length < 2) continue;

      products.add(Product(name: items[0], totalPrice: items[1]));
    }
  }

  return products;
}
|
||
/// Writes [products] to `products.csv` as semicolon-separated rows.
///
/// The first row is the header `name;quantity;pricePerUnit;totalPrice`; each
/// product follows on its own line. Field values are written as-is, without
/// quoting or escaping.
///
/// Throws a [FileSystemException] if the file cannot be written.
void saveProducts(List<Product> products) {
  var csv = StringBuffer('name;quantity;pricePerUnit;totalPrice\n');

  for (var product in products) {
    csv.write(
        '${product.name};${product.quantity};${product.pricePerUnit};${product.totalPrice}\n');
  }

  // Write synchronously: the asynchronous variant returns a Future that this
  // `void` function cannot hand back to the caller, so 'Done!' would be
  // printed before the file is actually on disk.
  File('products.csv').writeAsStringSync(csv.toString());
  print('Done!');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/// An immutable product record identified by an EAN code.
class EANProduct {
  /// The EAN code of the product.
  final String ean;

  /// The product name.
  final String name;

  /// The number of units.
  final int quantity;

  /// The price, kept as text exactly as it was supplied.
  final String price;

  /// Creates a product; all four values are required.
  ///
  /// The constructor is `const` because every field is final, so instances
  /// can be created as compile-time constants.
  const EANProduct({
    required this.ean,
    required this.name,
    required this.quantity,
    required this.price,
  });

  @override
  String toString() {
    return 'EANProduct{ean: $ean, name: $name, quantity: $quantity, price: $price}';
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import 'dart:io'; | ||
import 'package:html/parser.dart'; | ||
|
||
import 'ean_product.dart'; | ||
|
||
/// Loads the HTML file at [filePath], parses it and returns the products
/// found in it as a list of [EANProduct] objects.
///
/// Products are read from the children of the first element that carries the
/// order-item container class. The first child is skipped; every other child
/// yields one product whose EAN comes from its `data-product-id` attribute
/// (an empty string when the attribute is absent) and whose name, quantity
/// and price are read from fixed positions in its subtree.
///
/// Throws a [FormatException] when the document contains no order-item
/// container or when a quantity text is not an integer. A child whose subtree
/// is shallower than expected still raises a [RangeError].
Future<List<EANProduct>> loadHtmlFromAssets(String filePath) async {
  const containerClass =
      'styled-order-page__StyledOrderItemContainer-sc-qzridm-1';

  var html = await File(filePath).readAsString();
  var document = parse(html);

  var containers = document.getElementsByClassName(containerClass);
  if (containers.isEmpty) {
    // Fail with a message that names the problem instead of a bare RangeError
    // from indexing an empty list.
    throw FormatException(
        'No element with class "$containerClass" found in $filePath');
  }

  List<EANProduct> eanProducts = [];

  // Skip the first child; presumably it is a header rather than a product
  // (TODO confirm against the page markup).
  for (var product in containers.first.children.skip(1)) {
    var eanCode = product.attributes['data-product-id'] ?? '';

    // Name, quantity and price all live under this shared ancestor.
    var details = product.children[0].children[0].children[1];

    // Collapse the line breaks and the 30-character whitespace runs that
    // appear inside the name text.
    var productName = details.children[0].children[0].text
        .trim()
        .replaceAll('\n', '')
        .replaceAll(RegExp(r'\s{30}'), ' ');

    var priceBlock = details.children[1].children[1];
    var productQuantity = priceBlock.children[0].children[0].text;
    var productPrice = priceBlock.children[1].text.trim().replaceAll(' €', '');

    eanProducts.add(
      EANProduct(
        ean: eanCode,
        name: productName,
        quantity: int.parse(productQuantity),
        price: productPrice,
      ),
    );
  }

  return eanProducts;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import 'load_html.dart'; | ||
import 'save_eps_as_csv.dart'; | ||
|
||
/// Reads the products from the HTML file at [filePath] and passes them to
/// [eanProductListToCSV].
///
/// Returns a future that completes after [eanProductListToCSV] has been
/// called, so callers may `await` it; an error raised while loading the HTML
/// is delivered through the same future. Call sites that ignore the result
/// keep working unchanged.
Future<void> readHtmlAndSaveAsCSV(String filePath) async {
  var eanProducts = await loadHtmlFromAssets(filePath);

  eanProductListToCSV(eanProducts);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import 'read_receipt_file.dart'; | ||
import 'save_rps_as_csv.dart'; | ||
import 'strings_to_products.dart'; | ||
|
||
/// Reads the receipt file at [filePath], converts its lines to products and
/// hands them to [receiptProducts2CSV].
///
/// When [readReceiptFile] returns `null`, an empty list of lines is parsed
/// instead.
void readReceiptAndSaveAsCSV(String filePath) {
  final receiptLines = readReceiptFile(filePath) ?? <String>[];
  receiptProducts2CSV(strings2Products(receiptLines));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import 'dart:io'; | ||
|
||
/// Reads the text file at [filePath] and returns its lines.
///
/// Returns `null` when reading raises a [FileSystemException]; the exception
/// is printed in that case.
List<String>? readReceiptFile(String filePath) {
  final receipt = File(filePath);
  List<String>? content;
  try {
    content = receipt.readAsLinesSync();
  } on FileSystemException catch (error) {
    print("File not found: $error");
  }
  return content;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import 'dart:io'; | ||
import 'ean_product.dart'; | ||
import 'helper.dart'; | ||
|
||
/// Writes [eanProductList] to `assets/files/ean_products_<date>.csv`, where
/// `<date>` is the value returned by `formattedDateTime()`.
///
/// The first row is the header `ean;name;quantity;price`; each product
/// follows on its own line, semicolon-separated. Field values are written
/// as-is, without quoting or escaping.
///
/// Throws a [FileSystemException] if the file cannot be written.
void eanProductListToCSV(List<EANProduct> eanProductList) {
  var csv = StringBuffer('ean;name;quantity;price\n');

  for (var item in eanProductList) {
    csv.write('${item.ean};${item.name};${item.quantity};${item.price}\n');
  }

  var date = formattedDateTime();

  // Write synchronously: the asynchronous variant returns a Future that this
  // `void` function cannot hand back, so callers could not tell when the file
  // is complete or whether the write failed.
  File('assets/files/ean_products_$date.csv').writeAsStringSync(csv.toString());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import 'dart:io'; | ||
import 'helper.dart'; | ||
import 'receipt_product.dart'; | ||
|
||
/// Saves receipt products as a CSV file.
///
/// The file is `assets/files/receipt_products_<date>.csv`, where `<date>` is
/// the value returned by `formattedDateTime()`. The first row is the header
/// `name;quantity;pricePerUnit;totalPrice`; each product follows on its own
/// line, semicolon-separated. Field values are written as-is, without
/// quoting or escaping.
///
/// Throws a [FileSystemException] if the file cannot be written.
void receiptProducts2CSV(List<ReceiptProduct> products) {
  var csv = StringBuffer('name;quantity;pricePerUnit;totalPrice\n');

  for (var product in products) {
    csv.write(
        '${product.name};${product.quantity};${product.pricePerUnit};${product.totalPrice}\n');
  }

  var date = formattedDateTime();

  // Write synchronously: the asynchronous variant returns a Future that this
  // `void` function cannot hand back, so callers could not tell when the file
  // is complete or whether the write failed.
  File('assets/files/receipt_products_$date.csv')
      .writeAsStringSync(csv.toString());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import 'helper.dart'; | ||
import 'receipt_product.dart'; | ||
|
||
/// Goes through the list of lines and returns a list of products.
///
/// Each line is trimmed and lower-cased, then classified in this order:
///
/// * a line containing a row of strokes (`----------`) ends the parse, so the
///   sum lines after it are never read;
/// * a line containing `palautus` (a refund line) is dropped together with
///   the two lines that follow it;
/// * a line starting with a digit supplies the quantity and the price per
///   unit of the most recently added product;
/// * any other line is split into a product name and its total price.
///
/// Lines that do not fit the expected layout (blank lines, lines without a
/// price column, a quantity that is not an integer, or a quantity row that
/// appears before any product) are skipped instead of aborting the whole
/// parse with a [RangeError], [StateError] or [FormatException].
List<ReceiptProduct> strings2Products(List<String> lines) {
  var helper = Helper();
  List<ReceiptProduct> products = [];

  // Trimmed prefix of [text], at most [length] characters long; unlike a bare
  // `substring(0, length)` it does not throw on shorter input.
  String prefix(String text, int length) =>
      (text.length > length ? text.substring(0, length) : text).trim();

  for (var item in lines) {
    item = item.trim().toLowerCase();

    // Do not handle sum lines (after a row of strokes):
    if (item.contains('----------')) {
      break;
    }
    // Refund line:
    else if (item.contains('palautus')) {
      helper.previousLine = PreviousLine.refund;
    }
    // When previous line was a refund line, skip next two lines:
    else if (helper.previousLine == PreviousLine.refund) {
      if (helper.calcLines != 1) {
        // First line after the refund line: only count it.
        helper.calcLines++;
      } else {
        // Second line after the refund line: reset and resume parsing.
        helper.calcLines = 0;
        helper.previousLine = PreviousLine.notSet;
      }
    }
    // If a line starts with a digit, it is a quantity and price per unit row:
    else if (item.contains(RegExp(r'^\d'))) {
      var items = item.split(RegExp(r'\s{6,7}'));

      // Needs both columns and a product to attach the values to.
      if (items.length < 2 || products.isEmpty) continue;

      var quantity = int.tryParse(prefix(items[0], 2));
      if (quantity == null) continue;

      products.last
        ..quantity = quantity
        ..pricePerUnit = prefix(items[1], 5);
    }
    // A "normal" line:
    else {
      var items = item.split(RegExp(r'\s{12,33}'));

      // Blank lines and lines without a price column yield a single part.
      if (items.length < 2) continue;

      products.add(ReceiptProduct(name: items[0], totalPrice: items[1]));
    }
  }

  return products;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,68 @@ | ||
# Generated by pub | ||
# See https://dart.dev/tools/pub/glossary#lockfile | ||
packages: | ||
clock: | ||
dependency: transitive | ||
description: | ||
name: clock | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.1.0" | ||
collection: | ||
dependency: transitive | ||
description: | ||
name: collection | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.15.0" | ||
csslib: | ||
dependency: transitive | ||
description: | ||
name: csslib | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "0.17.1" | ||
html: | ||
dependency: "direct main" | ||
description: | ||
name: html | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "0.15.0" | ||
intl: | ||
dependency: "direct main" | ||
description: | ||
name: intl | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "0.17.0" | ||
lints: | ||
dependency: "direct dev" | ||
description: | ||
name: lints | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.0.1" | ||
path: | ||
dependency: transitive | ||
description: | ||
name: path | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.8.0" | ||
source_span: | ||
dependency: transitive | ||
description: | ||
name: source_span | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.8.1" | ||
term_glyph: | ||
dependency: transitive | ||
description: | ||
name: term_glyph | ||
url: "https://pub.dartlang.org" | ||
source: hosted | ||
version: "1.2.0" | ||
sdks: | ||
dart: ">=2.15.0-178.1.beta <3.0.0" |
Oops, something went wrong.