Skip to content

Commit

Permalink
Enhancement 2 (read EAN products from HTML file) (#3)
Browse files Browse the repository at this point in the history
* Add new dependency (intl) & use it with csv file name

* Add new gitignored file (*.html)

* Add 'assets/files/' directory where to get .txt & save .csv

* Add 'html' dependency & start 'web scraping' with local html file

* Update '_loadHtmlFromAssets' method to get EAN codes

* Move all methods from 'main' file to own files

* Some refactoring (e.g. move receipt read -> save as csv logic into own file)

* Save parsed html into eanProduct list

* Fine-tune ean product name & add quantity

* Rename 'product.dart' to 'receipt_product.dart'

* Add  price to EANProduct & parse it from html

* Refactoring: define filePaths in main method

* Save eanProducts as csv & some refactoring

* Refactor 'product' -> 'receiptProduct'

* Move eanProduct csv generating into own file

* Refactor html to csv generating into own file
  • Loading branch information
areee authored Nov 13, 2021
1 parent 3395128 commit 04c1d54
Show file tree
Hide file tree
Showing 14 changed files with 265 additions and 86 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ build/

# For privacy reasons, do not save cash receipt (txt and final csv) into Git.
*.txt
*.csv
*.csv
*.html
88 changes: 5 additions & 83 deletions bin/dart_kassakuitti_cli.dart
Original file line number Diff line number Diff line change
@@ -1,87 +1,9 @@
import 'dart:io';
import 'helper.dart';
import 'product.dart';
import 'read_html_and_save_as_csv.dart';
import 'read_receipt_and_save_as_csv.dart';

/// Entry point of the command-line tool.
///
/// NOTE(review): this span is taken from a diff view without +/- markers, so
/// it appears to contain statements of both the previous flow (`readFile` ->
/// `strings2Products` -> `saveProducts`) and the new flow
/// (`readReceiptAndSaveAsCSV` + `readHtmlAndSaveAsCSV`) — confirm against the
/// actual file which of them are live.
void main(List<String> arguments) {
// Read the receipt lines; a null result (unreadable file) is replaced by an
// empty list before parsing.
var lines = readFile('cashReceipt.txt');
var products = strings2Products(lines ?? []);
// Process the receipt text file and the ordered-products HTML file; the
// paths are relative to the current working directory.
readReceiptAndSaveAsCSV('assets/files/_cashReceipt.txt');
readHtmlAndSaveAsCSV('assets/files/_orderedProducts.html');

// Save products into CSV file
saveProducts(products);
}

/// Reads the text file called [fileName] and returns its lines.
///
/// Returns `null` when reading fails with a [FileSystemException]; the
/// exception is printed to standard output in that case.
List<String>? readFile(String fileName) {
  try {
    final contents = File(fileName).readAsLinesSync();
    return contents;
  } on FileSystemException catch (e) {
    print("File not found: $e");
    return null;
  }
}

/// Parses the lines of a cash receipt into a list of [Product]s.
///
/// Every line is trimmed and lower-cased, then classified in this order:
///
/// * a line containing `----------` stops the parsing (sum lines follow it);
/// * a line containing `palautus` marks a refund; the next two lines that
///   reach the refund branch are skipped;
/// * a line starting with a digit carries the quantity and the price per
///   unit of the most recently added product;
/// * any other line is a product row: a name and a total price separated by
///   a run of at least 12 whitespace characters.
///
/// Blank lines and lines without the expected columns are skipped instead of
/// aborting the whole parse with a [RangeError], [StateError] or
/// [FormatException].
List<Product> strings2Products(List<String> lines) {
  // Built once instead of on every iteration.
  final startsWithDigit = RegExp(r'^\d');
  final quantityRowGap = RegExp(r'\s{6,7}');
  final productRowGap = RegExp(r'\s{12,33}');

  // Returns at most the first [count] characters of [text]; unlike a bare
  // `substring(0, count)` it does not throw on shorter strings.
  String leading(String text, int count) =>
      text.length <= count ? text : text.substring(0, count);

  var helper = Helper();
  List<Product> products = [];

  for (var item in lines) {
    item = item.trim().toLowerCase();

    // Do not handle sum lines (after a row of strokes):
    if (item.contains('----------')) {
      break;
    }
    // Refund line:
    else if (item.contains('palautus')) {
      helper.previousLine = PreviousLine.refund;
    }
    // When previous line was a refund line, skip next two lines:
    else if (helper.previousLine == PreviousLine.refund) {
      if (helper.calcLines != 1) {
        helper.calcLines++;
      } else {
        helper.calcLines = 0;
        helper.previousLine = PreviousLine.notSet;
      }
    }
    // If a line starts with a digit, it is a quantity and price per unit row:
    else if (item.contains(startsWithDigit)) {
      var items = item.split(quantityRowGap);

      // The row needs both columns and an earlier product to attach to.
      if (items.length < 2 || products.isEmpty) {
        continue;
      }

      var quantity = int.tryParse(leading(items[0], 2).trim());
      if (quantity == null) {
        continue;
      }

      var lastProduct = products.last;
      lastProduct.quantity = quantity;
      lastProduct.pricePerUnit = leading(items[1], 5).trim();
    }
    // A "normal" line:
    else {
      var items = item.split(productRowGap);

      // Blank lines and rows without a price column split into one part.
      if (items.length < 2) {
        continue;
      }

      products.add(Product(name: items[0], totalPrice: items[1]));
    }
  }

  return products;
}

/// Writes [products] to `products.csv` in the current working directory.
///
/// The file starts with the header row
/// `name;quantity;pricePerUnit;totalPrice`, followed by one
/// semicolon-separated row per product.
///
/// The file is written synchronously: `Done!` is printed only once the data
/// is on disk, and a failed write reaches the caller as a
/// [FileSystemException] instead of being lost in a dropped future.
void saveProducts(List<Product> products) {
  var csv = StringBuffer();

  csv.write('name;quantity;pricePerUnit;totalPrice\n');

  for (var product in products) {
    // NOTE(review): null fields are interpolated as the text "null"; values
    // are not escaped, so a ';' inside a name would shift the columns.
    csv.write(
        '${product.name};${product.quantity};${product.pricePerUnit};${product.totalPrice}\n');
  }

  File('products.csv').writeAsStringSync(csv.toString());
  print('Done!');
}
17 changes: 17 additions & 0 deletions bin/ean_product.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/// A product identified by an EAN code, with its name, quantity and price.
class EANProduct {
  /// Creates a product; every field is required.
  EANProduct({
    required this.ean,
    required this.name,
    required this.quantity,
    required this.price,
  });

  /// The EAN code of the product.
  final String ean;

  /// The product name.
  final String name;

  /// How many units of the product there are.
  final int quantity;

  /// The price, kept as text.
  final String price;

  /// Returns a debug representation listing all four fields.
  @override
  String toString() =>
      'EANProduct{ean: $ean, name: $name, '
      'quantity: $quantity, price: $price}';
}
7 changes: 7 additions & 0 deletions bin/helper.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import 'package:intl/intl.dart';

class Helper {
PreviousLine previousLine;
int calcLines;
Expand All @@ -6,3 +8,8 @@ class Helper {
}

/// State stored in `Helper.previousLine` while receipt lines are parsed:
/// [refund] is set when a refund line has been seen, [notSet] otherwise.
enum PreviousLine { notSet, refund }

/// Returns the current date and time formatted with the pattern
/// `yyyyMMddHHmmss`, for use as part of a file name.
String formattedDateTime() {
  final now = DateTime.now();
  final fileNameFormat = DateFormat('yyyyMMddHHmmss');
  return fileNameFormat.format(now);
}
50 changes: 50 additions & 0 deletions bin/load_html.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import 'dart:io';
import 'package:html/parser.dart';

import 'ean_product.dart';

/// Loads the HTML file at [filePath], parses it and returns the products
/// found in it as a list of [EANProduct]s.
///
/// Products are read from the children of the first element with the class
/// `styled-order-page__StyledOrderItemContainer-sc-qzridm-1`. The first child
/// is skipped (NOTE(review): presumably a heading row — confirm); each
/// remaining child yields one product:
///
/// * `ean` comes from the `data-product-id` attribute (empty when missing);
/// * `name` is the trimmed text of the name element, with line breaks
///   removed and runs of 30 whitespace characters replaced by one space;
/// * `quantity` is parsed as an integer from the quantity element's text;
/// * `price` is the trimmed price text without the ` €` suffix.
///
/// Throws a [FormatException] when the document contains no such container
/// (instead of an opaque [RangeError]) or when a quantity is not an integer.
/// Errors from reading the file are not caught here.
Future<List<EANProduct>> loadHtmlFromAssets(String filePath) async {
  var html = await File(filePath).readAsString();
  var document = parse(html);

  var containers = document.getElementsByClassName(
      'styled-order-page__StyledOrderItemContainer-sc-qzridm-1');
  if (containers.isEmpty) {
    throw FormatException(
        'No order item container found in HTML file: $filePath');
  }

  // Built once instead of on every iteration.
  final wideGap = RegExp(r'\s{30}');

  List<EANProduct> eanProducts = [];

  for (var product in containers.first.children.skip(1)) {
    var eanCode = product.attributes['data-product-id'] ?? '';

    // Name, quantity and price all live below this shared sub-tree.
    var details = product.children[0].children[0].children[1];

    var productName = details.children[0].children[0].text
        .trim()
        .replaceAll('\n', '')
        .replaceAll(wideGap, ' ');

    // Quantity and price sit side by side under the same parent element.
    var amounts = details.children[1].children[1];
    var productQuantity = amounts.children[0].children[0].text;
    var productPrice = amounts.children[1].text.trim().replaceAll(' €', '');

    eanProducts.add(
      EANProduct(
        ean: eanCode,
        name: productName,
        quantity: int.parse(productQuantity),
        price: productPrice,
      ),
    );
  }

  return eanProducts;
}
8 changes: 8 additions & 0 deletions bin/read_html_and_save_as_csv.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import 'load_html.dart';
import 'save_eps_as_csv.dart';

/// Loads the products from the HTML file at [filePath] and saves them as a
/// CSV file.
///
/// Returns a [Future] that completes once the products have been loaded and
/// handed to `eanProductListToCSV`, so callers can await the work and
/// observe its errors. Callers that ignore the result keep working as before.
Future<void> readHtmlAndSaveAsCSV(String filePath) async {
  final eanProducts = await loadHtmlFromAssets(filePath);

  eanProductListToCSV(eanProducts);
}
9 changes: 9 additions & 0 deletions bin/read_receipt_and_save_as_csv.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import 'read_receipt_file.dart';
import 'save_rps_as_csv.dart';
import 'strings_to_products.dart';

/// Reads the receipt text file at [filePath], parses its lines into receipt
/// products and saves them as a CSV file.
///
/// When the file cannot be read (`readReceiptFile` returns `null`), parsing
/// runs on an empty list of lines.
void readReceiptAndSaveAsCSV(String filePath) {
  final receiptLines = readReceiptFile(filePath) ?? [];
  receiptProducts2CSV(strings2Products(receiptLines));
}
11 changes: 11 additions & 0 deletions bin/read_receipt_file.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import 'dart:io';

/// Reads the text file at [filePath] and returns its lines.
///
/// Returns `null` when reading fails with a [FileSystemException]; the
/// exception is printed to standard output in that case.
List<String>? readReceiptFile(String filePath) {
  try {
    final contents = File(filePath).readAsLinesSync();
    return contents;
  } on FileSystemException catch (e) {
    print("File not found: $e");
    return null;
  }
}
4 changes: 2 additions & 2 deletions bin/product.dart → bin/receipt_product.dart
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
class Product {
class ReceiptProduct {
final String? name;
final String? totalPrice;
int? quantity;
String? pricePerUnit;

Product({
ReceiptProduct({
this.name,
this.totalPrice,
this.quantity = 1,
Expand Down
18 changes: 18 additions & 0 deletions bin/save_eps_as_csv.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import 'dart:io';
import 'ean_product.dart';
import 'helper.dart';

/// Saves [eanProductList] as `assets/files/ean_products_<timestamp>.csv`.
///
/// The file starts with the header row `ean;name;quantity;price`, followed
/// by one semicolon-separated row per product. The timestamp comes from
/// `formattedDateTime()`.
///
/// The output directory is created when it does not exist, and the file is
/// written synchronously so that a failed write reaches the caller as a
/// [FileSystemException] instead of being lost in a dropped future.
void eanProductListToCSV(List<EANProduct> eanProductList) {
  var csv = StringBuffer();

  csv.write('ean;name;quantity;price\n');

  for (var item in eanProductList) {
    // NOTE(review): values are not escaped, so a ';' inside a product name
    // would shift the columns.
    csv.write('${item.ean};${item.name};${item.quantity};${item.price}\n');
  }

  var date = formattedDateTime();

  var file = File('assets/files/ean_products_$date.csv');
  file.parent.createSync(recursive: true);
  file.writeAsStringSync(csv.toString());
}
20 changes: 20 additions & 0 deletions bin/save_rps_as_csv.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import 'dart:io';
import 'helper.dart';
import 'receipt_product.dart';

/// Saves [products] as `assets/files/receipt_products_<timestamp>.csv`.
///
/// The file starts with the header row
/// `name;quantity;pricePerUnit;totalPrice`, followed by one
/// semicolon-separated row per product. The timestamp comes from
/// `formattedDateTime()`.
///
/// The output directory is created when it does not exist, and the file is
/// written synchronously so that a failed write reaches the caller as a
/// [FileSystemException] instead of being lost in a dropped future.
void receiptProducts2CSV(List<ReceiptProduct> products) {
  var csv = StringBuffer();

  csv.write('name;quantity;pricePerUnit;totalPrice\n');

  for (var product in products) {
    // NOTE(review): null fields are interpolated as the text "null"; values
    // are not escaped, so a ';' inside a name would shift the columns.
    csv.write(
        '${product.name};${product.quantity};${product.pricePerUnit};${product.totalPrice}\n');
  }

  var date = formattedDateTime();

  var file = File('assets/files/receipt_products_$date.csv');
  file.parent.createSync(recursive: true);
  file.writeAsStringSync(csv.toString());
}
54 changes: 54 additions & 0 deletions bin/strings_to_products.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import 'helper.dart';
import 'receipt_product.dart';

/// Parses the lines of a cash receipt into a list of [ReceiptProduct]s.
///
/// Every line is trimmed and lower-cased, then classified in this order:
///
/// * a line containing `----------` stops the parsing (sum lines follow it);
/// * a line containing `palautus` marks a refund; the next two lines that
///   reach the refund branch are skipped;
/// * a line starting with a digit carries the quantity and the price per
///   unit of the most recently added product;
/// * any other line is a product row: a name and a total price separated by
///   a run of at least 12 whitespace characters.
///
/// Blank lines and lines without the expected columns are skipped instead of
/// aborting the whole parse with a [RangeError], [StateError] or
/// [FormatException].
List<ReceiptProduct> strings2Products(List<String> lines) {
  // Built once instead of on every iteration.
  final startsWithDigit = RegExp(r'^\d');
  final quantityRowGap = RegExp(r'\s{6,7}');
  final productRowGap = RegExp(r'\s{12,33}');

  // Returns at most the first [count] characters of [text]; unlike a bare
  // `substring(0, count)` it does not throw on shorter strings.
  String leading(String text, int count) =>
      text.length <= count ? text : text.substring(0, count);

  var helper = Helper();
  List<ReceiptProduct> products = [];

  for (var item in lines) {
    item = item.trim().toLowerCase();

    // Do not handle sum lines (after a row of strokes):
    if (item.contains('----------')) {
      break;
    }
    // Refund line:
    else if (item.contains('palautus')) {
      helper.previousLine = PreviousLine.refund;
    }
    // When previous line was a refund line, skip next two lines:
    else if (helper.previousLine == PreviousLine.refund) {
      if (helper.calcLines != 1) {
        helper.calcLines++;
      } else {
        helper.calcLines = 0;
        helper.previousLine = PreviousLine.notSet;
      }
    }
    // If a line starts with a digit, it is a quantity and price per unit row:
    else if (item.contains(startsWithDigit)) {
      var items = item.split(quantityRowGap);

      // The row needs both columns and an earlier product to attach to.
      if (items.length < 2 || products.isEmpty) {
        continue;
      }

      var quantity = int.tryParse(leading(items[0], 2).trim());
      if (quantity == null) {
        continue;
      }

      var lastProduct = products.last;
      lastProduct.quantity = quantity;
      lastProduct.pricePerUnit = leading(items[1], 5).trim();
    }
    // A "normal" line:
    else {
      var items = item.split(productRowGap);

      // Blank lines and rows without a price column split into one part.
      if (items.length < 2) {
        continue;
      }

      products.add(ReceiptProduct(name: items[0], totalPrice: items[1]));
    }
  }

  return products;
}
56 changes: 56 additions & 0 deletions pubspec.lock
Original file line number Diff line number Diff line change
@@ -1,12 +1,68 @@
# Generated by pub
# See https://dart.dev/tools/pub/glossary#lockfile
packages:
clock:
dependency: transitive
description:
name: clock
url: "https://pub.dartlang.org"
source: hosted
version: "1.1.0"
collection:
dependency: transitive
description:
name: collection
url: "https://pub.dartlang.org"
source: hosted
version: "1.15.0"
csslib:
dependency: transitive
description:
name: csslib
url: "https://pub.dartlang.org"
source: hosted
version: "0.17.1"
html:
dependency: "direct main"
description:
name: html
url: "https://pub.dartlang.org"
source: hosted
version: "0.15.0"
intl:
dependency: "direct main"
description:
name: intl
url: "https://pub.dartlang.org"
source: hosted
version: "0.17.0"
lints:
dependency: "direct dev"
description:
name: lints
url: "https://pub.dartlang.org"
source: hosted
version: "1.0.1"
path:
dependency: transitive
description:
name: path
url: "https://pub.dartlang.org"
source: hosted
version: "1.8.0"
source_span:
dependency: transitive
description:
name: source_span
url: "https://pub.dartlang.org"
source: hosted
version: "1.8.1"
term_glyph:
dependency: transitive
description:
name: term_glyph
url: "https://pub.dartlang.org"
source: hosted
version: "1.2.0"
sdks:
dart: ">=2.15.0-178.1.beta <3.0.0"
Loading

0 comments on commit 04c1d54

Please sign in to comment.