commit 4981c3dbef0b5a7e92806ea2c3c37f64098fe5e2
parent abf30897f6c0751e33b19b456b1e3e0c20bf5c28
Author: Bharatvaj Hemanth <bharatvaj@yahoo.com>
Date: Sun, 17 Dec 2023 23:54:32 +0530
Implement basic combined lexer + parser for ledger format
Add CHANGELOG
Diffstat:
A | CHANGELOG | | | 5 | +++++ |
A | HACKING | | | 64 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | Makefile | | | 26 | ++++++++++++++++++-------- |
M | README | | | 25 | ++++++++++++++++++++++++- |
A | account.h | | | 95 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | book.c | | | 267 | ++++++++++++++++++++++++++++++++++--------------------------------------------- |
M | strn.h | | | 3 | ++- |
7 files changed, 324 insertions(+), 161 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
@@ -0,0 +1,5 @@
+Dark Ages
+---------
+- Starting keeping CHANGELOG file for payredo
+- Basic combined parser/lexer for the ledger format has been
+implemented in book.c
diff --git a/HACKING b/HACKING
@@ -0,0 +1,64 @@
+HACKING
+=======
+
+yyyy/mm/dd <till \n>
+ <till double space> <sign|commodity|quantity>\n
+ ...\n
+ ...\n\n // mark posting on \n\n
+
+
+payredo can be bulit as a standalone library(libpayredo) or can be built as an executable.
+
+ make libpayredo.a
+ make libpayredo.so
+ make payredo
+
+You can control what you want to build by invoking it separately. By default builds everything.
+
+commit.c
+------
+The original ledger-cli does not do edits, to make the runtime simple but since the database is completely ASCII and unorganized, consequent and programmatic writes creates a stress on the CPU and RAM, making it unsuitable for building touch friendly GUIs or general clients on top of it without comprosing UX.
+
+To prevent this, payredo exposes a commit API which can be used by text editors and other frontends to validate added content before saving it to the file.
+
+Once you are done with the changes in the text editor or other GUI, you can commit the data to payredo using the APIs
+
+ int ledger_commit_text(new_text, new_text_len)
+ int ledger_commit_post(timestamp, comment, comment_len, entries**)
+
+The first variant can be used by text editors where the structure of the parsed text is not understood.
+
+The second variant can be used when frontend is a GUI or other UI where the user input is controlled.
+
+RETURN VALUE
+------------
+Both variants return PARSE_OK if suceeded, and -1 on failure.
+
+
+payredo.c
+---------
+Similar to ledger-cli
+payredo follows the UNIX style option arguments to make the parsing easy and to combine multiple options
+
+
+parse.c
+-------
+There are two parsers written for the
+* main ledger file
+* price history file
+
+
+The parser is written entirely by hand to reduce dependencies.
+
+The `state` variable at any given time holds the information 'what we are trying to parse'. If the `state` has the value `DATE`, it means we are in a condition where we expect DATE to occur such as when starting the parser or when a posting is parsed.
+
+There aren't as many states as the ledger format itself is quite minimal. These are currently the states,
+
+DATE
+COMMENT
+ENTRY_WHO
+ENTRY_AMOUNT
+ENTRY_END
+
+
+
diff --git a/Makefile b/Makefile
@@ -1,17 +1,20 @@
GENERAL_FLAGS=-fPIC
-LDFLAGS=$(GENERAL_FLAGS) -lglfw -lGL -lm
-CFLAGS=$(GENERAL_FLAGS) -O0 -I. -g -Werror #-Wpedantic
+LDFLAGS:=$(GENERAL_FLAGS) -lglfw -lGL -lm
+CFLAGS:=$(GENERAL_FLAGS) -O0 -I. -g -Werror #-Wpedantic
-.DEFAULT_GOAL=hotbook
+.DEFAULT_GOAL=book
-CC=tcc
+CC=gcc
-%: %.c
+%: %.c account.h
$(CC) -o $@ $(CFLAGS) $(LDFLAGS) $<
%.o: %.c
$(CC) $(CFLAGS) -c $< -o $@
+bal:
+ ledger -f october-2023.txt bal
+
lib%.so: %.o
$(CC) -shared -Wl,-soname,$@ -o $@ $<
@@ -21,7 +24,7 @@ hot: hot.c libbalance.so
libbook.so: book.c book.h
-hotbook: hotbook.c libbook.so
+hotbook: hotbook.c
refresh:
git ls-files | entr sh hot.sh
@@ -29,6 +32,13 @@ refresh:
format:
indent -psl -pal --use-tabs -ts4 -br -brs -ce -cli0 book.c
-clean:
- -rm *.so *.o hotbook libbook.so
+awk_query = $$(awk '/\/\* $1/{flag=1; next}/$1 \*\//{flag=0}flag' $2.c)
+test_cmd = if [ "$$($< $(call awk_query,TEST_INPUT,$<))" = "$(call awk_query,TEST_OUTPUT,$<)" ]; \
+ then echo Passed; \
+ else echo Failed; \
+ fi
+#include tests/tests.mk
+
+clean:
+ -rm *.so *.o hotbook libbook.so test/account_tree
diff --git a/README b/README
@@ -9,8 +9,31 @@ NOTE: The quality of the software is beta in the least, it's still in developmen
For now the following commands work,
- ledger balance -f file.txt [-b 2023/01/01] [-e 2023/01/31] -S [date|amount] [register query ...]
+ $ ./hotbook
+Why payeredo when ledger-cli exists?
+------------------------------------
+
+ledger-cli itself pretty lightweight but it has a handful of dependencies and features which I don't particularly use.
+
+payeredo follows a very suckless approach to ledger and does NOT provide some of the advanced features ledger provides.
+
+It should be noted that payeredo is usually faster than ledger-cli, it does not provide some of the niceties that ledger-cil provides.
+
+Goals
+-----
+- Compact as possible
+- Limited regex, ^,$ and *
+- BSD style arguments
+- Native Windows support
+- csv, emacs export/import
+
+Non-Goals
+---------
+- Python support
+- Elaborate regex
+- limited REPL
+- XML support
Build
-----
diff --git a/account.h b/account.h
@@ -0,0 +1,95 @@
+#ifndef __PAYEREDO_ACCOUNT_H
+#define __PAYEREDO_ACCOUNT_H
+
+#include <string.h>
+#include <assert.h>
+
+#include <vstr.h>
+
+size_t tree_depth = 4;
+
+struct map_tree;
+
+struct map_tree {
+ vstr_t *value;
+ size_t children_cap;
+ size_t children_len;
+ struct map_tree* children;
+};
+
+typedef struct map_tree map_tree_t;
+
+// acc: this:is:us
+//root->|this|->|is|->|us|
+// ->children
+// ->children
+// ->children
+
+// TODO handle both rootp,this:is:us case and rootp->children,is:us case
+// Currently only the rootp->value and acc are compared
+map_tree_t *account_search(map_tree_t *rootp, char *acc, size_t acc_size)
+{
+ assert(rootp != NULL);
+ // we hit leaf node, return rootp
+ if (rootp->children == NULL) return rootp;
+
+ // return rootp when the 'acc' matches exactly with rootp->value
+ // acc: this, rootp->value: this
+ vstr_t *rk = rootp->value;
+ if (rk != NULL && acc_size == rk->len && (strncmp(acc, rk->str, acc_size) == 0)) {
+ return rootp;
+ }
+
+ // search the string in it's children
+ for (size_t i = 0; i < rootp->children_len; i++) {
+ vstr_t *val = rootp->children[i].value;
+ if (val != NULL && acc_size == val->len && (strncmp(acc, val->str, acc_size) == 0)) {
+ return rootp->children + i;
+ }
+ }
+ return NULL;
+}
+
+int account_add(map_tree_t **rootp, char *acc, size_t acc_size)
+{
+ size_t records_needed = tree_depth * 4;
+ if (*rootp == NULL) {
+ *rootp = malloc(sizeof(map_tree_t));
+ }
+ if ((*rootp)->children == NULL) {
+ (*rootp)->children =
+ (map_tree_t *) calloc(records_needed, sizeof(map_tree_t));
+ (*rootp)->children_len = 0;
+ (*rootp)->children_cap = records_needed;
+ }
+ map_tree_t* _rootp = *rootp;
+ size_t i = 0;
+ while (i < acc_size) {
+ if (acc[i] == ':' || i + 1 == acc_size) {
+ size_t j = i + 1;
+ map_tree_t *current_node = account_search(_rootp, acc, j);
+ if (current_node == NULL) {
+ // return the previously allocated child
+ current_node = _rootp->children + _rootp->children_len++;
+ // current_node->value is NULL when the search fails
+ // we have to set the value now
+ // TODO maybe save vstrs in a pool and use them, would provide a sane way to free memory
+ vstr_t *vstr = (vstr_t *) malloc(sizeof(vstr_t));
+ vstr->str = acc;
+ vstr->len = j;
+ current_node->value = vstr;
+ //printf("%zu : %zu %d %.*s\n", current_node, vstr, j, j, acc);
+ } else {
+ //printf("Present already= %d %.*s\n", j, j, acc);
+ }
+ if (j != acc_size) {
+ return account_add(¤t_node, acc + j,
+ acc_size - j);
+ }
+ }
+ i++;
+ }
+ return -1;
+}
+
+#endif
diff --git a/book.c b/book.c
@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <inttypes.h>
#include <ctype.h>
+#include <limits.h>
#include "common.h"
#include "strn.h"
@@ -25,28 +26,17 @@
#define _XOPEN_SOURCE
#include <time.h>
+#include "vstr.h"
+#include "account.h"
#include "book.h"
-#define warning(STR,...) \
- fprintf(stdout, "\033[31m"STR"\033[0m", __VA_ARGS__);
-
-typedef struct {
- char *str;
- size_t len;
-} vstr_t;
-
-size_t tree_depth = 4;
+#define BUFFER_SIZE 256
-struct map_tree;
+#define warning(STR) \
+ fprintf(stdout, "\033[31m"STR"\033[0m");
-struct map_tree {
- vstr_t *value;
- size_t children_cap;
- size_t children_len;
- struct map_tree *children;
-};
-
-typedef struct map_tree map_tree_t;
+#define warningf(STR,...) \
+ fprintf(stdout, "\033[31m"STR"\033[0m", __VA_ARGS__);
vstr_t tags[100] = { 0 };
@@ -74,9 +64,15 @@ char commodity_list[256][8];
map_tree_t *rootp = NULL;
+
+// store numbers in the least denom
+// 1.50$ == 150
+// 2$ == 200
+// 2.23$ == 200
+
typedef struct {
vstr_t *denom;
- int amount;
+ size_t amount;
} LedgerValue;
typedef struct {
@@ -110,8 +106,12 @@ const char *states_str[] = {
"ENTRY START",
"ENTRY SPACE",
"ENTRY WHO",
- "ENTRY DENOM",
+ "ENTRY SIGN",
+ "ENTRY SIGN OR AMOUNT",
"ENTRY AMOUNT",
+ "ENTRY DENOM",
+ "ENTRY DENOM OR AMOUNT",
+ "ENTRY SIGN OR DENOM OR AMOUNT",
"ENTRY END",
};
@@ -121,99 +121,16 @@ typedef enum {
ENTRY_START, // entry starts after a comment
ENTRY_SPACE,
ENTRY_WHO,
- ENTRY_DENOM,
+ ENTRY_SIGN,
+ ENTRY_SIGN_AMOUNT,
ENTRY_AMOUNT,
+ ENTRY_DENOM ,
+ ENTRY_DENOM_AMOUNT,
+ ENTRY_SIGN_DENOM_AMOUNT,
ENTRY_END, // finish up entry if encountering any \n\n or \n text_len == i or text_len == i, otherwise set state to ENTRY_SPACE
+ POSTING_END,
} LedgerParseStates;
-map_tree_t *account_search(map_tree_t *children, char *acc, size_t acc_size)
-{
- if (children->children == NULL)
- return children;
- vstr_t *rk = children->value;
- if (rk != NULL && acc_size == rk->len
- && (strncmp(acc, rk->str, acc_size) == 0)) {
- return children;
- }
- for (size_t i = 0; i < children->children_len; i++) {
- vstr_t *val = children->children[i].value;
- if (val != NULL && acc_size == val->len
- && (strncmp(acc, val->str, acc_size) == 0)) {
- return children->children + i;
- }
- }
- // when the search is exhausted and nothing is found,
- // return the previously allocated child
- // TODO if len < cap allocate memory
- map_tree_t *child_to_return =
- children->children + children->children_len;
- children->children_len++;
- return child_to_return;
-}
-
-int account_add(map_tree_t **rootp, char *acc, size_t acc_size)
-{
- size_t records_needed = tree_depth * 4;
- if (*rootp == NULL) {
- *rootp = malloc(sizeof(map_tree_t));
- }
- if ((*rootp)->children == NULL) {
- (*rootp)->children =
- (map_tree_t *) calloc(records_needed, sizeof(map_tree_t));
- (*rootp)->children_cap = records_needed;
- }
- size_t i = 0;
- while (i < acc_size) {
- if (acc[i] == ':' || i + 1 == acc_size) {
- size_t j = i + 1;
- map_tree_t *current_node =
- account_search(*rootp, acc, j);
- assert(current_node != NULL);
- if (current_node->value == NULL) {
- // current_node->value is NULL when the search fails
- // we have to set the value now
- // TODO maybe save vstrs in a pool and use them
- vstr_t *vstr =
- (vstr_t *) malloc(sizeof(vstr_t));
- vstr->str = acc;
- vstr->len = j;
- current_node->value = vstr;
- printf("%d %.*s\n", j, j, acc);
- } else {
- printf("Present already= %d %.*s\n", j, j, acc);
- }
- if (i + 1 != acc_size) {
- return account_add(&(current_node->children), acc + j,
- acc_size - j);
- }
- }
- i++;
- }
- return -1;
-}
-
-size_t tab_acc = 0;
-
-void walk_it (map_tree_t* rootp)
-{
- if (rootp == NULL)
- return;
- vstr_t *val = rootp->value;
- if (val != NULL) {
- for (size_t i = 0; i < tab_acc; i++) {
- printf("\t");
- }
- printf("-|%.*s|-\n", val->len, val->str);
- }
- tab_acc++;
- if (rootp->children == NULL)
- return;
- for (int i = 0; i < rootp->children_len; i++) {
- printf("|", i);
- walk_it(rootp->children + i);
- }
- tab_acc--;
-}
void ledger_parse_data(char *text, size_t text_len)
{
@@ -229,13 +146,14 @@ void ledger_parse_data(char *text, size_t text_len)
time_t hold_date;
vstr_t hold_comment = { 0 };
vstr_t hold_register = { 0 };
+ long int hold_amount = LONG_MAX;
+ short hold_sign = -1;
size_t hold_denom_id = { 0 };
short n_count = 0;
while (i < text_len) {
char c = text[i];
// we use \n to identify entry done in ledger
- //
switch (c) {
case '\n':
case '\r':
@@ -243,21 +161,28 @@ void ledger_parse_data(char *text, size_t text_len)
n_count++;
printf("\n%d| ", line_no);
switch (state) {
+ // after parsing the amount seq, we set the state to ENTRY_WHO
case ENTRY_WHO:
case ENTRY_END:
- // TODO push the entries to stack or somethin
+ warning("----- Entry End Marked -----\n");
+ hold_sign = -1;
+ hold_amount = LONG_MAX;
+ // if entry_count <= 1 throw error
if (text[i - 1] == '\n') {
- printf("----- Entry End Marked -----\n");
state = DATE;
+ // TODO push the entries to stack or somethin
+ warning("----- Posting End Marked -----\n");
+ // state = POSTING_END;
} else {
state = ENTRY_WHO;
}
break;
case COMMENT:
+ case ENTRY_SIGN_DENOM_AMOUNT:
state = ENTRY_WHO;
break;
case ENTRY_DENOM:
- warning("%s\n", "denom not found, setting state WHO");
+ warningf("%s\n", "denom not found, setting state WHO");
state = ENTRY_WHO;
break;
case ENTRY_AMOUNT:
@@ -280,8 +205,8 @@ void ledger_parse_data(char *text, size_t text_len)
if (isdigit(c)) {
// try to parse a date
time_t tn = ledger_timestamp_from_ledger_date(text + i);
- warning("date str: %.*s\n", 10, text + i);
- warning("date: %ld\n", tn);
+ warningf("date str: %.*s\n", 10, text + i);
+ warningf("date: %ld\n", tn);
// date is expected to have the form DD/MM/YYYY (10)
i += 10;
if (tn == (time_t) - 1) goto ledger_parse_error_handle;
@@ -301,7 +226,7 @@ void ledger_parse_data(char *text, size_t text_len)
comment_len++;
}
comment.len = comment_len;
- warning("Comment: %.*s\n", comment_len,
+ warningf("Comment: %.*s\n", comment_len,
comment);
state = ENTRY_WHO;
}
@@ -311,7 +236,7 @@ void ledger_parse_data(char *text, size_t text_len)
size_t original_i = i;
while (i < text_len && isspace(text[i])) i++;
int wsc = i - original_i;
- warning("i: %ld, Spaces: %d\n", i, wsc);
+ warningf("i: %ld, Spaces: %d\n", i, wsc);
if (wsc < 2) {
goto ledger_parse_error_handle;
}
@@ -343,56 +268,96 @@ void ledger_parse_data(char *text, size_t text_len)
}
ledger_who_parsed:
who_len = i - who_len;
- printf("parsed: i=%d\n", i);
+ warningf("parsed: i=%d\n", i);
account_add(&rootp, who.str, who_len);
- warning("i=%d, Who: %.*s\n", i, who_len, who);
- state = ENTRY_DENOM;
+ warningf("i=%d, Who: %.*s\n", i, who_len, who);
+ state = ENTRY_SIGN_DENOM_AMOUNT;
// add to tags here
}
break;
- case ENTRY_DENOM:
- {
- warning("denom-i: %d\n", i + 1);
- size_t denom_len = i;
- vstr_t denom = {
- .str = text + i,
- .len = 0
- };
- while (i < text_len && !isdigit(*(text + i)))
- i++;
- state = ENTRY_AMOUNT;
- denom_len = i - denom_len;
- denom.len = denom_len;
- warning("len: %d, denom: %.*s, i: %d\n", denom_len, denom_len, denom.str, i);
- }
+ case ENTRY_SIGN_DENOM_AMOUNT:
+ if (*(text + i) == '-' ) {
+ // TODO throw already set error
+ if (hold_sign >= 0) goto ledger_parse_error_handle;
+ state = ENTRY_SIGN;
+ } else if (isdigit(*(text + i))) state = ENTRY_AMOUNT;
+ else state = ENTRY_DENOM;
+ continue;
+ case ENTRY_SIGN_AMOUNT:
+ if (*(text + i) == '-' ) {
+ // TODO throw already set error
+ if (hold_sign >= 0) goto ledger_parse_error_handle;
+ state = ENTRY_SIGN;
+ } else if (isdigit(*(text + i))) state = ENTRY_AMOUNT;
+ else goto ledger_parse_error_handle;
break;
- case ENTRY_AMOUNT:
- {
- warning("amount-i: %d\n", i + 1);
- char *amount = text + i;
- size_t amount_len = i;
- char _c = *(text + i);
- while (i < text_len && (isdigit(_c) || _c == '.' || _c == ',')) {
- i++;
- _c = *(text + i);
- }
+ case ENTRY_SIGN: {
+ if (*(text + i) == '-') {
+ i++;
+ // AMOUNT cannot be set before SIGN
+ if (hold_amount != LONG_MAX) goto ledger_parse_error_handle;
+ hold_sign = 1;
+ state = ENTRY_SIGN_DENOM_AMOUNT;
+ }
+ } break;
+ case ENTRY_DENOM: {
+ char _c;
+ warningf("denom-i: %d\n", i + 1);
+ char *denom = text + i;
+ size_t denom_len = 0;
+ while (i < text_len &&
+ ( isalpha(*(text + i))
+ || *(text + i) == '$')) i++;
+ denom_len = (text + i) - denom;
+ if (hold_amount == LONG_MAX)
+ state = hold_sign? ENTRY_AMOUNT: ENTRY_SIGN_AMOUNT;
+ else
state = ENTRY_END;
- amount_len = i - amount_len;
- warning("%d> len: %d, amount: %.*s\n", i, amount_len, amount_len, amount);
+ warningf("%d> len: %d, denom: %.*s\n", i, denom_len, denom_len, denom);
+ break;
+ }
+ case ENTRY_AMOUNT: {
+ char _c;
+ warningf("amount-i: %d\n", i + 1);
+ char *amount = text + i;
+ size_t amount_len = 0;
+ while (i < text_len && (_c = *(text + i)) == '.' || isdigit(_c) || _c == ',') i++;
+ amount_len = (text + i) - amount;
+ // TODO convert amount to hold_amount integer
+ hold_amount = 0;
+ state = hold_denom_id == 0? ENTRY_DENOM : ENTRY_END;
+ warningf("%d> len: %d, amount: %.*s\n", i, amount_len, amount_len, amount);
}
+ break;
+ default:
+ goto ledger_parse_error_handle;
}
}
- printf("read complete\n");
+ warning("read complete\n");
return;
ledger_parse_error_handle:
- warning("Parse failed at line %ld(%d)\n, Expected %s, got '%c'",
+ warningf("Parse failed at %ld b:(%d), Expected %s, got '%c'",
line_no, i, states_str[state], text[i]);
}
+int main(int argc, char* argv[]) {
+ FILE* in = fopen("october-2023.txt", "r");
+ char* data = (char*)malloc(2048 * sizeof(char));
+ size_t data_size = 0;
+ size_t c_read = 0;
+ while((c_read = fread(data + data_size + 0, 1, BUFFER_SIZE, in)) != 0) {
+ data_size += c_read;
+ }
+ if (ferror(in)) fprintf(stderr, "Error reading file\n");
+ fprintf(stdout, "Startig loop\n");
+ ledger_parse_data(data, data_size);
+ return 0;
+}
+
void *module_main(char *data, size_t data_len)
{
// printf("%s\n", data);
- printf("\n=======| Startality |=======\n");
+ warning("\n=======| Startality |=======\n");
ledger_parse_data(data, data_len);
- printf("\n========| Fatality |========\n");
+ warning("\n========| Fatality |========\n");
}
diff --git a/strn.h b/strn.h
@@ -1,7 +1,7 @@
#ifndef _STRN_H
#define _STRN_H
-inline int natoi(char* str, size_t len) {
+int natoi(char* str, size_t len) {
int final = 0;
int i = 0;
// ignore leading zeroes
@@ -13,4 +13,5 @@ inline int natoi(char* str, size_t len) {
return final;
}
+
#endif