diff --git a/Makefile b/Makefile deleted file mode 100644 index 230f132..0000000 --- a/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -OBJ:= html.o main.o -OUTPUT:=nhtmlc - -all: debug - -debug: CXXFLAGS = -g -DDEBUG -debug: CFLAGS = -g -DDEBUG -debug: $(OBJ) - gcc $(OBJ) -g -o $(OUTPUT) - -release: $(OBJ) - gcc $(OBJ) -o $(OUTPUT) - -clean: - rm -rf $(OBJ) $(OUTPUT) - - diff --git a/inc/attribute.h b/inc/attribute.h new file mode 100644 index 0000000..452f27c --- /dev/null +++ b/inc/attribute.h @@ -0,0 +1,54 @@ +/* + * attribute.h + * + * Created on: 08.08.2017 + * Author: julian + */ + +#ifndef ATTRIBUTE_H_ +#define ATTRIBUTE_H_ + +#include + +#include "nhtml_string.h" + + +typedef struct { + string_t name, value; +} attr_t; + +/** + * \brief copy the give attribute + * \return a copy of attr + */ +attr_t attr_copy(attr_t * attr); +/** + * \brief reset the attribute + * Deletes all strings + */ +void attr_destroy(attr_t * attr); + + +typedef struct { + attr_t * arr; + size_t len; +} attr_set_t; + +/** + * \brief search in attribute set for key + * \param set The Attribute set + * \param key The Key to search + * \return the pointer to the attribute in the set mathching the key + * \return NULL if there is no matching attribute + */ +attr_t * attr_set_find(attr_set_t *set, const char * key); +// Append new Attribute to set +/** + * \brief Append new attribute pair to set + * \return 0 on success + * \return -1 on failure (error can be found to errno) + */ +int attr_set_append(attr_set_t * set, attr_t *new_entry); + + +#endif /* ATTRIBUTE_H_ */ diff --git a/inc/html.h b/inc/html.h new file mode 100644 index 0000000..15d73e6 --- /dev/null +++ b/inc/html.h @@ -0,0 +1,37 @@ +/* + * html.h + * + * Created on: 07.08.2017 + * Author: julian + */ + +#ifndef HTML_H_ +#define HTML_H_ + +#include // needs FILE +#include "nhtml_string.h" +#include "attribute.h" + +extern int html_escape(int c, FILE* output); + +typedef struct node { + string_t name; + attr_set_t attributes; +} node_t; + + +/** + * \brief emit html opening tag for \node + * \param node the Node to create the opening tag for + * \param output the File to write to + */ +void open_node(node_t * node, FILE * output); + +/** + * \brief emit html closing tag for \node + * \param node the Node to create the closing tag for + * \param output the File to write to + */ +void close_node(node_t * node, FILE * output); + +#endif /* HTML_H_ */ diff --git a/inc/nhtml_string.h b/inc/nhtml_string.h new file mode 100644 index 0000000..13bc6e0 --- /dev/null +++ b/inc/nhtml_string.h @@ -0,0 +1,40 @@ +/* + * nhtml_string.h + * + * Created on: 08.08.2017 + * Author: julian + */ + +#ifndef NHTML_STRING_H_ +#define NHTML_STRING_H_ + +#include // size_t +#include // errno + +typedef struct { + char * c_str; + size_t len; +} string_t; + +/**\brief append the char @c to @str + * \param str to the String to append to + * \param c the char to append + * \return -1 on error, errno will be set to errorcode + * \return 0 on success + */ +int string_append(string_t *str, char c); + +/** + * \brief Erase the String from memory + */ +void string_destroy(string_t s); + +/** + * \brief copy the contents of a string + * \param old The String to copy + * \return returns the new string + */ +string_t string_copy(string_t old); + + +#endif /* NHTML_STRING_H_ */ diff --git a/main.c b/main.c deleted file mode 100644 index d40fde8..0000000 --- a/main.c +++ /dev/null @@ -1,331 +0,0 @@ -#include // needs: fgetc, fputs, fopen, fprintf -#include // needs: realloc, malloc, free -#include // needs: strcmp, memcpy -#include -#include // needs: isspace - -#include // needs: getopt_long - -#include "html.h" // needs: html_escaped - -typedef struct { - char * c_str; - size_t len; -} string_t; - -int string_append(string_t *str, char c) { - if (str->c_str == NULL) { - // new string, need to emit EOS - str->len++; - } - - char * new_ptr = realloc(str->c_str, str->len+1); - if (new_ptr == NULL) { - return -1; - } - - // append char - str->c_str = new_ptr; - str->c_str[str->len-1] = c; - str->c_str[str->len] = 0; // make sure - str->len++; - return 0; -} - -void string_destroy(string_t s) { - free(s.c_str); -} - -string_t string_copy(string_t old) { - string_t tmp; - tmp.c_str = malloc(old.len); - tmp.len = old.len; - memcpy(tmp.c_str, old.c_str, tmp.len); - return tmp; -} - -typedef struct { - string_t name, value; -} attr_t; - -typedef struct { - attr_t * arr; - size_t len; -} attr_set_t; - -// linear search in set -// returns NULL on failure to find entry -attr_t * attr_set_find(attr_set_t *set, const char * name) { - size_t current = 0; - - for (; current != set->len; ++current) { - if (strcmp(set->arr[current].name.c_str, name) == 0) { - return set->arr + current; - } - } - - return NULL; -} - -// Append new Attribute to set -int attr_set_append(attr_set_t * set, attr_t *new_entry) { - if (new_entry->name.c_str== NULL) return -1; // reject empty entries - - // search first - attr_t * new_ptr = attr_set_find(set, new_entry->name.c_str); - if (new_ptr != NULL) { - // already contained in set - // just change entries value - string_destroy(new_ptr->value); - new_ptr->value = string_copy(new_entry->value); - return 0; - } - - new_ptr = realloc(set->arr, (set->len+1)*sizeof(attr_t)); - if (new_ptr == NULL) { - return -1; - } - - // append and quit - set->arr = new_ptr; - set->arr[set->len] = *new_entry; - set->len++; - return 0; -} - -void attr_reset(attr_t * attr) { - memset(attr, 0, sizeof(attr_t)); -} - -int strip(FILE * stream) { - int current = 0; - while((current = fgetc(stream)) != EOF) { - if (!isspace(current)) - break; - } - - return current; -} - -int parse_attr(FILE * stream, attr_set_t * output) { - attr_t current_attr = {}; -#ifdef DEBUG - printf("parse_attr\n"); -#endif - int buffer = 0; - unsigned char isKey = 1; - - while((buffer = fgetc(stream)) != EOF) { - // parse key=value pairs - // check for delim - //if (buffer == ' ' || buffer == '\t' || buffer == '\n'){ - if (isspace(buffer)) { - attr_set_append(output, ¤t_attr); -#ifdef DEBUG - printf("parsed attr: %s=%s\n", current_attr.name.c_str, current_attr.value); -#endif - // reset attribute - attr_reset(¤t_attr); - isKey = 1; - continue; - } - if (isKey && buffer == '.') { - isKey = 0; - current_attr.name.c_str = "class"; - current_attr.name.len = 6; - continue; - } - if (buffer == '=') { - isKey = 0; - continue; - } else - if (buffer == ']') { - break; - } - - if (isKey) { - string_append(¤t_attr.name, buffer); - } else { - string_append(¤t_attr.value, buffer); - } - } - - // append last attribute - attr_set_append(output, ¤t_attr); -#ifdef DEBUG - printf("parsed attr: %s=%s\n", current_attr.name.c_str, current_attr.value); -#endif - - //memset(output, 0, sizeof(attr_set_t)); - return strip(stream); -} - -int parse_text(int mode, FILE * stream, FILE * output) { -#ifdef DEBUG - printf("parse text\n"); -#endif - int buffer = 0; - char escaped = 0; - - while((buffer = fgetc(stream)) != EOF) { - if (!escaped && buffer == '\\') { - escaped = 1; - continue; - } - if (!escaped && buffer == mode) { - break; - } - escaped = 0; - if (mode == '"' && html_escape(buffer, output)) { - continue; - } - - fputc(buffer, output); - } - - //fprintf(stderr, "stub: parse_text\n"); - return strip(stream); -} - -typedef struct node { - string_t name; - attr_set_t attributes; -} node_t; - - -// write out node -void open_node(node_t * node, FILE * output) { - // check for empty node (text mostly) - if (!node->name.c_str) return; - - // start by writing tag - fputc('<', output); - - // follow with tag name - fputs(node->name.c_str,output); - - // add attributes - size_t i = 0; - attr_t * current = node->attributes.arr; - for (; i < node->attributes.len; i++,current++) { - if (current->value.c_str) - fprintf(output, " %s=\"%s\"", current->name.c_str, current->value.c_str); - else { - fputc(' ', output); - fputs(current->name.c_str, output); - } - } - - // close tag - fputc('>', output); -} -void close_node(node_t * node, FILE * output) { - if (node->name.c_str) { - fprintf(output, "", node->name.c_str); - } -} - -int readName(FILE *stream, node_t *node) { - int current = strip(stream); - if (current == EOF) { - return current; - } - - do { - if (current == '{' || current == '[') { - break; - } - - if (current == ' ' || current == '\t' || current == '\n') { - current = strip(stream); - break; - } - - string_append(&node->name, current); - } while((current = fgetc(stream)) != EOF); - - return current; -} - - -enum node_type { - NODE_SELFCLOSING, - NODE_TEXT, - NODE_TAG -}; - -int parse_node(int current, FILE * stream, FILE * output) { - if (current == '"' || current == '(') { - if (current == '(') current = ')'; - - return parse_text(current, stream, output); - } - - // normal node - node_t current_node = {}; - string_append(¤t_node.name, current); - current = readName(stream, ¤t_node); - if (current == EOF) { - goto done; - } -#ifdef DEBUG - printf("parse_node: %s\n", current_node.name.c_str); -#endif - if (current == '[') { - current = parse_attr(stream, ¤t_node.attributes); - } - - if (current != '{') { - // tag is selfclosing - open_node(¤t_node, output); - return current; - } - current = strip(stream); - - open_node(¤t_node, output); - while(current != '}' && current != EOF) { - current = parse_node(current, stream, output); - } - - close_node(¤t_node, output); -done: - return strip(stream); -} - -int main(int argc, char ** args) { - //getopt_long(argc, args, NULL, NULL, NULL); - - char * filename= NULL; - FILE * output = fopen("a.html", "w"); - if (output == NULL) { - fprintf(stderr, "could not create output file\n"); - return -1; - } - - for(;argc > 1; --argc) { - filename = args[argc-1]; - printf("starting conversion of %s\n", filename); - - FILE * handle = fopen(filename, "r"); - - if (handle == NULL) { - fprintf(stderr, "could not open \"%s\": %s\n", filename, strerror(errno)); - continue; - } - - int current = strip(handle); - - while((current = parse_node(current, handle, output)) != EOF) { -// if () { -// fprintf(stderr, "error during parsing of node\n"); -// break; -// } - } - - fclose(handle); - } - - fclose(output); - printf("done compiling\n"); - return 0; -} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..699928c --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,8 @@ +set(pwd ${CMAKE_CURRENT_SOURCE_DIR}) +set(SOURCE + ${SOURCE} + ${pwd}/main.c + ${pwd}/nhtml_string.c + ${pwd}/attribute.c + ${pwd}/html.c + PARENT_SCOPE) diff --git a/src/attribute.c b/src/attribute.c new file mode 100644 index 0000000..cc12446 --- /dev/null +++ b/src/attribute.c @@ -0,0 +1,68 @@ +/* + * attribute.c + * + * Created on: 08.08.2017 + * Author: julian + */ + +#include "attribute.h" +#include // needs: strcmp +#include // needs: realloc + +// linear search in set +// returns NULL on failure to find entry +attr_t * attr_set_find(attr_set_t *set, const char * name) { + size_t current = 0; + + for (; current != set->len; ++current) { + if (strcmp(set->arr[current].name.c_str, name) == 0) { + return set->arr + current; + } + } + + return NULL; +} + +// Append new Attribute to set +int attr_set_append(attr_set_t * set, attr_t *new_entry) { + if (new_entry->name.c_str== NULL) return -1; // reject empty entries + + // search first + attr_t * new_ptr = attr_set_find(set, new_entry->name.c_str); + if (new_ptr != NULL) { + // already contained in set + // just change entries value + string_destroy(new_ptr->value); + new_ptr->value = string_copy(new_entry->value); + return 0; + } + + new_ptr = realloc(set->arr, (set->len+1)*sizeof(attr_t)); + if (new_ptr == NULL) { + return -1; + } + + // append and quit + set->arr = new_ptr; + set->arr[set->len] = attr_copy(new_entry); + set->len++; + return 0; +} + +attr_t attr_copy(attr_t * attr) { + attr_t temp; + temp.name = string_copy(attr->name); + temp.value = string_copy(attr->value); + return temp; +} + +void attr_destroy(attr_t * attr) { + // clear memory + string_destroy(attr->name); + string_destroy(attr->value); + + // reset memory content + memset(attr, 0, sizeof(attr_t)); +} + + diff --git a/src/html.c b/src/html.c new file mode 100644 index 0000000..ee595e0 --- /dev/null +++ b/src/html.c @@ -0,0 +1,71 @@ +/* + * html.c + * + * Created on: 07.08.2017 + * Author: julian + */ + +#include "html.h" + +int html_escape(int c, FILE * output) { + switch(c) { + case '<': + fprintf(output, "<"); + goto escaped; + case '>': + fprintf(output, ">"); + goto escaped; + case '&': + fprintf(output, "&"); + goto escaped; + case '"': + fprintf(output, """); + goto escaped; + /*case ' ': + fprintf(output, " "); + goto escaped;*/ + case '\n': + fprintf(output, "
"); + goto escaped; + } + + return 0; +escaped: + return 1; +} + +// write out node +void open_node(node_t * node, FILE * output) { + // check for empty node (text mostly) + if (!node->name.c_str) return; + + // start by writing tag + fputc('<', output); + + // follow with tag name + fputs(node->name.c_str,output); + + // add attributes + size_t i = 0; + attr_t * current = node->attributes.arr; + for (; i < node->attributes.len; i++,current++) { + if (current->value.c_str) + fprintf(output, " %s=\"%s\"", current->name.c_str, current->value.c_str); + else { + fputc(' ', output); + fputs(current->name.c_str, output); + } + } + + // close tag + fputc('>', output); +} + +void close_node(node_t * node, FILE * output) { + if (node->name.c_str) { + fprintf(output, "", node->name.c_str); + } +} + + + diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..435e0e5 --- /dev/null +++ b/src/main.c @@ -0,0 +1,235 @@ +#include // needs: fgetc, fputs, fopen, fprintf +#include // needs: abort +#include +#include // needs: sterror +#include // needs: isspace + +#include // needs: getopt_long + +// Project specific includes +#include "html.h" +#include "attribute.h" +#include "nhtml_string.h" + +int strip(FILE * stream) { + int current = 0; + while((current = fgetc(stream)) != EOF) { + if (!isspace(current)) + break; + } + + return current; +} + +int parse_attr(FILE * stream, attr_set_t * output) { + attr_t current_attr = {}; +#ifdef DEBUG + printf("parse_attr\n"); +#endif + int buffer = 0; + unsigned char isKey = 1; + + while((buffer = fgetc(stream)) != EOF) { + // parse key=value pairs + // check for delim + //if (buffer == ' ' || buffer == '\t' || buffer == '\n'){ + if (isspace(buffer)) { + attr_set_append(output, ¤t_attr); +#ifdef DEBUG + printf("parsed attr: %s=%s\n", current_attr.name.c_str, current_attr.value); +#endif + // reset attribute + attr_destroy(¤t_attr); + isKey = 1; + continue; + } + if (isKey && buffer == '.') { + isKey = 0; + current_attr.name.c_str = "class"; + current_attr.name.len = 6; + continue; + } + if (buffer == '=') { + isKey = 0; + continue; + } else + if (buffer == ']') { + break; + } + + if (isKey) { + string_append(¤t_attr.name, buffer); + } else { + string_append(¤t_attr.value, buffer); + } + } + + // append last attribute + attr_set_append(output, ¤t_attr); +#ifdef DEBUG + printf("parsed attr: %s=%s\n", current_attr.name.c_str, current_attr.value); +#endif + + //memset(output, 0, sizeof(attr_set_t)); + return strip(stream); +} + +int parse_text(int end_char, FILE * stream, FILE * output) { +#ifdef DEBUG + printf("parse text\n"); +#endif + int buffer = 0; + char escaped = 0; + + while((buffer = fgetc(stream)) != EOF) { + if (!escaped && buffer == '\\') { + escaped = 1; + continue; + } + if (!escaped && buffer == end_char) { + break; + } + escaped = 0; + if (end_char == '"' && html_escape(buffer, output)) { + continue; + } + + fputc(buffer, output); + } + + return strip(stream); +} + +int readName(FILE *stream, node_t *node) { + int current = strip(stream); + if (current == EOF) { + return current; + } + + do { + if (current == '{' || current == '[') { + break; + } + + if (current == ' ' || current == '\t' || current == '\n') { + current = strip(stream); + break; + } + + string_append(&node->name, current); + } while((current = fgetc(stream)) != EOF); + + return current; +} + +int parse_node(int current, FILE * stream, FILE * output) { + if (current == '"' || current == '(') { + if (current == '(') current = ')'; + + return parse_text(current, stream, output); + } + + // normal node + node_t current_node = {}; + string_append(¤t_node.name, current); + current = readName(stream, ¤t_node); + if (current == EOF) { + goto done; + } +#ifdef DEBUG + printf("parse_node: %s\n", current_node.name.c_str); +#endif + if (current == '[') { + current = parse_attr(stream, ¤t_node.attributes); + } + + if (current != '{') { + // tag is selfclosing + open_node(¤t_node, output); + return current; + } + current = strip(stream); + + open_node(¤t_node, output); + while(current != '}' && current != EOF) { + current = parse_node(current, stream, output); + } + + close_node(¤t_node, output); +done: + return strip(stream); +} + + +// long options +static struct option long_options[] = { + {"output", required_argument, 0, 'o'}, + {"help", no_argument, 0, '?'}, + {} +}; + +int verbose = 1; + +void usage(int argc, char ** args) { + printf("usage: %s [-o ] file [file...]\n", args[0]); + printf("--output\n"); + printf("-o\tThe output file to write the html to\n"); + printf("\tWhen missing this option, stdout is used instead\n"); + printf("--help Print this usage\n"); +} + +int main(int argc, char ** args) { + int i = 0; + FILE* output = NULL; + + // parse arguments + while((i = getopt_long(argc, args, "o:v", long_options, NULL)) != -1) { + switch(i) { + case 'o': + printf("output: %s\n", optarg); + output = fopen(optarg, "w"); + if (output == NULL) { + fprintf(stderr, "could not create file: %s\n", strerror(errno)); + return -1; + } + break; + case '?': + usage(argc, args); + return -1; + default: + // should not be reachable + abort(); + } + } + + if (output == NULL) { + output = stdout; + verbose = 0; + } + + char * filename= NULL; + + // parse all the files + for(i = optind;i < argc; i++) { + filename = args[argc-1]; + if (verbose) printf("starting conversion of %s\n", filename); + + FILE * handle = fopen(filename, "r"); + + if (handle == NULL) { + fprintf(stderr, "could not open \"%s\": %s\n", filename, strerror(errno)); + continue; + } + + int current = strip(handle); + + // parse the complete file + while((current = parse_node(current, handle, output)) != EOF); + + fclose(handle); + } + + if (output != stdout) fclose(output); + if (verbose) printf("done compiling\n"); + return 0; +} diff --git a/src/nhtml_string.c b/src/nhtml_string.c new file mode 100644 index 0000000..5786f13 --- /dev/null +++ b/src/nhtml_string.c @@ -0,0 +1,50 @@ +/* + * string.c + * + * Created on: 08.08.2017 + * Author: julian + */ + +#include "nhtml_string.h" +#include // needs: malloc, free +#include // needs: memcpy + +int string_append(string_t *str, char c) { + if (str->c_str == NULL) { + // new string, need to emit EOS + str->len++; + } + + char * new_ptr = realloc(str->c_str, str->len+1); + if (new_ptr == NULL) { + errno = ENOMEM; + return -1; + } + + // append char + str->c_str = new_ptr; + str->c_str[str->len-1] = c; + str->c_str[str->len] = 0; // make sure + str->len++; + return 0; +} + +void string_destroy(string_t s) { + free(s.c_str); +} + +string_t string_copy(string_t old) { + string_t tmp = {}; // initialize with 0 + + tmp.c_str = malloc(old.len); + if (tmp.c_str == NULL) { + // The Application will have to handle a out of mem + // situation here + return tmp; + } + + tmp.len = old.len; + memcpy(tmp.c_str, old.c_str, tmp.len); + return tmp; +} +