refactored code and ported project to cmake

This commit is contained in:
Julian Daube 2017-08-08 20:04:47 +02:00
parent 17da357871
commit 640a5edee2
10 changed files with 563 additions and 348 deletions

View File

@ -1,17 +0,0 @@
# Object files the program is linked from
OBJ:= html.o main.o
# Name of the executable produced by the link step
OUTPUT:=nhtmlc
# Default target: build the debug variant
all: debug
# Debug variant: compile with debug symbols and the DEBUG macro defined
debug: CXXFLAGS = -g -DDEBUG
debug: CFLAGS = -g -DDEBUG
debug: $(OBJ)
gcc $(OBJ) -g -o $(OUTPUT)
# Release variant: link the objects without additional flags
release: $(OBJ)
gcc $(OBJ) -o $(OUTPUT)
# Remove the object files and the executable
clean:
rm -rf $(OBJ) $(OUTPUT)

54
inc/attribute.h Normal file
View File

@ -0,0 +1,54 @@
/*
* attribute.h
*
* Created on: 08.08.2017
* Author: julian
*/
#ifndef ATTRIBUTE_H_
#define ATTRIBUTE_H_
#include <errno.h>
#include "nhtml_string.h"
/**
 * \brief A single attribute: a name and its value, each held in a string_t
 */
typedef struct {
string_t name, value;
} attr_t;
/**
 * \brief copy the given attribute
 * \param attr the attribute whose name and value are copied
 * \return a copy of attr
 */
attr_t attr_copy(attr_t * attr);
/**
 * \brief reset the attribute
 * Deletes the name and value strings and zeroes the attribute afterwards
 * \param attr the attribute to reset
 */
void attr_destroy(attr_t * attr);
/**
 * \brief A set of attributes stored as an array
 * arr points to the entries, len is the number of entries in arr
 */
typedef struct {
attr_t * arr;
size_t len;
} attr_set_t;
/**
 * \brief search the attribute set for key
 * \param set The attribute set
 * \param key The key (attribute name) to search for
 * \return the pointer to the attribute in the set matching the key
 * \return NULL if there is no matching attribute
 */
attr_t * attr_set_find(attr_set_t *set, const char * key);
/**
 * \brief Append a new attribute pair to the set
 * If the set already contains an attribute with the same name, only the
 * value of that attribute is replaced. Entries without a name are rejected.
 * \param set The attribute set to extend
 * \param new_entry The attribute to add
 * \return 0 on success
 * \return -1 on failure (the error is meant to be reported in errno;
 *         NOTE(review): confirm the implementation sets errno on every
 *         failure path)
 */
int attr_set_append(attr_set_t * set, attr_t *new_entry);
#endif /* ATTRIBUTE_H_ */

37
inc/html.h Normal file
View File

@ -0,0 +1,37 @@
/*
* html.h
*
* Created on: 07.08.2017
* Author: julian
*/
#ifndef HTML_H_
#define HTML_H_
#include <stdio.h> // needs FILE
#include "nhtml_string.h"
#include "attribute.h"
/**
 * \brief write the html replacement for the character c, if it has one
 * \param c the character to check
 * \param output the file the replacement is written to
 * \return 1 if a replacement was written
 * \return 0 if c needs no replacement (nothing is written in that case)
 */
extern int html_escape(int c, FILE* output);
/**
 * \brief A html element: its tag name and the attributes of the tag
 */
typedef struct node {
string_t name;
attr_set_t attributes;
} node_t;
/**
 * \brief emit the html opening tag for \p node, including its attributes
 * Nothing is written when the node has no name.
 * \param node the node to create the opening tag for
 * \param output the file to write to
 */
void open_node(node_t * node, FILE * output);
/**
 * \brief emit the html closing tag for \p node
 * Nothing is written when the node has no name.
 * \param node the node to create the closing tag for
 * \param output the file to write to
 */
void close_node(node_t * node, FILE * output);
#endif /* HTML_H_ */

40
inc/nhtml_string.h Normal file
View File

@ -0,0 +1,40 @@
/*
* nhtml_string.h
*
* Created on: 08.08.2017
* Author: julian
*/
#ifndef NHTML_STRING_H_
#define NHTML_STRING_H_
#include <stddef.h> // size_t
#include <errno.h> // errno
/**
 * \brief A heap allocated, growable string
 * c_str is the character buffer (NULL for an empty string). For strings
 * built with string_append, len counts the characters plus the
 * terminating 0 byte.
 */
typedef struct {
char * c_str;
size_t len;
} string_t;
/**
 * \brief append the char \p c to \p str
 * \param str the string to append to
 * \param c the char to append
 * \return -1 on error, errno will be set to the error code
 * \return 0 on success
 */
int string_append(string_t *str, char c);
/**
 * \brief Erase the string from memory
 * Frees the character buffer. The string is passed by value, so the
 * caller's copy still holds the old pointer afterwards and must not be
 * used again without being reset.
 * \param s the string whose buffer is freed
 */
void string_destroy(string_t s);
/**
 * \brief copy the contents of a string into a newly allocated buffer
 * \param old The string to copy
 * \return the new string; its buffer must be released with string_destroy
 */
string_t string_copy(string_t old);
#endif /* NHTML_STRING_H_ */

331
main.c
View File

@ -1,331 +0,0 @@
#include <stdio.h> // needs: fgetc, fputs, fopen, fprintf
#include <stdlib.h> // needs: realloc, malloc, free
#include <string.h> // needs: strcmp, memcpy
#include <errno.h>
#include <ctype.h> // needs: isspace
#include <getopt.h> // needs: getopt_long
#include "html.h" // needs: html_escaped
// Growable heap string. c_str is the buffer (NULL while empty); for strings
// built with string_append, len counts the characters plus the terminating 0.
typedef struct {
char * c_str;
size_t len;
} string_t;
// Append the character c to str, growing the buffer by one byte.
// The buffer always ends in a 0 byte, which is included in str->len.
// Returns 0 on success and -1 when the buffer could not be grown.
int string_append(string_t *str, char c) {
    const int is_fresh = (str->c_str == NULL);
    if (is_fresh) {
        // a fresh string has to account for the terminator as well
        str->len += 1;
    }

    char *grown = realloc(str->c_str, str->len + 1);
    if (!grown) {
        return -1;
    }

    // the new character overwrites the old terminator
    const size_t slot = str->len - 1;
    grown[slot] = c;
    grown[slot + 1] = 0;

    str->c_str = grown;
    str->len = slot + 2;
    return 0;
}
// Release the character buffer of s.
// s is passed by value, so the caller's struct is left untouched.
void string_destroy(string_t s) {
    char *buffer = s.c_str;
    free(buffer);
}
// Create an independent copy of old in a newly allocated buffer.
// An empty source (no buffer or zero length) yields an empty string
// (c_str == NULL, len == 0) instead of a zero sized allocation.
// When the allocation fails an empty string is returned as well, so a
// caller copying a non-empty string can detect failure by c_str == NULL.
string_t string_copy(string_t old) {
    string_t tmp = { NULL, 0 };

    if (old.c_str == NULL || old.len == 0) {
        return tmp;
    }

    tmp.c_str = malloc(old.len);
    if (tmp.c_str == NULL) {
        // out of memory: hand back the empty string
        return tmp;
    }

    memcpy(tmp.c_str, old.c_str, old.len);
    tmp.len = old.len;
    return tmp;
}
// A single attribute: a name and its value
typedef struct {
string_t name, value;
} attr_t;
// A set of attributes: arr points to the entries, len is their count
typedef struct {
attr_t * arr;
size_t len;
} attr_set_t;
// Look up an attribute by name with a linear scan over the set.
// Returns a pointer to the matching entry inside the set,
// or NULL when no entry carries that name.
attr_t * attr_set_find(attr_set_t *set, const char * name) {
    size_t idx = 0;

    while (idx != set->len) {
        attr_t *entry = &set->arr[idx];
        if (!strcmp(entry->name.c_str, name)) {
            return entry;
        }
        ++idx;
    }

    return NULL;
}
// Add new_entry to the set.
// Entries without a name are rejected with -1. When the name is already
// present only the stored value is replaced by a copy of the new value.
// Otherwise the array grows by one slot and the entry is stored by plain
// struct assignment, so the set then refers to the same string buffers
// as *new_entry.
// Returns 0 on success, -1 on rejection or when the array cannot grow.
int attr_set_append(attr_set_t * set, attr_t *new_entry) {
    const char *key = new_entry->name.c_str;
    if (!key) {
        return -1;
    }

    attr_t *existing = attr_set_find(set, key);
    if (existing) {
        // known name: swap the value only
        string_destroy(existing->value);
        existing->value = string_copy(new_entry->value);
        return 0;
    }

    attr_t *grown = realloc(set->arr, (set->len + 1) * sizeof(attr_t));
    if (!grown) {
        return -1;
    }

    grown[set->len] = *new_entry;
    set->arr = grown;
    set->len += 1;
    return 0;
}
// Zero the attribute in place.
// The string buffers are not freed here; only the struct is cleared.
void attr_reset(attr_t * attr) {
    memset(attr, 0, sizeof *attr);
}
// Skip whitespace on stream.
// Returns the first character that is not whitespace, or EOF when the
// stream ends first. The returned character has been consumed.
int strip(FILE * stream) {
    int ch;

    do {
        ch = fgetc(stream);
    } while (ch != EOF && isspace(ch));

    return ch;
}
// Parse the attribute list of a node up to the closing ']'.
// Attributes are separated by whitespace and have the form key=value.
// A '.' read while a key is expected starts a class attribute: the name
// becomes "class" and the following characters form the value.
// Every finished attribute is handed to attr_set_append; afterwards the
// local attribute is only zeroed, because the set stores the entry by
// struct assignment and keeps using its buffers.
// Returns the first non-whitespace character after the list, or EOF.
int parse_attr(FILE * stream, attr_set_t * output) {
    attr_t current_attr = {};
#ifdef DEBUG
    printf("parse_attr\n");
#endif
    int buffer = 0;
    unsigned char isKey = 1;
    while ((buffer = fgetc(stream)) != EOF) {
        // whitespace ends the current key=value pair
        if (isspace(buffer)) {
            attr_set_append(output, &current_attr);
#ifdef DEBUG
            // print the C strings, not the string_t struct, and never NULL
            printf("parsed attr: %s=%s\n",
                   current_attr.name.c_str ? current_attr.name.c_str : "",
                   current_attr.value.c_str ? current_attr.value.c_str : "");
#endif
            // reset attribute
            attr_reset(&current_attr);
            isKey = 1;
            continue;
        }
        if (isKey && buffer == '.') {
            // shorthand for class=<value>
            isKey = 0;
            current_attr.name.c_str = "class";
            current_attr.name.len = 6;
            continue;
        }
        if (buffer == '=') {
            isKey = 0;
            continue;
        } else if (buffer == ']') {
            break;
        }
        if (isKey) {
            string_append(&current_attr.name, buffer);
        } else {
            string_append(&current_attr.value, buffer);
        }
    }
    // append last attribute
    attr_set_append(output, &current_attr);
#ifdef DEBUG
    printf("parsed attr: %s=%s\n",
           current_attr.name.c_str ? current_attr.name.c_str : "",
           current_attr.value.c_str ? current_attr.value.c_str : "");
#endif
    return strip(stream);
}
// Copy a text section from stream to output until the terminator `mode`
// is read. A backslash makes the following character literal, so an
// escaped terminator or backslash is copied instead of interpreted.
// In '"' mode characters are passed through html_escape first.
// Returns the first non-whitespace character after the text, or EOF.
int parse_text(int mode, FILE * stream, FILE * output) {
#ifdef DEBUG
    printf("parse text\n");
#endif
    int pending_escape = 0;

    for (;;) {
        const int ch = fgetc(stream);
        if (ch == EOF) {
            break;
        }

        if (!pending_escape) {
            if (ch == '\\') {
                pending_escape = 1;
                continue;
            }
            if (ch == mode) {
                break;
            }
        }
        pending_escape = 0;

        // html_escape reports whether it already wrote a replacement
        if (mode == '"' && html_escape(ch, output)) {
            continue;
        }
        fputc(ch, output);
    }

    return strip(stream);
}
// A html element: its tag name and the attributes of the tag
typedef struct node {
string_t name;
attr_set_t attributes;
} node_t;
// Write the opening tag of node to output.
// Nodes without a name produce no output. Attributes with a value are
// written as name="value", attributes without one as the bare name.
void open_node(node_t * node, FILE * output) {
    const char *tag = node->name.c_str;
    if (tag == NULL) {
        return;
    }

    fputc('<', output);
    fputs(tag, output);

    for (size_t idx = 0; idx < node->attributes.len; ++idx) {
        const attr_t *attr = &node->attributes.arr[idx];
        if (attr->value.c_str == NULL) {
            fputc(' ', output);
            fputs(attr->name.c_str, output);
        } else {
            fprintf(output, " %s=\"%s\"", attr->name.c_str, attr->value.c_str);
        }
    }

    fputc('>', output);
}
// Write the closing tag of node to output; unnamed nodes produce nothing.
void close_node(node_t * node, FILE * output) {
    const char *tag = node->name.c_str;
    if (!tag) {
        return;
    }
    fprintf(output, "</%s>", tag);
}
// Read the rest of a tag name from stream into node->name.
// The name ends at '{', '[' or at any whitespace character.
// The next character is read with fgetc rather than strip, so whitespace
// directly after the first name character ends the name instead of being
// skipped and gluing the following token onto the name.
// Returns the character that ended the name ('{' or '['), the first
// non-whitespace character after the name, or EOF.
int readName(FILE *stream, node_t *node) {
    int current;

    while ((current = fgetc(stream)) != EOF) {
        if (current == '{' || current == '[') {
            break;
        }
        // isspace also covers '\r', '\v' and '\f'
        if (isspace(current)) {
            current = strip(stream);
            break;
        }
        string_append(&node->name, (char)current);
    }

    return current;
}
// Kinds of node the parser distinguishes.
// NOTE(review): no other code visible in this file refers to this enum.
enum node_type {
NODE_SELFCLOSING,
NODE_TEXT,
NODE_TAG
};
// Parse one node starting with the already consumed character `current`
// and write the resulting html to output.
// '"' and '(' start a text section (ended by '"' resp. ')'); anything
// else starts a tag name, optionally followed by an attribute list in
// brackets and a body in braces. A tag without a body is written as an
// opening tag only.
// Returns the next unprocessed non-whitespace character, or EOF.
int parse_node(int current, FILE * stream, FILE * output) {
    switch (current) {
    case '(':
        return parse_text(')', stream, output);
    case '"':
        return parse_text('"', stream, output);
    default:
        break;
    }

    // normal node: `current` is the first character of its name
    node_t current_node = {};
    string_append(&current_node.name, current);

    int next = readName(stream, &current_node);
    if (next == EOF) {
        return strip(stream);
    }
#ifdef DEBUG
    printf("parse_node: %s\n", current_node.name.c_str);
#endif

    if (next == '[') {
        next = parse_attr(stream, &current_node.attributes);
    }

    if (next != '{') {
        // no body follows: the tag is selfclosing
        open_node(&current_node, output);
        return next;
    }

    next = strip(stream);
    open_node(&current_node, output);

    // children until the closing brace
    while (next != '}' && next != EOF) {
        next = parse_node(next, stream, output);
    }

    close_node(&current_node, output);
    return strip(stream);
}
// Convert every file named on the command line and write the html of
// all of them to "a.html". The files are processed from the last
// argument to the first; files that cannot be opened are skipped.
int main(int argc, char ** args) {
    FILE *output = fopen("a.html", "w");
    if (!output) {
        fprintf(stderr, "could not create output file\n");
        return -1;
    }

    for (int idx = argc - 1; idx >= 1; --idx) {
        char *filename = args[idx];
        printf("starting conversion of %s\n", filename);

        FILE *handle = fopen(filename, "r");
        if (!handle) {
            fprintf(stderr, "could not open \"%s\": %s\n", filename, strerror(errno));
            continue;
        }

        // parse node after node until the file is exhausted
        int current = strip(handle);
        do {
            current = parse_node(current, handle, output);
        } while (current != EOF);

        fclose(handle);
    }

    fclose(output);
    printf("done compiling\n");
    return 0;
}

8
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,8 @@
# Directory of this CMakeLists.txt, used to build absolute source paths
set(pwd ${CMAKE_CURRENT_SOURCE_DIR})
# Append the sources of this directory to SOURCE and publish the list
# in the scope of the including CMakeLists.txt
set(SOURCE
${SOURCE}
${pwd}/main.c
${pwd}/nhtml_string.c
${pwd}/attribute.c
${pwd}/html.c
PARENT_SCOPE)

68
src/attribute.c Normal file
View File

@ -0,0 +1,68 @@
/*
* attribute.c
*
* Created on: 08.08.2017
* Author: julian
*/
#include "attribute.h"
#include <string.h> // needs: strcmp
#include <stdlib.h> // needs: realloc
// Linear search for the attribute named key.
// Returns a pointer to the matching entry inside the set, or NULL when
// there is no match. A NULL set or key, and entries whose name buffer is
// missing, never match instead of being passed to strcmp.
attr_t * attr_set_find(attr_set_t *set, const char * key) {
    if (set == NULL || key == NULL) {
        return NULL;
    }

    for (size_t current = 0; current != set->len; ++current) {
        const char *candidate = set->arr[current].name.c_str;
        if (candidate != NULL && strcmp(candidate, key) == 0) {
            return set->arr + current;
        }
    }

    return NULL;
}
// Append a copy of new_entry to the set.
// When an attribute with the same name already exists only its value is
// replaced by a copy of the new value. The set never takes ownership of
// new_entry's buffers; the caller still has to destroy new_entry.
// Returns 0 on success. Returns -1 with errno = EINVAL for a missing
// argument or an entry without a name, and -1 with errno = ENOMEM when
// memory runs out. The set is left unchanged on failure.
int attr_set_append(attr_set_t * set, attr_t *new_entry) {
    if (set == NULL || new_entry == NULL || new_entry->name.c_str == NULL) {
        errno = EINVAL; // reject empty entries
        return -1;
    }

    const int has_value = (new_entry->value.c_str != NULL);

    // search first
    attr_t *existing = attr_set_find(set, new_entry->name.c_str);
    if (existing != NULL) {
        // already contained in set: build the replacement before the old
        // value is destroyed, so a failed copy cannot lose the old value
        string_t replacement = { NULL, 0 };
        if (has_value) {
            replacement = string_copy(new_entry->value);
            if (replacement.c_str == NULL) {
                errno = ENOMEM;
                return -1;
            }
        }
        string_destroy(existing->value);
        existing->value = replacement;
        return 0;
    }

    attr_t copy = attr_copy(new_entry);
    if (copy.name.c_str == NULL || (has_value && copy.value.c_str == NULL)) {
        // one of the string copies failed
        attr_destroy(&copy);
        errno = ENOMEM;
        return -1;
    }

    attr_t *grown = realloc(set->arr, (set->len + 1) * sizeof *grown);
    if (grown == NULL) {
        attr_destroy(&copy);
        errno = ENOMEM;
        return -1;
    }

    // append and quit
    set->arr = grown;
    set->arr[set->len] = copy;
    set->len++;
    return 0;
}
// Create a copy of attr whose name and value live in their own buffers.
// Empty strings (no buffer or zero length) are copied as empty strings
// (c_str == NULL, len == 0) rather than handed to string_copy, so an
// attribute without a value still has value.c_str == NULL in the copy.
// A NULL attr yields an empty attribute.
attr_t attr_copy(attr_t * attr) {
    attr_t temp = { { NULL, 0 }, { NULL, 0 } };

    if (attr == NULL) {
        return temp;
    }
    if (attr->name.c_str != NULL && attr->name.len != 0) {
        temp.name = string_copy(attr->name);
    }
    if (attr->value.c_str != NULL && attr->value.len != 0) {
        temp.value = string_copy(attr->value);
    }
    return temp;
}
// Free the name and value strings of attr and zero the attribute.
void attr_destroy(attr_t * attr) {
    // remember the buffers, clear the struct, then release the buffers
    const string_t old_name = attr->name;
    const string_t old_value = attr->value;

    memset(attr, 0, sizeof *attr);

    string_destroy(old_name);
    string_destroy(old_value);
}

71
src/html.c Normal file
View File

@ -0,0 +1,71 @@
/*
* html.c
*
* Created on: 07.08.2017
* Author: julian
*/
#include "html.h"
// Write the html replacement for c to output, if c has one.
// '<', '>', '&' and '"' become entities and a newline becomes "<br/>".
// Every other character, including a space, has no replacement.
// Returns 1 when a replacement was written, 0 when nothing was written.
int html_escape(int c, FILE * output) {
    const char *replacement;

    switch (c) {
    case '<':
        replacement = "&lt;";
        break;
    case '>':
        replacement = "&gt;";
        break;
    case '&':
        replacement = "&amp;";
        break;
    case '"':
        replacement = "&quot;";
        break;
    case '\n':
        replacement = "<br/>";
        break;
    default:
        return 0;
    }

    fputs(replacement, output);
    return 1;
}
// Emit the opening tag of node, including all of its attributes.
// A node without a name (mostly text) emits nothing. An attribute with a
// value is emitted as name="value", one without as the bare name.
void open_node(node_t * node, FILE * output) {
    const char *tag = node->name.c_str;
    if (tag == NULL) {
        return;
    }

    // "<" followed by the tag name
    fputc('<', output);
    fputs(tag, output);

    const attr_t *attr = node->attributes.arr;
    size_t remaining = node->attributes.len;
    while (remaining > 0) {
        if (attr->value.c_str != NULL) {
            fprintf(output, " %s=\"%s\"", attr->name.c_str, attr->value.c_str);
        } else {
            fputc(' ', output);
            fputs(attr->name.c_str, output);
        }
        ++attr;
        --remaining;
    }

    // end of the opening tag
    fputc('>', output);
}
// Emit the closing tag of node; a node without a name emits nothing.
void close_node(node_t * node, FILE * output) {
    const char *tag = node->name.c_str;
    if (tag == NULL) {
        return;
    }
    fprintf(output, "</%s>", tag);
}

235
src/main.c Normal file
View File

@ -0,0 +1,235 @@
#include <stdio.h> // needs: fgetc, fputs, fopen, fprintf
#include <stdlib.h> // needs: abort
#include <errno.h>
#include <string.h> // needs: sterror
#include <ctype.h> // needs: isspace
#include <getopt.h> // needs: getopt_long
// Project specific includes
#include "html.h"
#include "attribute.h"
#include "nhtml_string.h"
// Consume whitespace from stream.
// Returns the first non-whitespace character (which is consumed too),
// or EOF when the stream ends before one is found.
int strip(FILE * stream) {
    for (;;) {
        const int ch = fgetc(stream);
        if (ch == EOF || !isspace(ch)) {
            return ch;
        }
    }
}
// Parse the attribute list of a node up to the closing ']'.
// Attributes are separated by whitespace and have the form key=value.
// A '.' read while a key is expected starts a class attribute: the name
// becomes "class" and the following characters form the value.
// Every finished attribute is handed to attr_set_append, which stores
// its own copy; the local attribute is destroyed afterwards.
// Returns the first non-whitespace character after the list, or EOF.
int parse_attr(FILE * stream, attr_set_t * output) {
    static const char class_key[] = "class";
    attr_t current_attr = { { NULL, 0 }, { NULL, 0 } };
#ifdef DEBUG
    printf("parse_attr\n");
#endif
    int buffer = 0;
    unsigned char isKey = 1;
    while ((buffer = fgetc(stream)) != EOF) {
        // whitespace ends the current key=value pair
        if (isspace(buffer)) {
            attr_set_append(output, &current_attr);
#ifdef DEBUG
            // print the C strings, not the string_t struct, and never NULL
            printf("parsed attr: %s=%s\n",
                   current_attr.name.c_str ? current_attr.name.c_str : "",
                   current_attr.value.c_str ? current_attr.value.c_str : "");
#endif
            // reset attribute
            attr_destroy(&current_attr);
            isKey = 1;
            continue;
        }
        if (isKey && buffer == '.') {
            // shorthand for class=<value>; the name is built on the heap
            // because attr_destroy frees it later (a string literal must
            // never reach free)
            isKey = 0;
            string_destroy(current_attr.name);
            current_attr.name.c_str = NULL;
            current_attr.name.len = 0;
            for (const char *p = class_key; *p != '\0'; ++p) {
                string_append(&current_attr.name, *p);
            }
            continue;
        }
        if (buffer == '=') {
            isKey = 0;
            continue;
        } else if (buffer == ']') {
            break;
        }
        if (isKey) {
            string_append(&current_attr.name, (char)buffer);
        } else {
            string_append(&current_attr.value, (char)buffer);
        }
    }
    // append last attribute
    attr_set_append(output, &current_attr);
#ifdef DEBUG
    printf("parsed attr: %s=%s\n",
           current_attr.name.c_str ? current_attr.name.c_str : "",
           current_attr.value.c_str ? current_attr.value.c_str : "");
#endif
    // the set keeps its own copy, so the local buffers are released here
    attr_destroy(&current_attr);
    return strip(stream);
}
// Copy a text section from stream to output until end_char is read.
// A backslash makes the following character literal, so an escaped
// end_char or backslash is copied instead of interpreted.
// When end_char is '"' every character goes through html_escape first.
// Returns the first non-whitespace character after the text, or EOF.
int parse_text(int end_char, FILE * stream, FILE * output) {
#ifdef DEBUG
    printf("parse text\n");
#endif
    const int escape_html = (end_char == '"');
    int literal_next = 0;
    int ch;

    while ((ch = fgetc(stream)) != EOF) {
        if (literal_next) {
            literal_next = 0;
        } else if (ch == '\\') {
            literal_next = 1;
            continue;
        } else if (ch == end_char) {
            break;
        }

        // html_escape reports whether it already wrote a replacement
        if (!(escape_html && html_escape(ch, output))) {
            fputc(ch, output);
        }
    }

    return strip(stream);
}
// Read the rest of a tag name from stream into node->name.
// The name ends at '{', '[' or at any whitespace character.
// The next character is read with fgetc rather than strip, so whitespace
// directly after the first name character ends the name instead of being
// skipped and gluing the following token onto the name.
// Returns the character that ended the name ('{' or '['), the first
// non-whitespace character after the name, or EOF.
int readName(FILE *stream, node_t *node) {
    int current;

    while ((current = fgetc(stream)) != EOF) {
        if (current == '{' || current == '[') {
            break;
        }
        // isspace also covers '\r', '\v' and '\f'
        if (isspace(current)) {
            current = strip(stream);
            break;
        }
        string_append(&node->name, (char)current);
    }

    return current;
}
// Release everything a node owns: attribute strings, attribute array, name.
static void node_release(node_t *node) {
    for (size_t i = 0; i < node->attributes.len; i++) {
        attr_destroy(&node->attributes.arr[i]);
    }
    free(node->attributes.arr);
    string_destroy(node->name);
    memset(node, 0, sizeof *node);
}

// Parse one node starting with the already consumed character `current`
// and write the resulting html to output.
// '"' and '(' start a text section (ended by '"' resp. ')'); anything
// else starts a tag name, optionally followed by an attribute list in
// brackets and a body in braces. A tag without a body, including one
// that is ended by the end of the input, is written as an opening tag
// only. The memory of the node is released before returning.
// Returns the next unprocessed non-whitespace character, or EOF.
int parse_node(int current, FILE * stream, FILE * output) {
    if (current == EOF) {
        // nothing left to parse; EOF is not a name character
        return EOF;
    }
    if (current == '"' || current == '(') {
        const int end_char = (current == '(') ? ')' : '"';
        return parse_text(end_char, stream, output);
    }

    // normal node: `current` is the first character of its name
    node_t current_node;
    memset(&current_node, 0, sizeof current_node);
    string_append(&current_node.name, (char)current);
    current = readName(stream, &current_node);
#ifdef DEBUG
    printf("parse_node: %s\n", current_node.name.c_str ? current_node.name.c_str : "");
#endif
    if (current == '[') {
        current = parse_attr(stream, &current_node.attributes);
    }
    if (current != '{') {
        // tag is selfclosing (also when the input ended after the tag)
        open_node(&current_node, output);
        node_release(&current_node);
        return current;
    }

    current = strip(stream);
    open_node(&current_node, output);
    // children until the closing brace
    while (current != '}' && current != EOF) {
        current = parse_node(current, stream, output);
    }
    close_node(&current_node, output);
    node_release(&current_node);
    return strip(stream);
}
// Long command line options handed to getopt_long() in main():
// --output takes an argument and is reported as 'o', --help takes none
// and is reported as '?'. The empty last entry ends the table.
static struct option long_options[] = {
{"output", required_argument, 0, 'o'},
{"help", no_argument, 0, '?'},
{}
};
// Non-zero enables the progress messages printed by main(); main() clears
// it when the html is written to stdout.
int verbose = 1;
// Print the command line help to stdout.
// args[0] is used as the program name; argc is not used.
void usage(int argc, char ** args) {
    (void)argc;
    const char *program = args[0];

    printf("usage: %s [-o <filename>] file [file...]\n", program);
    fputs("--output\n", stdout);
    fputs("-o\tThe output file to write the html to\n", stdout);
    fputs("\tWhen missing this option, stdout is used instead\n", stdout);
    fputs("--help Print this usage\n", stdout);
}
// Entry point: convert every input file named on the command line and
// write the html to the file given with -o/--output, or to stdout when
// no output file is given. Progress messages are suppressed when the
// html goes to stdout so they cannot end up inside the document.
// Input files that cannot be opened are reported and skipped.
// Returns 0 on success and -1 on a usage error or when the output file
// cannot be created or written.
int main(int argc, char ** args) {
    int opt = 0;
    FILE *output = NULL;

    // parse arguments
    while ((opt = getopt_long(argc, args, "o:v", long_options, NULL)) != -1) {
        switch (opt) {
        case 'o':
            if (output != NULL) {
                // a later -o replaces an earlier one
                fclose(output);
            }
            printf("output: %s\n", optarg);
            output = fopen(optarg, "w");
            if (output == NULL) {
                fprintf(stderr, "could not create file: %s\n", strerror(errno));
                return -1;
            }
            break;
        case 'v':
            // accepted by the option string; progress messages are
            // already on by default, so this only restates the default
            verbose = 1;
            break;
        case '?':
            usage(argc, args);
            if (output != NULL) {
                fclose(output);
            }
            return -1;
        default:
            // should not be reachable
            abort();
        }
    }

    if (output == NULL) {
        output = stdout;
        verbose = 0;
    }

    // parse all the files, in the order they were given
    for (int i = optind; i < argc; i++) {
        const char *filename = args[i];
        if (verbose) printf("starting conversion of %s\n", filename);

        FILE *handle = fopen(filename, "r");
        if (handle == NULL) {
            fprintf(stderr, "could not open \"%s\": %s\n", filename, strerror(errno));
            continue;
        }

        // parse the complete file; an empty file yields EOF right away
        int current = strip(handle);
        while (current != EOF) {
            current = parse_node(current, handle, output);
        }
        fclose(handle);
    }

    // fclose flushes the buffered html, so a failure here means lost output
    if (output != stdout && fclose(output) != 0) {
        fprintf(stderr, "could not write output file: %s\n", strerror(errno));
        return -1;
    }
    if (verbose) printf("done compiling\n");
    return 0;
}

50
src/nhtml_string.c Normal file
View File

@ -0,0 +1,50 @@
/*
* string.c
*
* Created on: 08.08.2017
* Author: julian
*/
#include "nhtml_string.h"
#include <stdlib.h> // needs: malloc, free
#include <memory.h> // needs: memcpy
// Append the character c to str, growing the buffer by one byte.
// The buffer always ends in a 0 byte, which is included in str->len.
// A string without a buffer or with a length of 0 is treated as empty,
// so the write position can never fall in front of the buffer.
// Returns 0 on success. Returns -1 with errno = EINVAL when str is NULL
// and -1 with errno = ENOMEM when the buffer cannot be grown; str is
// left unchanged in both cases.
int string_append(string_t *str, char c) {
    if (str == NULL) {
        errno = EINVAL;
        return -1;
    }

    // bytes in use including the terminator; an empty string uses one
    const size_t used = (str->c_str == NULL || str->len == 0) ? 1 : str->len;

    char *new_ptr = realloc(str->c_str, used + 1);
    if (new_ptr == NULL) {
        errno = ENOMEM;
        return -1;
    }

    // the new character overwrites the old terminator position
    new_ptr[used - 1] = c;
    new_ptr[used] = '\0';

    str->c_str = new_ptr;
    str->len = used + 1;
    return 0;
}
// Free the character buffer of s.
// s is passed by value: the caller's struct keeps the old pointer and
// has to be reset by the caller before it is used again.
void string_destroy(string_t s) {
    char *buffer = s.c_str;
    free(buffer);
}
// Create an independent copy of old in a newly allocated buffer.
// An empty source (no buffer or zero length) yields an empty string
// (c_str == NULL, len == 0) instead of a zero sized allocation.
// When the allocation fails an empty string is returned as well, so a
// caller copying a non-empty string can detect failure by c_str == NULL.
string_t string_copy(string_t old) {
    string_t tmp = { NULL, 0 };

    if (old.c_str == NULL || old.len == 0) {
        return tmp;
    }

    tmp.c_str = malloc(old.len);
    if (tmp.c_str == NULL) {
        // The Application will have to handle a out of mem
        // situation here
        return tmp;
    }

    memcpy(tmp.c_str, old.c_str, old.len);
    tmp.len = old.len;
    return tmp;
}