gpubValidator/main.go

287 lines
6.6 KiB
Go

package main
import (
"fmt"
"os"
"archive/zip"
"bufio"
"strings"
"time"
"strconv"
"path"
"mime"
)
// BookInfo holds the values read from an archive's metadata.txt.
// Every field stores the raw text found after the matching key (for
// example the "title" key fills Title) and stays empty when the key is
// absent.
type BookInfo struct {
	// Mandatory keys: the metadata is rejected when either is missing.
	Title, GpubVersion string

	// Entry point of the book; index.gmi is checked when this is empty.
	Index string

	// Descriptive keys, stored without further checks.
	Author, Language, Charset, Description string

	// Published must parse as an unsigned integer; PublishDate and
	// RevisionDate must parse as RFC 3339 timestamps.
	Published, PublishDate, RevisionDate string

	// Remaining optional keys, stored without further checks.
	Copyright, License, Version, Cover string
}
var (
	// fileSystem indexes the entries of the opened archive by entry name.
	fileSystem map[string]*zip.File
	// bookInfo receives the values parsed from metadata.txt.
	bookInfo BookInfo
)
// main opens the archive named by the first command-line argument, validates
// it and, on success, prints the book information.
//
// The process exits with status 1 when no filename is given, when the
// archive cannot be opened, or when validation fails.
func main() {
	if len(os.Args) < 2 {
		fmt.Println("USAGE:", os.Args[0], "filename")
		os.Exit(1)
	}
	name := os.Args[1]

	fmt.Println("Opening", name)
	file, err := zip.OpenReader(name)
	if err != nil {
		fmt.Println("Failed to open", name, err)
		// Stop here: file is nil on error, so carrying on would panic
		// on the first use of the reader.
		os.Exit(1)
	}

	fileSystem = make(map[string]*zip.File)
	valid := validate(&file.Reader)
	// Close explicitly rather than with defer because os.Exit below does
	// not run deferred calls. The archive is only read, so the Close
	// error carries nothing actionable.
	_ = file.Close()

	if !valid {
		fmt.Println(name, "could not be validated...")
		os.Exit(1)
	}
	fmt.Println(name, "has been validated")
	printBookInfo()
}
// printBookInfo writes the title and the gpub version held in bookInfo to
// standard output, one per line, skipping whichever of the two is empty.
func printBookInfo() {
	entries := []struct{ label, value string }{
		{"Title:", bookInfo.Title},
		{"GpubVersion:", bookInfo.GpubVersion},
	}
	for _, e := range entries {
		if e.value == "" {
			continue
		}
		fmt.Println(e.label, e.value)
	}
}
// validate registers the archive's entries in fileSystem and reports whether
// the archive is acceptable.
//
// When a metadata.txt entry is present it must pass isValidMetadata, and the
// content reachable from the declared index (index.gmi when none is declared)
// must pass isValidContent. Without metadata.txt the archive is treated as a
// capsule archive and only needs a non-directory index.gmi at its root.
func validate(archive *zip.Reader) bool {
	hasMetadata := false
	// Single pass: validate the metadata when met and index every entry.
	for _, entry := range archive.File {
		if entry.Name == "metadata.txt" {
			if !isValidMetadata(entry) {
				fmt.Println("FATAL:", entry.Name, "present but not valid")
				return false
			}
			hasMetadata = true
		}
		fileSystem[entry.Name] = entry
	}

	if !hasMetadata {
		fmt.Println("WARNING: This is not a book but a capsule archive")
		index, found := fileSystem["index.gmi"]
		switch {
		case !found:
			fmt.Println("FATAL: This is a capsule archive but index.gmi is not present at root")
			return false
		case index.FileInfo().IsDir():
			fmt.Println("FATAL:", index.Name, "is a directory")
			return false
		}
		return true
	}

	if bookInfo.Index == "" {
		fmt.Println("WARNING: No index provided, checking default index.gmi")
		return isValidContent("index.gmi")
	}
	// path.Join with a single element only cleans the declared path.
	return isValidContent(path.Join(bookInfo.Index))
}
// isValidMetadata reads the metadata entry line by line, stores every
// recognised "key: value" pair in bookInfo and reports whether the metadata
// is acceptable.
//
// It returns false, after printing a FATAL message, when the entry is a
// directory or cannot be opened, when a non-blank line has no colon, when a
// key is unknown, when "published" is not an unsigned integer, when
// "publishDate" or "revisionDate" is not an RFC 3339 timestamp, or when
// "title" or "gpubVersion" is missing. Otherwise the result is that of
// isVersionValid.
func isValidMetadata(file *zip.File) bool {
	if file.FileInfo().IsDir() {
		fmt.Println("FATAL:", file.Name, "is a directory !")
		return false
	}
	content, err := file.Open()
	if err != nil {
		fmt.Println("FATAL:", file.Name, "can't be opened")
		return false
	}
	defer content.Close()

	var hasTitle, hasGpubVersion bool
	lines := bufio.NewScanner(content)
	for lines.Scan() {
		raw := lines.Text()
		if strings.Trim(raw, " ") == "" {
			continue // blank lines are allowed and ignored
		}
		// Split on the first colon only, so values may contain colons.
		colon := strings.Index(raw, ":")
		if colon < 0 {
			fmt.Println("FATAL: invalid line in", file.Name, raw)
			return false
		}
		key := strings.Trim(raw[:colon], " ")
		value := strings.Trim(raw[colon+1:], " ")

		switch key {
		case "title":
			bookInfo.Title = value
			hasTitle = true
		case "gpubVersion":
			bookInfo.GpubVersion = value
			hasGpubVersion = true
		case "index":
			bookInfo.Index = value
		case "author":
			bookInfo.Author = value
		case "language":
			// TODO: check that the language tag is valid (BCP 47).
			bookInfo.Language = value
		case "charset":
			bookInfo.Charset = value
		case "description":
			bookInfo.Description = value
		case "published":
			if _, err := strconv.ParseUint(value, 10, 32); err != nil {
				fmt.Println("FATAL: Bad format in", key, err)
				return false
			}
			bookInfo.Published = value
		case "publishDate", "revisionDate":
			if _, err := time.Parse(time.RFC3339, value); err != nil {
				fmt.Println("FATAL: Bad date format in", key, err)
				return false
			}
			if key == "publishDate" {
				bookInfo.PublishDate = value
			} else {
				bookInfo.RevisionDate = value
			}
		case "copyright":
			bookInfo.Copyright = value
		case "license":
			bookInfo.License = value
		case "version":
			bookInfo.Version = value
		case "cover":
			bookInfo.Cover = value
		default:
			fmt.Println("FATAL: Unknown key in", file.Name, key)
			return false
		}
	}
	if err := lines.Err(); err != nil {
		fmt.Println("FATAL: error while scanning", file.Name, err)
		return false
	}

	// Only these two keys are mandatory.
	switch {
	case !hasTitle:
		fmt.Println("FATAL: No title provided !")
		return false
	case !hasGpubVersion:
		fmt.Println("FATAL: No GpubVersion provided !")
		return false
	}
	// Last check: the declared gpub version must be a supported one.
	return isVersionValid()
}
// isValidContent reports whether the gemtext file at filePath, and every
// gemtext file reachable from it through link lines, is acceptable.
//
// filePath is an archive path; an empty path or "/" means index.gmi, one
// leading slash is ignored, and a directory entry stands for the index.gmi
// inside it. Only link lines ("=>") are inspected: a link to an image must
// carry a label, and a link to a gemtext file must point to an entry that
// exists in fileSystem and is itself valid. A FATAL message is printed for
// the first problem found.
func isValidContent(filePath string) bool {
	return validateContent(filePath, make(map[string]bool))
}

// validateContent does the work of isValidContent. visited records the
// entries already scanned or being scanned, so that files linking to each
// other are checked once instead of recursing without end.
func validateContent(filePath string, visited map[string]bool) bool {
	switch {
	case filePath == "" || filePath == "/": // the index is wanted
		filePath = "index.gmi"
	default: // a leading slash is not part of the entry name
		filePath = strings.TrimPrefix(filePath, "/")
	}

	file, ok := fileSystem[filePath]
	if !ok {
		fmt.Println("FATAL:", filePath, "doesn't exist !")
		return false
	}
	if file.FileInfo().IsDir() {
		// A directory is represented by its index.gmi. path.Join copes
		// with entry names with or without a trailing slash.
		return validateContent(path.Join(filePath, "index.gmi"), visited)
	}
	if visited[filePath] {
		return true
	}
	visited[filePath] = true

	reader, err := file.Open()
	if err != nil {
		fmt.Println("FATAL:", file.Name, "can't be opened")
		return false
	}
	defer reader.Close()

	const blanks = " \t" // a link line may use spaces or tabs as separators
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()
		// Only link lines are checked.
		if !strings.HasPrefix(line, "=>") || len(line) <= 2 {
			continue
		}
		link := strings.Trim(line[2:], blanks)
		location, text := link, ""
		if sep := strings.IndexAny(link, blanks); sep != -1 {
			location = link[:sep]
			text = strings.Trim(link[sep+1:], blanks)
		}

		// Turn the location into a cleaned path relative to the archive
		// root ("." and ".." resolved, no leading slash).
		// NOTE(review): locations with a URL scheme (gemini://, https://)
		// are resolved like local paths too — confirm this is intended.
		if strings.HasPrefix(location, "/") {
			location = path.Join(location[1:])
		} else {
			location = path.Join(path.Dir(filePath), location)
		}

		if isImage(location) && text == "" {
			fmt.Println("FATAL: image without alternative text in", file.Name, line)
			return false
		}
		if isGemtext(location) && !validateContent(location, visited) {
			return false
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Println("FATAL: error while scanning", file.Name, err)
		return false
	}
	return true
}
// isVersionValid reports whether the gpub version stored in bookInfo is the
// one this validator accepts.
func isVersionValid() bool {
	const supportedGpubVersion = "1.0.0"
	return bookInfo.GpubVersion == supportedGpubVersion
}
// isImage reports whether the extension of file's last path element maps to
// a MIME type starting with "image". A name without an extension is never an
// image.
func isImage(file string) bool {
	// path.Ext returns "" when the last element has no dot, where slicing
	// from strings.LastIndex would panic on the -1 index.
	ext := path.Ext(file)
	if ext == "" {
		return false
	}
	return strings.HasPrefix(mime.TypeByExtension(ext), "image")
}
// isGemtext reports whether the last path element of file ends in ".gmi" or
// ".gemini". A name without an extension is never gemtext.
func isGemtext(file string) bool {
	// path.Ext returns "" when the last element has no dot, where slicing
	// from strings.LastIndex would panic on the -1 index.
	switch path.Ext(file) {
	case ".gmi", ".gemini":
		return true
	}
	return false
}