From 0079d83fb4b9f622a93d7457e8908a0b5b17fe06 Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 4 Feb 2017 14:58:33 +1100 Subject: [PATCH 01/13] wikitranslate now takes the input from the OS args --- README.md | 34 ++++++++++++++++++++++++++++++++++ main.go | 28 +++++++++++++++++++++------- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index a322227..717c462 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,37 @@ CAT tools to understand the difference between the segment text (that needs to be translated) and formatting. `wikitranslate` converts the wiki markup into pseudo-HTML that can be ingested and translated. The result HTML can then be converted back into wiki markup to be uploaded as a new page. + +Install and Usage +================= + +Download the latest binary from the +[releases page](https://github.com/elliotchance/wikitranslate/releases). + +*Note:* You will not be able to open the file downloaded. You will need to open +the Terminal and use the following commands: + +To prepare a wiki page for translating, provide the URL: + +```bash +wikitranslate https://en.wikipedia.org/wiki/Staffordshire_Bull_Terrier +``` + +This will generate a `Staffordshire_Bull_Terrier.html` in your Downloads folder. +This is the document you upload or import into your CAT tools. + +--- + +Once the translation is complete you will need to download or export the new +HTML document and use `wikitranslate` to convert it back into the wiki markup: + +```bash +wikitranslate Staffordshire_Bull_Terrier.html +``` + +*Tip:* You can drag the file into the Terminal to insert the full path to the +HTML document. + +This will generate a `Staffordshire_Bull_Terrier.html.txt` in the same folder as +`Staffordshire_Bull_Terrier.html`. You can now open the text file to get the +wiki markup for submission. 
diff --git a/main.go b/main.go index aca5599..af41fc0 100644 --- a/main.go +++ b/main.go @@ -12,6 +12,7 @@ import ( "bufio" "html" "io/ioutil" + "os/user" ) func ReplaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { @@ -366,12 +367,17 @@ func WikiToHtml(wikimarkup string) string { } func main() { - fmt.Printf("Wiki URL or File: ") - reader := bufio.NewReader(os.Stdin) - input, _ := reader.ReadString('\n') + if len(os.Args) < 2 { + fmt.Printf("Usage: %v \n\n", os.Args[0]) + fmt.Printf("Examples:\n %v https://en.wikipedia.org/wiki/Staffordshire_Bull_Terrier\n", os.Args[0]) + fmt.Printf(" %v Staffordshire_Bull_Terrier.html\n\n", os.Args[0]) + return + } + + input := os.Args[1] - if input[0] == 'h' { - fmt.Printf("Downloading page...") + if strings.HasPrefix(input, "http") { + fmt.Printf("Downloading page... ") tokens := strings.Split(input, "/") title := strings.TrimSpace(tokens[len(tokens) - 1]) @@ -390,9 +396,17 @@ func main() { re := regexp.MustCompile("(?s)(.*)") wikimarkup := html.UnescapeString(re.FindStringSubmatch(content)[1]) - fmt.Printf(" Done\n") + usr, err := user.Current() + if err != nil { + panic(err) + } + + destinationFolder := usr.HomeDir + "/Downloads" + destinationPath := destinationFolder + "/" + title + ".html" + + fmt.Printf(" Done\nThe file has been created at: " + destinationPath + "\n") - fileHandle, _ := os.Create(title + ".html") + fileHandle, _ := os.Create(destinationPath) writer := bufio.NewWriter(fileHandle) defer fileHandle.Close() From 2100a8867f7eb23b808f2e3d57b7471224eadfbe Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 4 Feb 2017 15:01:02 +1100 Subject: [PATCH 02/13] Apply go fmt --- main.go | 790 +++++++++++++++++++++++++-------------------------- main_test.go | 276 +++++++++--------- 2 files changed, 533 insertions(+), 533 deletions(-) diff --git a/main.go b/main.go index af41fc0..c9c33a5 100644 --- a/main.go +++ b/main.go @@ -1,433 +1,433 @@ package main import ( - "fmt" - 
"regexp" - "strings" - "encoding/base64" - "strconv" - "net/http" - "bytes" - "os" - "bufio" - "html" - "io/ioutil" - "os/user" + "bufio" + "bytes" + "encoding/base64" + "fmt" + "html" + "io/ioutil" + "net/http" + "os" + "os/user" + "regexp" + "strconv" + "strings" ) func ReplaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { - result := "" - lastIndex := 0 + result := "" + lastIndex := 0 - for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { - groups := []string{} - for i := 0; i < len(v); i += 2 { - groups = append(groups, str[v[i]:v[i+1]]) - } + for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { + groups := []string{} + for i := 0; i < len(v); i += 2 { + groups = append(groups, str[v[i]:v[i+1]]) + } - result += str[lastIndex:v[0]] + repl(groups) - lastIndex = v[1] - } + result += str[lastIndex:v[0]] + repl(groups) + lastIndex = v[1] + } - return result + str[lastIndex:] + return result + str[lastIndex:] } func BalanceHtmlTags(html string) string { - parts := []string{} - result := "" - split1 := strings.Split(html, "<") - for _, p := range split1 { - parts = append(parts, strings.Split(p, ">")...) - } - - stack := []string{} - for i := 0; i < len(parts) - 1; i += 2 { - result += parts[i] - - if /*len(parts[i + 1]) > 0 &&*/ parts[i + 1][0] == '/' { - for j := len(stack) - 1; j >= 0; j-- { - s := stack[j] - stack = stack[:len(stack) - 1] - result += fmt.Sprintf("", s) - if parts[i + 1] == ("/" + s) { - break - } - } - } else { - tagParts := strings.Split(parts[i + 1], " ") - stack = append(stack, tagParts[0]) - result += fmt.Sprintf("<%v>", parts[i + 1]) - } - } - - result += parts[len(parts) - 1] - - // Anything left on the stack has to be closed. - for _, s := range stack { - result += fmt.Sprintf("", s) - } - - return result + parts := []string{} + result := "" + split1 := strings.Split(html, "<") + for _, p := range split1 { + parts = append(parts, strings.Split(p, ">")...) 
+ } + + stack := []string{} + for i := 0; i < len(parts)-1; i += 2 { + result += parts[i] + + if /*len(parts[i + 1]) > 0 &&*/ parts[i+1][0] == '/' { + for j := len(stack) - 1; j >= 0; j-- { + s := stack[j] + stack = stack[:len(stack)-1] + result += fmt.Sprintf("", s) + if parts[i+1] == ("/" + s) { + break + } + } + } else { + tagParts := strings.Split(parts[i+1], " ") + stack = append(stack, tagParts[0]) + result += fmt.Sprintf("<%v>", parts[i+1]) + } + } + + result += parts[len(parts)-1] + + // Anything left on the stack has to be closed. + for _, s := range stack { + result += fmt.Sprintf("", s) + } + + return result } func HtmlToWiki(html string) string { - re := regexp.MustCompile(`(.*?)`) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - r := fmt.Sprintf(`[[File:%v`, groups[1]) - if groups[2] != "" || groups[3] != "" { - r += "|" + groups[2] - } - if groups[3] != "" || groups[4] != "" { - r += "|" + groups[4] - - if groups[3] != "" { - r += "link=" + groups[3] - } - } - return r + "]]" - }) - - re = regexp.MustCompile(`(.+?)`) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - if groups[1] == groups[2] { - return fmt.Sprintf(`[[%v]]`, groups[1]) - } - - return fmt.Sprintf(`[[%v|%v]]`, groups[1], groups[2]) - }) - - html = strings.Replace(html, "", "'''''", -1) - html = strings.Replace(html, "", "'''''", -1) - - html = strings.Replace(html, "", "'''", -1) - html = strings.Replace(html, "", "'''", -1) - html = strings.Replace(html, "", "''", -1) - html = strings.Replace(html, "", "''", -1) - - html = strings.Replace(html, "
  • ", "*", -1) - html = strings.Replace(html, "
  • ", "", -1) - html = strings.Replace(html, "", "#", -1) - html = strings.Replace(html, "", "", -1) - - re = regexp.MustCompile(`(.+?)`) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - level, err := strconv.Atoi(groups[1]) - if err != nil { - panic(err) - } - - return strings.Repeat("=", level) + groups[2] + strings.Repeat("=", level) - }) - - re = regexp.MustCompile(``) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - if groups[2] == "" { - return fmt.Sprintf(`{{%v}}`, groups[1]) - } - - re = regexp.MustCompile(`(.*?)`) - result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { - if groups[1] == "" { - return fmt.Sprintf(`|%v`, groups[2]) - } - - return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) - }) - - return fmt.Sprintf(`{{%v%v}}`, groups[1], result) - }) - - re = regexp.MustCompile(`(?s)(.*?)`) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - result := "{|" - - re = regexp.MustCompile(`(?s)\n(.*?)\n`) - result += ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { - re = regexp.MustCompile(`(?s)(.*?)`) - return "|-" + groups[1] + "\n" + ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { - if groups[1] == "d" { - return "|" + groups[3] - } - return "!" 
+ groups[3] - }) - }) - - return result + "|}" - }) - - re = regexp.MustCompile(``) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - decoded, err := base64.StdEncoding.DecodeString(groups[1]) - if err != nil { - panic(err) - } - - return fmt.Sprintf(`%v`, groups[2], string(decoded)) - }) - - re = regexp.MustCompile(``) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - decoded, err := base64.StdEncoding.DecodeString(groups[1]) - if err != nil { - panic(err) - } - - return fmt.Sprintf(`%v`, groups[2], string(decoded)) - }) - - return html + re := regexp.MustCompile(`(.*?)`) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + r := fmt.Sprintf(`[[File:%v`, groups[1]) + if groups[2] != "" || groups[3] != "" { + r += "|" + groups[2] + } + if groups[3] != "" || groups[4] != "" { + r += "|" + groups[4] + + if groups[3] != "" { + r += "link=" + groups[3] + } + } + return r + "]]" + }) + + re = regexp.MustCompile(`(.+?)`) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + if groups[1] == groups[2] { + return fmt.Sprintf(`[[%v]]`, groups[1]) + } + + return fmt.Sprintf(`[[%v|%v]]`, groups[1], groups[2]) + }) + + html = strings.Replace(html, "", "'''''", -1) + html = strings.Replace(html, "", "'''''", -1) + + html = strings.Replace(html, "", "'''", -1) + html = strings.Replace(html, "", "'''", -1) + html = strings.Replace(html, "", "''", -1) + html = strings.Replace(html, "", "''", -1) + + html = strings.Replace(html, "
  • ", "*", -1) + html = strings.Replace(html, "
  • ", "", -1) + html = strings.Replace(html, "", "#", -1) + html = strings.Replace(html, "", "", -1) + + re = regexp.MustCompile(`(.+?)`) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + level, err := strconv.Atoi(groups[1]) + if err != nil { + panic(err) + } + + return strings.Repeat("=", level) + groups[2] + strings.Repeat("=", level) + }) + + re = regexp.MustCompile(``) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + if groups[2] == "" { + return fmt.Sprintf(`{{%v}}`, groups[1]) + } + + re = regexp.MustCompile(`(.*?)`) + result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + if groups[1] == "" { + return fmt.Sprintf(`|%v`, groups[2]) + } + + return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) + }) + + return fmt.Sprintf(`{{%v%v}}`, groups[1], result) + }) + + re = regexp.MustCompile(`(?s)(.*?)`) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + result := "{|" + + re = regexp.MustCompile(`(?s)\n(.*?)\n`) + result += ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + re = regexp.MustCompile(`(?s)(.*?)`) + return "|-" + groups[1] + "\n" + ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + if groups[1] == "d" { + return "|" + groups[3] + } + return "!" 
+ groups[3] + }) + }) + + return result + "|}" + }) + + re = regexp.MustCompile(``) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + decoded, err := base64.StdEncoding.DecodeString(groups[1]) + if err != nil { + panic(err) + } + + return fmt.Sprintf(`%v`, groups[2], string(decoded)) + }) + + re = regexp.MustCompile(``) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + decoded, err := base64.StdEncoding.DecodeString(groups[1]) + if err != nil { + panic(err) + } + + return fmt.Sprintf(`%v`, groups[2], string(decoded)) + }) + + return html } func WikiToHtml(wikimarkup string) string { - re := regexp.MustCompile(`(.*?)`) - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - encoded := base64.StdEncoding.EncodeToString([]byte(groups[2])) - return fmt.Sprintf(``, encoded, groups[1]) - }) - - re = regexp.MustCompile(`(.*?)`) - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - encoded := base64.StdEncoding.EncodeToString([]byte(groups[2])) - return fmt.Sprintf(``, encoded, groups[1]) - }) - - re = regexp.MustCompile("{{([^|}]+)\\|?(.*?)}}") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - r := `" - }) - - re = regexp.MustCompile("'''(.+?)'''") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - return "" + groups[1] + "" - }) - - re = regexp.MustCompile("''(.+?)''") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - return "" + groups[1] + "" - }) - - re = regexp.MustCompile("\\[\\[(.+?)\\]\\]") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - // File: - if strings.HasPrefix(groups[1], "File:") { - parts := strings.SplitN(groups[1], "|", 3) - if len(parts) == 1 { - parts = append(parts, "", "") - } else if len(parts) == 2 { - parts = append(parts, "") - } - - link 
:= "" - if strings.HasPrefix(parts[2], "link=") { - link = parts[2][5:] - parts[2] = "" - } - - return fmt.Sprintf(`%v`, parts[0][5:], parts[1], link, parts[2]) - } - - // Else - parts := strings.SplitN(groups[1], "|", 2) - if len(parts) == 1 { - return fmt.Sprintf(`%v`, parts[0], parts[0]) - } else { - return fmt.Sprintf(`%v`, parts[0], parts[1]) - } - }) - - re = regexp.MustCompile("\\[(.{10,}?)\\]") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - parts := strings.SplitN(groups[1], " ", 2) - if len(parts) == 1 { - return "" + groups[1] + "" - } else { - return fmt.Sprintf(`%v`, parts[0], parts[1]) - } - }) - - // Headings - for i := 6; i >= 1; i-- { - re = regexp.MustCompile("(^|\\s)" + strings.Repeat("=", i) + "(.+?)" + strings.Repeat("=", i)) - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - return fmt.Sprintf("%v%v", groups[1], i, groups[2], i) - }) - } - - // Bullet points/lists - re = regexp.MustCompile("(?m)^([*#])([^\\n]+)") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - if groups[1] == "*" { - return fmt.Sprintf("
  • %v
  • ", groups[2]) - } - - return fmt.Sprintf("%v", groups[2]) - }) - - // Images - // Raw URLs - - // Tables - re = regexp.MustCompile("(?s){\\|([^\\n]*)(.*?)\\|}") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(table_groups []string) string { - table := "\n" - - lines := strings.Split(table_groups[2], "\n") - - inRow := false - printedTr := false - for _, line := range lines { - if strings.HasPrefix(line, "|-") { - printedTr = true - if inRow { - table += "\n" - } - table += "\n" - inRow = true - continue - } - - if strings.HasPrefix(line, "|") { - if !printedTr { - printedTr = true - inRow = true - table += "\n" - } - - parts := strings.Split(line[1:], "|") - style := "" - body := "" - - if len(parts) > 1 { - style = parts[0] - body = parts[1] - } else { - body = parts[0] - } - - bodyParts := strings.Split(body, "!!") - - for _, bodyPart := range bodyParts { - table += "\n" - } - - continue - } - - if strings.HasPrefix(line, "!") { - if !printedTr { - printedTr = true - inRow = true - table += "\n" - } - - parts := strings.Split(line[1:], "|") - style := "" - body := "" - - if len(parts) > 1 { - style = parts[0] - body = parts[1] - } else { - body = parts[0] - } - - bodyParts := strings.Split(body, "!!") - - for _, bodyPart := range bodyParts { - table += "\n" - } - - continue - } - } - table += "\n" - - return table + "
    " + bodyPart + "
    " + bodyPart + "
    " - }) - - // Random unbalanced left overs - wikimarkup = strings.Replace(wikimarkup, "'''", "", -1) - wikimarkup = strings.Replace(wikimarkup, "''", "", -1) - - return BalanceHtmlTags(wikimarkup) + re := regexp.MustCompile(`(.*?)`) + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + encoded := base64.StdEncoding.EncodeToString([]byte(groups[2])) + return fmt.Sprintf(``, encoded, groups[1]) + }) + + re = regexp.MustCompile(`(.*?)`) + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + encoded := base64.StdEncoding.EncodeToString([]byte(groups[2])) + return fmt.Sprintf(``, encoded, groups[1]) + }) + + re = regexp.MustCompile("{{([^|}]+)\\|?(.*?)}}") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + r := `" + }) + + re = regexp.MustCompile("'''(.+?)'''") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + return "" + groups[1] + "" + }) + + re = regexp.MustCompile("''(.+?)''") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + return "" + groups[1] + "" + }) + + re = regexp.MustCompile("\\[\\[(.+?)\\]\\]") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + // File: + if strings.HasPrefix(groups[1], "File:") { + parts := strings.SplitN(groups[1], "|", 3) + if len(parts) == 1 { + parts = append(parts, "", "") + } else if len(parts) == 2 { + parts = append(parts, "") + } + + link := "" + if strings.HasPrefix(parts[2], "link=") { + link = parts[2][5:] + parts[2] = "" + } + + return fmt.Sprintf(`%v`, parts[0][5:], parts[1], link, parts[2]) + } + + // Else + parts := strings.SplitN(groups[1], "|", 2) + if len(parts) == 1 { + return fmt.Sprintf(`%v`, parts[0], parts[0]) + } else { + return fmt.Sprintf(`%v`, parts[0], parts[1]) + } + }) + + re = regexp.MustCompile("\\[(.{10,}?)\\]") + wikimarkup = 
ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + parts := strings.SplitN(groups[1], " ", 2) + if len(parts) == 1 { + return "" + groups[1] + "" + } else { + return fmt.Sprintf(`%v`, parts[0], parts[1]) + } + }) + + // Headings + for i := 6; i >= 1; i-- { + re = regexp.MustCompile("(^|\\s)" + strings.Repeat("=", i) + "(.+?)" + strings.Repeat("=", i)) + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + return fmt.Sprintf("%v%v", groups[1], i, groups[2], i) + }) + } + + // Bullet points/lists + re = regexp.MustCompile("(?m)^([*#])([^\\n]+)") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + if groups[1] == "*" { + return fmt.Sprintf("
  • %v
  • ", groups[2]) + } + + return fmt.Sprintf("%v", groups[2]) + }) + + // Images + // Raw URLs + + // Tables + re = regexp.MustCompile("(?s){\\|([^\\n]*)(.*?)\\|}") + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(table_groups []string) string { + table := "\n" + + lines := strings.Split(table_groups[2], "\n") + + inRow := false + printedTr := false + for _, line := range lines { + if strings.HasPrefix(line, "|-") { + printedTr = true + if inRow { + table += "\n" + } + table += "\n" + inRow = true + continue + } + + if strings.HasPrefix(line, "|") { + if !printedTr { + printedTr = true + inRow = true + table += "\n" + } + + parts := strings.Split(line[1:], "|") + style := "" + body := "" + + if len(parts) > 1 { + style = parts[0] + body = parts[1] + } else { + body = parts[0] + } + + bodyParts := strings.Split(body, "!!") + + for _, bodyPart := range bodyParts { + table += "\n" + } + + continue + } + + if strings.HasPrefix(line, "!") { + if !printedTr { + printedTr = true + inRow = true + table += "\n" + } + + parts := strings.Split(line[1:], "|") + style := "" + body := "" + + if len(parts) > 1 { + style = parts[0] + body = parts[1] + } else { + body = parts[0] + } + + bodyParts := strings.Split(body, "!!") + + for _, bodyPart := range bodyParts { + table += "\n" + } + + continue + } + } + table += "\n" + + return table + "
    " + bodyPart + "
    " + bodyPart + "
    " + }) + + // Random unbalanced left overs + wikimarkup = strings.Replace(wikimarkup, "'''", "", -1) + wikimarkup = strings.Replace(wikimarkup, "''", "", -1) + + return BalanceHtmlTags(wikimarkup) } func main() { - if len(os.Args) < 2 { - fmt.Printf("Usage: %v \n\n", os.Args[0]) - fmt.Printf("Examples:\n %v https://en.wikipedia.org/wiki/Staffordshire_Bull_Terrier\n", os.Args[0]) - fmt.Printf(" %v Staffordshire_Bull_Terrier.html\n\n", os.Args[0]) - return - } + if len(os.Args) < 2 { + fmt.Printf("Usage: %v \n\n", os.Args[0]) + fmt.Printf("Examples:\n %v https://en.wikipedia.org/wiki/Staffordshire_Bull_Terrier\n", os.Args[0]) + fmt.Printf(" %v Staffordshire_Bull_Terrier.html\n\n", os.Args[0]) + return + } - input := os.Args[1] + input := os.Args[1] - if strings.HasPrefix(input, "http") { - fmt.Printf("Downloading page... ") + if strings.HasPrefix(input, "http") { + fmt.Printf("Downloading page... ") - tokens := strings.Split(input, "/") - title := strings.TrimSpace(tokens[len(tokens) - 1]) + tokens := strings.Split(input, "/") + title := strings.TrimSpace(tokens[len(tokens)-1]) - url := "https://en.wikipedia.org/w/index.php?title=" + title + "&action=edit" - response, err := http.Get(url) - if err != nil { - panic(err) - } - defer response.Body.Close() + url := "https://en.wikipedia.org/w/index.php?title=" + title + "&action=edit" + response, err := http.Get(url) + if err != nil { + panic(err) + } + defer response.Body.Close() - buf := new(bytes.Buffer) - buf.ReadFrom(response.Body) - content := buf.String() + buf := new(bytes.Buffer) + buf.ReadFrom(response.Body) + content := buf.String() - re := regexp.MustCompile("(?s)(.*)") - wikimarkup := html.UnescapeString(re.FindStringSubmatch(content)[1]) + re := regexp.MustCompile("(?s)(.*)") + wikimarkup := html.UnescapeString(re.FindStringSubmatch(content)[1]) - usr, err := user.Current() - if err != nil { - panic(err) - } + usr, err := user.Current() + if err != nil { + panic(err) + } - destinationFolder := 
usr.HomeDir + "/Downloads" - destinationPath := destinationFolder + "/" + title + ".html" + destinationFolder := usr.HomeDir + "/Downloads" + destinationPath := destinationFolder + "/" + title + ".html" - fmt.Printf(" Done\nThe file has been created at: " + destinationPath + "\n") + fmt.Printf(" Done\nThe file has been created at: " + destinationPath + "\n") - fileHandle, _ := os.Create(destinationPath) - writer := bufio.NewWriter(fileHandle) - defer fileHandle.Close() + fileHandle, _ := os.Create(destinationPath) + writer := bufio.NewWriter(fileHandle) + defer fileHandle.Close() - writer.WriteString(WikiToHtml(wikimarkup)) + writer.WriteString(WikiToHtml(wikimarkup)) - writer.Flush() - } else { - fileName := strings.TrimSpace(input) - html, err := ioutil.ReadFile(fileName) - if err != nil { - panic(err) - } + writer.Flush() + } else { + fileName := strings.TrimSpace(input) + html, err := ioutil.ReadFile(fileName) + if err != nil { + panic(err) + } - fileHandle, _ := os.Create(fileName + ".txt") - writer := bufio.NewWriter(fileHandle) - defer fileHandle.Close() + fileHandle, _ := os.Create(fileName + ".txt") + writer := bufio.NewWriter(fileHandle) + defer fileHandle.Close() - writer.WriteString(HtmlToWiki(string(html))) + writer.WriteString(HtmlToWiki(string(html))) - writer.Flush() + writer.Flush() - fmt.Printf("Done\n") - } + fmt.Printf("Done\n") + } } diff --git a/main_test.go b/main_test.go index 595571e..31573f7 100644 --- a/main_test.go +++ b/main_test.go @@ -1,160 +1,160 @@ package main import ( - "testing" + "testing" ) type example struct { - wiki string - html string - newWiki string + wiki string + html string + newWiki string } var examples = []example{ - // and - { "foo ''bar'' baz", "foo bar baz", "" }, - { "foo ''bar'' ''baz'' qux", "foo bar baz qux", "" }, - { "foo '''bar''' baz", "foo bar baz", "" }, - { "foo '''bar''' '''baz''' qux", "foo bar baz qux", "" }, - { "foo '''''bar''''' baz", "foo bar baz", "" }, - { "foo ''bar baz", "foo bar baz", "foo 
''bar baz''" }, - { "foo '''bar baz", "foo bar baz", "foo '''bar baz'''" }, - - // Links - { "foo [[Bar]] baz", `foo Bar baz`, "" }, - { "foo [[Bar|some label]] baz", `foo some label baz`, "" }, - { "foo [[Bar|some label|foo]] baz", `foo some label|foo baz`, "" }, - - // Images - { "foo [[File:filename.extension]] baz", `foo baz`, "" }, - { "foo [[File:filename.extension|options]] baz", `foo baz`, "" }, - { "foo [[File:filename.extension|options|caption words]] baz", `foo caption words baz`, "" }, - { "foo [[File:filename.extension|options|link=Internal]] baz", `foo baz`, "" }, - { "foo [[File:filename.extension|options|link=http://External]] baz", `foo baz`, "" }, - - // References - { "foo [[ABC]] baz", `foo baz`, "" }, - { `foo [[ABC]] baz`, `foo baz`, "" }, - - // - { "foo ''qux'' baz", `foo baz`, "" }, - { "foo ''qux'' baz", `foo baz`, "" }, - - // Templates - { "foo {{bar}} baz", `foo baz`, "" }, - { "foo {{bar|qux}} baz", `foo baz`, "" }, - { "foo {{bar|qux|abc}} baz", `foo baz`, "" }, - { "foo {{bar|qux=abc}} baz", `foo baz`, "" }, - - // Headings - { "====== The Heading ======\nbar", "
    The Heading
    \nbar", "" }, - { "===== The Heading =====\nbar", "
    The Heading
    \nbar", "" }, - { "==== The Heading ====\nbar", "

    The Heading

    \nbar", "" }, - { "=== The Heading ===\nbar", "

    The Heading

    \nbar", "" }, - { "== The Heading ==\nbar", "

    The Heading

    \nbar", "" }, - { "= The Heading =\nbar", "

    The Heading

    \nbar", "" }, - { " ====== The Heading ======\nbar", "
    The Heading
    \nbar", "" }, - { " ===== The Heading =====\nbar", "
    The Heading
    \nbar", "" }, - { " ==== The Heading ====\nbar", "

    The Heading

    \nbar", "" }, - { " === The Heading ===\nbar", "

    The Heading

    \nbar", "" }, - { " == The Heading ==\nbar", "

    The Heading

    \nbar", "" }, - { " = The Heading =\nbar", "

    The Heading

    \nbar", "" }, - { "foo\n====== The Heading ======\nbar", "foo\n
    The Heading
    \nbar", "" }, - { "foo\n===== The Heading =====\nbar", "foo\n
    The Heading
    \nbar", "" }, - { "foo\n==== The Heading ====\nbar", "foo\n

    The Heading

    \nbar", "" }, - { "foo\n=== The Heading ===\nbar", "foo\n

    The Heading

    \nbar", "" }, - { "foo\n== The Heading ==\nbar", "foo\n

    The Heading

    \nbar", "" }, - { "foo\n= The Heading =\nbar", "foo\n

    The Heading

    \nbar", "" }, - - // Lists - { "Foo\n* Bar\n* Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", "" }, - { "Foo\n# Bar\n# Baz\nQux", "Foo\n Bar\n Baz\nQux", "" }, - { "Foo\n*Bar\n*Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", "" }, - { "Foo\n#Bar\n#Baz\nQux", "Foo\nBar\nBaz\nQux", "" }, - - // Tables - { "Foo\n{|\n|-\n|Bar\n|}\nQux", - "Foo\n\n\n\n\n
    Bar
    \nQux", - "" }, - { "Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", - "" }, - { "Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", - "" }, - - { "Foo\n{|\n|Bar\n|}\nQux", - "Foo\n\n\n\n\n
    Bar
    \nQux", - "Foo\n{|\n|-\n|Bar\n|}\nQux" }, - { "Foo\n{|\n|Bar\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", - "Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux" }, - { "Foo\n{|\n|Bar\n|-\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", - "Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux" }, - - { "Foo\n{|\n!Bar\n|}\nQux", - "Foo\n\n\n\n\n
    Bar
    \nQux", - "Foo\n{|\n|-\n!Bar\n|}\nQux" }, - { "Foo\n{|\n!Bar\n!Baz\n|}\nQux", - "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", - "Foo\n{|\n|-\n!Bar\n!Baz\n|}\nQux" }, - { "Foo\n{|\n!Bar\n|-\n!Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", - "Foo\n{|\n|-\n!Bar\n|-\n!Baz\n|}\nQux" }, + // and + {"foo ''bar'' baz", "foo bar baz", ""}, + {"foo ''bar'' ''baz'' qux", "foo bar baz qux", ""}, + {"foo '''bar''' baz", "foo bar baz", ""}, + {"foo '''bar''' '''baz''' qux", "foo bar baz qux", ""}, + {"foo '''''bar''''' baz", "foo bar baz", ""}, + {"foo ''bar baz", "foo bar baz", "foo ''bar baz''"}, + {"foo '''bar baz", "foo bar baz", "foo '''bar baz'''"}, + + // Links + {"foo [[Bar]] baz", `foo Bar baz`, ""}, + {"foo [[Bar|some label]] baz", `foo some label baz`, ""}, + {"foo [[Bar|some label|foo]] baz", `foo some label|foo baz`, ""}, + + // Images + {"foo [[File:filename.extension]] baz", `foo baz`, ""}, + {"foo [[File:filename.extension|options]] baz", `foo baz`, ""}, + {"foo [[File:filename.extension|options|caption words]] baz", `foo caption words baz`, ""}, + {"foo [[File:filename.extension|options|link=Internal]] baz", `foo baz`, ""}, + {"foo [[File:filename.extension|options|link=http://External]] baz", `foo baz`, ""}, + + // References + {"foo [[ABC]] baz", `foo baz`, ""}, + {`foo [[ABC]] baz`, `foo baz`, ""}, + + // + {"foo ''qux'' baz", `foo baz`, ""}, + {"foo ''qux'' baz", `foo baz`, ""}, + + // Templates + {"foo {{bar}} baz", `foo baz`, ""}, + {"foo {{bar|qux}} baz", `foo baz`, ""}, + {"foo {{bar|qux|abc}} baz", `foo baz`, ""}, + {"foo {{bar|qux=abc}} baz", `foo baz`, ""}, + + // Headings + {"====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, + {"===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, + {"==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, + {"=== The Heading ===\nbar", "

    The Heading

    \nbar", ""}, + {"== The Heading ==\nbar", "

    The Heading

    \nbar", ""}, + {"= The Heading =\nbar", "

    The Heading

    \nbar", ""}, + {" ====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, + {" ===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, + {" ==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, + {" === The Heading ===\nbar", "

    The Heading

    \nbar", ""}, + {" == The Heading ==\nbar", "

    The Heading

    \nbar", ""}, + {" = The Heading =\nbar", "

    The Heading

    \nbar", ""}, + {"foo\n====== The Heading ======\nbar", "foo\n
    The Heading
    \nbar", ""}, + {"foo\n===== The Heading =====\nbar", "foo\n
    The Heading
    \nbar", ""}, + {"foo\n==== The Heading ====\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"foo\n=== The Heading ===\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"foo\n== The Heading ==\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"foo\n= The Heading =\nbar", "foo\n

    The Heading

    \nbar", ""}, + + // Lists + {"Foo\n* Bar\n* Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, + {"Foo\n# Bar\n# Baz\nQux", "Foo\n Bar\n Baz\nQux", ""}, + {"Foo\n*Bar\n*Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, + {"Foo\n#Bar\n#Baz\nQux", "Foo\nBar\nBaz\nQux", ""}, + + // Tables + {"Foo\n{|\n|-\n|Bar\n|}\nQux", + "Foo\n\n\n\n\n
    Bar
    \nQux", + ""}, + {"Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", + ""}, + {"Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + ""}, + + {"Foo\n{|\n|Bar\n|}\nQux", + "Foo\n\n\n\n\n
    Bar
    \nQux", + "Foo\n{|\n|-\n|Bar\n|}\nQux"}, + {"Foo\n{|\n|Bar\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", + "Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux"}, + {"Foo\n{|\n|Bar\n|-\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + "Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux"}, + + {"Foo\n{|\n!Bar\n|}\nQux", + "Foo\n\n\n\n\n
    Bar
    \nQux", + "Foo\n{|\n|-\n!Bar\n|}\nQux"}, + {"Foo\n{|\n!Bar\n!Baz\n|}\nQux", + "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", + "Foo\n{|\n|-\n!Bar\n!Baz\n|}\nQux"}, + {"Foo\n{|\n!Bar\n|-\n!Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + "Foo\n{|\n|-\n!Bar\n|-\n!Baz\n|}\nQux"}, } func TestExamples(t *testing.T) { - for _, test := range examples { - if test.newWiki == "" { - test.newWiki = test.wiki - } - - html := WikiToHtml(test.wiki) - if html != test.html { - t.Errorf("Expected HTML '%v' from wiki '%v', got '%v'", test.html, test.wiki, html) - } - - wiki := HtmlToWiki(test.html) - if wiki != test.newWiki { - t.Errorf("Expected Wiki '%v' from HTML '%v', got '%v'", test.newWiki, test.html, wiki) - } - } + for _, test := range examples { + if test.newWiki == "" { + test.newWiki = test.wiki + } + + html := WikiToHtml(test.wiki) + if html != test.html { + t.Errorf("Expected HTML '%v' from wiki '%v', got '%v'", test.html, test.wiki, html) + } + + wiki := HtmlToWiki(test.html) + if wiki != test.newWiki { + t.Errorf("Expected Wiki '%v' from HTML '%v', got '%v'", test.newWiki, test.html, wiki) + } + } } type balanceHtmlTagsExample struct { - before string - expected string + before string + expected string } var balanceHtmlTagsExamples = []balanceHtmlTagsExample{ - // Already balanced - { "foo bar", "foo bar" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo barquxx baz", "foo barquxx baz" }, - - // Too many opens - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - - // To many closes - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - { "foo bar baz", "foo bar baz" }, - - // "<>" + // Already balanced + {"foo bar", "foo bar"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo barquxx baz", "foo barquxx baz"}, + + // Too many opens + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + + // To many 
closes + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + {"foo bar baz", "foo bar baz"}, + + // "<>" } func TestBalanceHtmlTags(t *testing.T) { - for _, test := range balanceHtmlTagsExamples { - result := BalanceHtmlTags(test.before) - if test.expected != result { - t.Errorf("Expected '%v', got '%v'", test.expected, result) - } - } + for _, test := range balanceHtmlTagsExamples { + result := BalanceHtmlTags(test.before) + if test.expected != result { + t.Errorf("Expected '%v', got '%v'", test.expected, result) + } + } } From 01a69b2eaf27c281fb52611f50f650b832ad0542 Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 4 Feb 2017 17:46:45 +1100 Subject: [PATCH 03/13] Added support for nested templates --- main.go | 123 ++++++++++++++++++++++++++++++++++------------- main_test.go | 133 ++++++++++++++++++++++++++++----------------------- 2 files changed, 161 insertions(+), 95 deletions(-) diff --git a/main.go b/main.go index c9c33a5..0fa59c7 100644 --- a/main.go +++ b/main.go @@ -15,6 +15,8 @@ import ( "strings" ) +const maxTemplateDepth = 8 + func ReplaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { result := "" lastIndex := 0 @@ -44,7 +46,7 @@ func BalanceHtmlTags(html string) string { for i := 0; i < len(parts)-1; i += 2 { result += parts[i] - if /*len(parts[i + 1]) > 0 &&*/ parts[i+1][0] == '/' { + if len(parts[i + 1]) > 0 && parts[i+1][0] == '/' { for j := len(stack) - 1; j >= 0; j-- { s := stack[j] stack = stack[:len(stack)-1] @@ -119,23 +121,27 @@ func HtmlToWiki(html string) string { return strings.Repeat("=", level) + groups[2] + strings.Repeat("=", level) }) - re = regexp.MustCompile(``) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - if groups[2] == "" { - return fmt.Sprintf(`{{%v}}`, groups[1]) - } + html = prepareNesting(html, "") - re = regexp.MustCompile(`(.*?)`) - result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { - if 
groups[1] == "" { - return fmt.Sprintf(`|%v`, groups[2]) - } + for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { + re = regexp.MustCompile(fmt.Sprintf(`(.*?)<%v/template>`, templateDepth, templateDepth)) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + if groups[2] == "" { + return fmt.Sprintf(`{{%v}}`, groups[1]) + } - return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) - }) + re = regexp.MustCompile(`(.*?)`) + result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + if groups[1] == "" { + return fmt.Sprintf(`|%v`, groups[2]) + } - return fmt.Sprintf(`{{%v%v}}`, groups[1], result) - }) + return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) + }) + + return fmt.Sprintf(`{{%v%v}}`, groups[1], result) + }) + } re = regexp.MustCompile(`(?s)(.*?)`) html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { @@ -144,7 +150,7 @@ func HtmlToWiki(html string) string { re = regexp.MustCompile(`(?s)\n(.*?)\n`) result += ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { re = regexp.MustCompile(`(?s)(.*?)`) - return "|-" + groups[1] + "\n" + ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + return "|-" + strings.TrimSpace(groups[1]) + "\n" + ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { if groups[1] == "d" { return "|" + groups[3] } @@ -178,6 +184,72 @@ func HtmlToWiki(html string) string { return html } +func prepareNesting(s, left, right string) string { + re := regexp.MustCompile(fmt.Sprintf("(%v|%v)", left, right)) + depth := 0 + s = ReplaceAllStringSubmatchFunc(re, s, func(groups []string) string { + if groups[1] == left { + r := left + strconv.Itoa(depth) + depth += 1 + return r + } + + depth -= 1 + return strconv.Itoa(depth) + right + }) + + return s +} + +func processTemplates(wikimarkup string) string { + wikimarkup = prepareNesting(wikimarkup, "{{", "}}") + + substitutions := []string{} 
+ + for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { + re := regexp.MustCompile(fmt.Sprintf("{{%d([^|}]+)\\|?(.*?)%d}}", templateDepth, templateDepth)) + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + r := `") + + return fmt.Sprintf("~~%d~~", len(substitutions) - 1) + }) + } + + // Run the substitutions + for { + madeChange := false + + for i, sub := range substitutions { + newMarkup := strings.Replace(wikimarkup, fmt.Sprintf("~~%d~~", i), sub, -1) + if newMarkup != wikimarkup { + wikimarkup = newMarkup + madeChange = true + break + } + } + + if !madeChange { + break + } + } + + return wikimarkup +} + func WikiToHtml(wikimarkup string) string { re := regexp.MustCompile(`(.*?)
    `) wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { @@ -191,24 +263,7 @@ func WikiToHtml(wikimarkup string) string { return fmt.Sprintf(``, encoded, groups[1]) }) - re = regexp.MustCompile("{{([^|}]+)\\|?(.*?)}}") - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - r := `" - }) + wikimarkup = processTemplates(wikimarkup) re = regexp.MustCompile("'''(.+?)'''") wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { diff --git a/main_test.go b/main_test.go index 31573f7..b652c87 100644 --- a/main_test.go +++ b/main_test.go @@ -5,6 +5,7 @@ import ( ) type example struct { + name string wiki string html string newWiki string @@ -12,95 +13,103 @@ type example struct { var examples = []example{ // and - {"foo ''bar'' baz", "foo bar baz", ""}, - {"foo ''bar'' ''baz'' qux", "foo bar baz qux", ""}, - {"foo '''bar''' baz", "foo bar baz", ""}, - {"foo '''bar''' '''baz''' qux", "foo bar baz qux", ""}, - {"foo '''''bar''''' baz", "foo bar baz", ""}, - {"foo ''bar baz", "foo bar baz", "foo ''bar baz''"}, - {"foo '''bar baz", "foo bar baz", "foo '''bar baz'''"}, + {"f101", "foo ''bar'' baz", "foo bar baz", ""}, + {"f102", "foo ''bar'' ''baz'' qux", "foo bar baz qux", ""}, + {"f103", "foo '''bar''' baz", "foo bar baz", ""}, + {"f104", "foo '''bar''' '''baz''' qux", "foo bar baz qux", ""}, + {"f105", "foo '''''bar''''' baz", "foo bar baz", ""}, + {"f106", "foo ''bar baz", "foo bar baz", "foo ''bar baz''"}, + {"f107", "foo '''bar baz", "foo bar baz", "foo '''bar baz'''"}, // Links - {"foo [[Bar]] baz", `foo Bar baz`, ""}, - {"foo [[Bar|some label]] baz", `foo some label baz`, ""}, - {"foo [[Bar|some label|foo]] baz", `foo some label|foo baz`, ""}, + {"l101", "foo [[Bar]] baz", `foo Bar baz`, ""}, + {"l102", "foo [[Bar|some label]] baz", `foo some label baz`, ""}, + {"l103", "foo [[Bar|some label|foo]] baz", `foo some label|foo baz`, ""}, // Images - {"foo 
[[File:filename.extension]] baz", `foo baz`, ""}, - {"foo [[File:filename.extension|options]] baz", `foo baz`, ""}, - {"foo [[File:filename.extension|options|caption words]] baz", `foo caption words baz`, ""}, - {"foo [[File:filename.extension|options|link=Internal]] baz", `foo baz`, ""}, - {"foo [[File:filename.extension|options|link=http://External]] baz", `foo baz`, ""}, + {"i101", "foo [[File:filename.extension]] baz", `foo baz`, ""}, + {"i102", "foo [[File:filename.extension|options]] baz", `foo baz`, ""}, + {"i103", "foo [[File:filename.extension|options|caption words]] baz", `foo caption words baz`, ""}, + {"i104", "foo [[File:filename.extension|options|link=Internal]] baz", `foo baz`, ""}, + {"i105", "foo [[File:filename.extension|options|link=http://External]] baz", `foo baz`, ""}, // References - {"foo [[ABC]] baz", `foo baz`, ""}, - {`foo [[ABC]] baz`, `foo baz`, ""}, + {"r101", "foo [[ABC]] baz", `foo baz`, ""}, + {"r102", `foo [[ABC]] baz`, `foo baz`, ""}, // - {"foo ''qux'' baz", `foo baz`, ""}, - {"foo ''qux'' baz", `foo baz`, ""}, + {"w101", "foo ''qux'' baz", `foo baz`, ""}, + {"w102", "foo ''qux'' baz", `foo baz`, ""}, // Templates - {"foo {{bar}} baz", `foo baz`, ""}, - {"foo {{bar|qux}} baz", `foo baz`, ""}, - {"foo {{bar|qux|abc}} baz", `foo baz`, ""}, - {"foo {{bar|qux=abc}} baz", `foo baz`, ""}, + {"t101", "foo {{bar}} baz", `foo baz`, ""}, + {"t102", "foo {{bar|qux}} baz", `foo baz`, ""}, + {"t103", "foo {{bar|qux|abc}} baz", `foo baz`, ""}, + {"t104", "foo {{bar|qux=abc}} baz", `foo baz`, ""}, + + // Nested templates + {"t201", + "foo {{bar|{{qux|xyz}}|a=c}} baz", + `foo baz`, + ""}, // Headings - {"====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, - {"===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, - {"==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, - {"=== The Heading ===\nbar", "

    The Heading

    \nbar", ""}, - {"== The Heading ==\nbar", "

    The Heading

    \nbar", ""}, - {"= The Heading =\nbar", "

    The Heading

    \nbar", ""}, - {" ====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, - {" ===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, - {" ==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, - {" === The Heading ===\nbar", "

    The Heading

    \nbar", ""}, - {" == The Heading ==\nbar", "

    The Heading

    \nbar", ""}, - {" = The Heading =\nbar", "

    The Heading

    \nbar", ""}, - {"foo\n====== The Heading ======\nbar", "foo\n
    The Heading
    \nbar", ""}, - {"foo\n===== The Heading =====\nbar", "foo\n
    The Heading
    \nbar", ""}, - {"foo\n==== The Heading ====\nbar", "foo\n

    The Heading

    \nbar", ""}, - {"foo\n=== The Heading ===\nbar", "foo\n

    The Heading

    \nbar", ""}, - {"foo\n== The Heading ==\nbar", "foo\n

    The Heading

    \nbar", ""}, - {"foo\n= The Heading =\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"h101", "====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, + {"h102", "===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, + {"h103", "==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, + {"h104", "=== The Heading ===\nbar", "

    The Heading

    \nbar", ""}, + {"h105", "== The Heading ==\nbar", "

    The Heading

    \nbar", ""}, + {"h106", "= The Heading =\nbar", "

    The Heading

    \nbar", ""}, + + {"h201", " ====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, + {"h202", " ===== The Heading =====\nbar", "
    The Heading
    \nbar", ""}, + {"h203", " ==== The Heading ====\nbar", "

    The Heading

    \nbar", ""}, + {"h204", " === The Heading ===\nbar", "

    The Heading

    \nbar", ""}, + {"h205", " == The Heading ==\nbar", "

    The Heading

    \nbar", ""}, + {"h206", " = The Heading =\nbar", "

    The Heading

    \nbar", ""}, + + {"h301", "foo\n====== The Heading ======\nbar", "foo\n
    The Heading
    \nbar", ""}, + {"h302", "foo\n===== The Heading =====\nbar", "foo\n
    The Heading
    \nbar", ""}, + {"h303", "foo\n==== The Heading ====\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"h304", "foo\n=== The Heading ===\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"h305", "foo\n== The Heading ==\nbar", "foo\n

    The Heading

    \nbar", ""}, + {"h306", "foo\n= The Heading =\nbar", "foo\n

    The Heading

    \nbar", ""}, // Lists - {"Foo\n* Bar\n* Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, - {"Foo\n# Bar\n# Baz\nQux", "Foo\n Bar\n Baz\nQux", ""}, - {"Foo\n*Bar\n*Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, - {"Foo\n#Bar\n#Baz\nQux", "Foo\nBar\nBaz\nQux", ""}, + {"o101", "Foo\n* Bar\n* Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, + {"o102", "Foo\n# Bar\n# Baz\nQux", "Foo\n Bar\n Baz\nQux", ""}, + {"o103", "Foo\n*Bar\n*Baz\nQux", "Foo\n
  • Bar
  • \n
  • Baz
  • \nQux", ""}, + {"o104", "Foo\n#Bar\n#Baz\nQux", "Foo\nBar\nBaz\nQux", ""}, // Tables - {"Foo\n{|\n|-\n|Bar\n|}\nQux", - "Foo\n\n\n\n\n
    Bar
    \nQux", + {"g101", "Foo\n{|\n|-\n|Bar\n|}\nQux", + "Foo\n\n\n\n\n
    Bar
    \nQux", ""}, - {"Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", + {"g102", "Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", ""}, - {"Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + {"g103", "Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", ""}, - {"Foo\n{|\n|Bar\n|}\nQux", + {"g201", "Foo\n{|\n|Bar\n|}\nQux", "Foo\n\n\n\n\n
    Bar
    \nQux", "Foo\n{|\n|-\n|Bar\n|}\nQux"}, - {"Foo\n{|\n|Bar\n|Baz\n|}\nQux", + {"g202", "Foo\n{|\n|Bar\n|Baz\n|}\nQux", "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", "Foo\n{|\n|-\n|Bar\n|Baz\n|}\nQux"}, - {"Foo\n{|\n|Bar\n|-\n|Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + {"g203", "Foo\n{|\n|Bar\n|-\n|Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", "Foo\n{|\n|-\n|Bar\n|-\n|Baz\n|}\nQux"}, - {"Foo\n{|\n!Bar\n|}\nQux", + {"g301", "Foo\n{|\n!Bar\n|}\nQux", "Foo\n\n\n\n\n
    Bar
    \nQux", "Foo\n{|\n|-\n!Bar\n|}\nQux"}, - {"Foo\n{|\n!Bar\n!Baz\n|}\nQux", + {"g302", "Foo\n{|\n!Bar\n!Baz\n|}\nQux", "Foo\n\n\n\n\n\n
    BarBaz
    \nQux", "Foo\n{|\n|-\n!Bar\n!Baz\n|}\nQux"}, - {"Foo\n{|\n!Bar\n|-\n!Baz\n|}\nQux", - "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", + {"g303", "Foo\n{|\n!Bar\n|-\n!Baz\n|}\nQux", + "Foo\n\n\n\n\n\n\n\n
    Bar
    Baz
    \nQux", "Foo\n{|\n|-\n!Bar\n|-\n!Baz\n|}\nQux"}, } @@ -112,12 +121,14 @@ func TestExamples(t *testing.T) { html := WikiToHtml(test.wiki) if html != test.html { - t.Errorf("Expected HTML '%v' from wiki '%v', got '%v'", test.html, test.wiki, html) + t.Errorf("%v:\n expected HTML: '%v'\n from wiki: '%v'\n got: '%v'\n\n", + test.name, test.html, test.wiki, html) } wiki := HtmlToWiki(test.html) if wiki != test.newWiki { - t.Errorf("Expected Wiki '%v' from HTML '%v', got '%v'", test.newWiki, test.html, wiki) + t.Errorf("%v:\n expected wiki: '%v'\n from HTML: '%v'\n got: '%v'\n\n", + test.name, test.newWiki, test.html, wiki) } } } From 660043fbf112fab93de71b568cbe6beec7fd8c54 Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 4 Feb 2017 19:49:30 +1100 Subject: [PATCH 04/13] Formatting --- main.go | 156 +++++++++++++++++++++++++-------------------------- main_test.go | 16 +++--- 2 files changed, 86 insertions(+), 86 deletions(-) diff --git a/main.go b/main.go index 0fa59c7..199f74a 100644 --- a/main.go +++ b/main.go @@ -46,7 +46,7 @@ func BalanceHtmlTags(html string) string { for i := 0; i < len(parts)-1; i += 2 { result += parts[i] - if len(parts[i + 1]) > 0 && parts[i+1][0] == '/' { + if len(parts[i+1]) > 0 && parts[i+1][0] == '/' { for j := len(stack) - 1; j >= 0; j-- { s := stack[j] stack = stack[:len(stack)-1] @@ -121,27 +121,27 @@ func HtmlToWiki(html string) string { return strings.Repeat("=", level) + groups[2] + strings.Repeat("=", level) }) - html = prepareNesting(html, "") + html = prepareNesting(html, "") - for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { - re = regexp.MustCompile(fmt.Sprintf(`(.*?)<%v/template>`, templateDepth, templateDepth)) - html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { - if groups[2] == "" { - return fmt.Sprintf(`{{%v}}`, groups[1]) - } + for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { + re = 
regexp.MustCompile(fmt.Sprintf(`(.*?)<%v/template>`, templateDepth, templateDepth)) + html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { + if groups[2] == "" { + return fmt.Sprintf(`{{%v}}`, groups[1]) + } - re = regexp.MustCompile(`(.*?)`) - result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { - if groups[1] == "" { - return fmt.Sprintf(`|%v`, groups[2]) - } + re = regexp.MustCompile(`(.*?)`) + result := ReplaceAllStringSubmatchFunc(re, groups[2], func(groups []string) string { + if groups[1] == "" { + return fmt.Sprintf(`|%v`, groups[2]) + } - return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) - }) + return fmt.Sprintf(`|%v=%v`, groups[1], groups[2]) + }) - return fmt.Sprintf(`{{%v%v}}`, groups[1], result) - }) - } + return fmt.Sprintf(`{{%v%v}}`, groups[1], result) + }) + } re = regexp.MustCompile(`(?s)(.*?)`) html = ReplaceAllStringSubmatchFunc(re, html, func(groups []string) string { @@ -185,69 +185,69 @@ func HtmlToWiki(html string) string { } func prepareNesting(s, left, right string) string { - re := regexp.MustCompile(fmt.Sprintf("(%v|%v)", left, right)) - depth := 0 - s = ReplaceAllStringSubmatchFunc(re, s, func(groups []string) string { - if groups[1] == left { - r := left + strconv.Itoa(depth) - depth += 1 - return r - } - - depth -= 1 - return strconv.Itoa(depth) + right - }) - - return s + re := regexp.MustCompile(fmt.Sprintf("(%v|%v)", left, right)) + depth := 0 + s = ReplaceAllStringSubmatchFunc(re, s, func(groups []string) string { + if groups[1] == left { + r := left + strconv.Itoa(depth) + depth += 1 + return r + } + + depth -= 1 + return strconv.Itoa(depth) + right + }) + + return s } func processTemplates(wikimarkup string) string { - wikimarkup = prepareNesting(wikimarkup, "{{", "}}") - - substitutions := []string{} - - for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { - re := regexp.MustCompile(fmt.Sprintf("{{%d([^|}]+)\\|?(.*?)%d}}", templateDepth, 
templateDepth)) - wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { - r := `") - - return fmt.Sprintf("~~%d~~", len(substitutions) - 1) - }) - } - - // Run the substitutions - for { - madeChange := false - - for i, sub := range substitutions { - newMarkup := strings.Replace(wikimarkup, fmt.Sprintf("~~%d~~", i), sub, -1) - if newMarkup != wikimarkup { - wikimarkup = newMarkup - madeChange = true - break - } - } - - if !madeChange { - break - } - } - - return wikimarkup + wikimarkup = prepareNesting(wikimarkup, "{{", "}}") + + substitutions := []string{} + + for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { + re := regexp.MustCompile(fmt.Sprintf("{{%d([^|}]+)\\|?(.*?)%d}}", templateDepth, templateDepth)) + wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { + r := `") + + return fmt.Sprintf("~~%d~~", len(substitutions)-1) + }) + } + + // Run the substitutions + for { + madeChange := false + + for i, sub := range substitutions { + newMarkup := strings.Replace(wikimarkup, fmt.Sprintf("~~%d~~", i), sub, -1) + if newMarkup != wikimarkup { + wikimarkup = newMarkup + madeChange = true + break + } + } + + if !madeChange { + break + } + } + + return wikimarkup } func WikiToHtml(wikimarkup string) string { diff --git a/main_test.go b/main_test.go index b652c87..3c7454a 100644 --- a/main_test.go +++ b/main_test.go @@ -47,11 +47,11 @@ var examples = []example{ {"t103", "foo {{bar|qux|abc}} baz", `foo baz`, ""}, {"t104", "foo {{bar|qux=abc}} baz", `foo baz`, ""}, - // Nested templates - {"t201", - "foo {{bar|{{qux|xyz}}|a=c}} baz", - `foo baz`, - ""}, + // Nested templates + {"t201", + "foo {{bar|{{qux|xyz}}|a=c}} baz", + `foo baz`, + ""}, // Headings {"h101", "====== The Heading ======\nbar", "
    The Heading
    \nbar", ""}, @@ -122,13 +122,13 @@ func TestExamples(t *testing.T) { html := WikiToHtml(test.wiki) if html != test.html { t.Errorf("%v:\n expected HTML: '%v'\n from wiki: '%v'\n got: '%v'\n\n", - test.name, test.html, test.wiki, html) + test.name, test.html, test.wiki, html) } wiki := HtmlToWiki(test.html) if wiki != test.newWiki { - t.Errorf("%v:\n expected wiki: '%v'\n from HTML: '%v'\n got: '%v'\n\n", - test.name, test.newWiki, test.html, wiki) + t.Errorf("%v:\n expected wiki: '%v'\n from HTML: '%v'\n got: '%v'\n\n", + test.name, test.newWiki, test.html, wiki) } } } From be1ad8fe619f295f507f0343ef0e1b033666d99d Mon Sep 17 00:00:00 2001 From: Elliot Chance Date: Sat, 4 Feb 2017 19:56:49 +1100 Subject: [PATCH 05/13] Templates and argument names may contains extra space or newlines --- main.go | 4 ++-- main_test.go | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/main.go b/main.go index 199f74a..cfdae08 100644 --- a/main.go +++ b/main.go @@ -207,7 +207,7 @@ func processTemplates(wikimarkup string) string { substitutions := []string{} for templateDepth := maxTemplateDepth; templateDepth >= 0; templateDepth-- { - re := regexp.MustCompile(fmt.Sprintf("{{%d([^|}]+)\\|?(.*?)%d}}", templateDepth, templateDepth)) + re := regexp.MustCompile(fmt.Sprintf("(?s){{%d([^|}]+)\\|?(.*?)%d}}", templateDepth, templateDepth)) wikimarkup = ReplaceAllStringSubmatchFunc(re, wikimarkup, func(groups []string) string { r := `