CoreCodeDown.hs
moduleMarkdown
and Code
type synonymsTypeOfCode
and its CodeSymbolism
type123
TypeOfCodeName
To add a new code language:
TypeOfCode
andCodeSymbolism
for it to the codeSymbolism
function.Optionally,
codeFileType
for default extensions of filenames in the PandocCodeDown
module.Everything else (including the generation of all the helper text) is done automatically.
module CoreCodeDown ( -- * Basic types Markdown, Code, TypeOfCode (..), TypeOfCodeName, typeOfCodeNameList, typeOfCode, -- * Code to Markdown conversion pureCodeToMarkdown, codeToMarkdown, -- * Markdown to Code conversions markdownToPureCode, markdownToCode, -- * Auxiliary IO functions readFrom, writeTo, toAction, -- * IO versions of the converters codeToMarkdownIO, pureCodeToMarkdownIO, markdownToPureCodeIO, markdownToCodeIO, -- * Showing and printing help messages showSymbolismTable, printSymbolismTable, showCodeHelp, printCodeHelp, ) where
import Data.Maybe (isJust, fromJust) import Data.Char (isSpace, toUpper, toLower) import Data.List (stripPrefix) import System.FilePath.Posix import TextDisplay
Markdown
and Code
type synonymstype Markdown = String type Code = String
TypeOfCode
and its CodeSymbolism
data TypeOfCode = SCHEME -- type 1, inline comment ; | BASH -- type 1, inline comment # | LATEX_CODE -- type 1, inline comment % | PERL -- type 1, inline comment # | PYTHON -- type 1, inline comment # | RUBY -- type 1, inline comment # | SML -- type 2, block comment (* and *) | OCAML -- type 2, block comment (* and *) | SQL -- type 2, block comment /* and */ | HTML_CODE -- type 2, block comment <!-- and --> | XML_CODE -- type 2, block comment <!-- and --> | C -- type 3, inline comment // and block comment /* and */ | CPP -- type 3, inline comment // and block comment /* and */ | JAVA -- type 3, inline comment // and block comment /* and */ | SCALA -- type 3, inline comment // and block comment /* and */ | JAVASCRIPT -- type 3, inline comment // and block comment /* and */ | PHP -- type 3, inline comment // and block comment /* and */ | HASKELL -- type 3, inline comment -- and block comment {-- and --} | LISP -- type 3, inline comment ; and block comment #| and |# deriving (Show, Read, Eq, Ord, Enum, Bounded) data CodeSymbolism = CodeSymbolism { -- original comment structure of the language: commentLine :: Code, -- the string that precedes inline comments commentBegin :: Code, -- the string that opens a block comment commentEnd :: Code, -- the string that closes a block comment -- derived symbols for CodeDown docLine :: Code, -- the string at the beginning of a markdown document line docBegin :: Code, -- the string that opens a markdown document block docEnd :: Code, -- the string that terminates a markdown document block literalBegin :: Code, -- the comment that opens a literal code block literalEnd :: Code -- the comment that closes a literal code block } deriving (Show, Read, Eq, Ord) codeSymbolism :: TypeOfCode -> CodeSymbolism codeSymbolism toc = case toc of SCHEME -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = ";" , docBegin = "" , docEnd = "" , docLine = "; ; " , literalBegin = ";;;BEGIN;;;" , literalEnd = ";;;END;;;" } BASH -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = "#" , docBegin = "" , docEnd = "" , docLine = "# # " , literalBegin = "###BEGIN###" , literalEnd = "###END###" } LATEX_CODE -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = "%" , docBegin = "" , docEnd = "" , docLine = "% % " , literalBegin = "%%%BEGIN%%%" , literalEnd = "%%%END%%%" } PERL -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = "#" , docBegin = "" , docEnd = "" , docLine = "# # " , literalBegin = "###BEGIN###" , literalEnd = "###END###" } RUBY -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = "#" , docBegin = "" , docEnd = "" , docLine = "# # " , literalBegin = "###BEGIN###" , literalEnd = "###END###" } PYTHON -> CodeSymbolism { commentBegin = "" , commentEnd = "" , commentLine = "#" , docBegin = "" , docEnd = "" , docLine = "# # " , literalBegin = "###BEGIN###" , literalEnd = "###END###" } SML -> CodeSymbolism { commentBegin = "(*" , commentEnd = "*)" , commentLine = "" , docBegin = "(***" , docEnd = "***)" , docLine = "" , literalBegin = "(***BEGIN***)" , literalEnd = "(***END***)" } OCAML -> CodeSymbolism { commentBegin = "(*" , commentEnd = "*)" , commentLine = "" , docBegin = "(***" , docEnd = "***)" , docLine = "" , literalBegin = "(***BEGIN***)" , literalEnd = "(***END***)" } SQL -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "" , docBegin = "/***" , docEnd = "***/" , docLine = "" , literalBegin = "/***BEGIN***/" , literalEnd = "/***END***/" } HTML_CODE -> CodeSymbolism { commentBegin = "<!--" , commentEnd = "-->" , commentLine = "" , docBegin = "<!-- --" , docEnd = "-- -->" , docLine = "" , literalBegin = "<!--BEGIN-->" , literalEnd = "<!--END-->" } XML_CODE -> CodeSymbolism { commentBegin = "<!--" , commentEnd = "-->" , commentLine = "" , docBegin = "<!-- -- " , docEnd = "-- -->" , docLine = "" , literalBegin = "<!--BEGIN-->" , literalEnd = "<!--END-->" } C -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } CPP -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } JAVA -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } SCALA -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } JAVASCRIPT -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } PHP -> CodeSymbolism { commentBegin = "/*" , commentEnd = "*/" , commentLine = "//" , docBegin = "/***" , docEnd = "***/" , docLine = "// // " , literalBegin = "///BEGIN///" , literalEnd = "///END///" } HASKELL -> CodeSymbolism { commentBegin = "{-" , commentEnd = "-}" , commentLine = "--" , docBegin = "{---" , docEnd = "---}" , docLine = "-- -- " , literalBegin = "---BEGIN---" , literalEnd = "---END---" } LISP -> CodeSymbolism { commentBegin = "#|" , commentEnd = "|#" , commentLine = ";" , docBegin = "#|||" , docEnd = "|||#" , docLine = "; ; " , literalBegin = ";;;BEGIN;;;" , literalEnd = ";;;END;;;" }
type123
Let toc
be a TypeOfCode
, then type123 toc
is either one of the following numbers:
1
, if toc
has a line comment, but not a block comment
2
, if toc
has a block comment, but no line comment
3
, if toc
has both, a line and block comment syntax.
type123 :: TypeOfCode -> Int type123 toc = let s = codeSymbolism toc in if (null (commentBegin s) && null (commentEnd s)) then 1 else if (null (commentLine s)) then 2 else 3
TypeOfCodeName
type TypeOfCodeName = String typeOfCode :: TypeOfCodeName -> Maybe TypeOfCode typeOfCode str = let str' = map toUpper str in if str' `elem` typeOfCodeNameList then Just (read str') else Nothing typeOfCodeNameList :: [TypeOfCodeName] typeOfCodeNameList = map show typeOfCodeList where typeOfCodeList = [minBound..maxBound] :: [TypeOfCode]
literalMarkdownPrefix :: String literalMarkdownPrefix = "> " -- i.e. a greater-than symbol plus five spaces
pureCodeToMarkdown :: Code -> Markdown pureCodeToMarkdown = unlines . (map (literalMarkdownPrefix ++)) . lines codeToMarkdown :: TypeOfCodeName -> Code -> Markdown codeToMarkdown str = let toc = case typeOfCode str of Just toc -> toc Nothing -> error (str ++ " is not a defined TypeOfCodeName") :: TypeOfCode s = codeSymbolism toc :: CodeSymbolism t = type123 toc :: Int in if t == 3 then codeToMarkdown3 (docLine s, docBegin s, docEnd s, literalBegin s, literalEnd s) else if t == 2 then codeToMarkdown2 (docBegin s, docEnd s, literalBegin s, literalEnd s) else if t == 1 then codeToMarkdown1 (docLine s, literalBegin s, literalEnd s) else error "Undefined type123" data ParseModus = TEXTDOC | LITERAL | IGNORED codeToMarkdown3 :: (Code, Code, Code, Code, Code) -> Code -> Markdown codeToMarkdown3 (docLine, docBegin, docEnd, literalBegin, literalEnd) = converter where converter :: Code -> Markdown converter = unlines . (parse IGNORED) . lines parse :: ParseModus -> [Code] -> [Markdown] parse IGNORED [] = [] parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++ literalBegin ++ " without a corrsponding " ++ literalEnd ++ " that closes the LITERAL block.") parse TEXTDOC [] = error ("Source code terminated in TEXTDOC mode. There is a " ++ docBegin ++ " without a corresponding " ++ docEnd ++ " that closes the TEXTDOC block.") parse IGNORED (row:rows) = case (stripPrefix docLine row) of Just row' -> row' : (parse IGNORED rows) Nothing -> case (stripPrefix docBegin row) of Just _ -> parse TEXTDOC rows Nothing -> case (stripPrefix literalBegin row) of Just row' -> "" : (parse LITERAL rows) Nothing -> case (stripPrefix docEnd row) of Just _ -> error ("The " ++ docEnd ++ " was not opened before.") Nothing -> case (stripPrefix literalEnd row) of Just _ -> error ("The " ++ literalEnd ++ " was not opened before.") Nothing -> parse IGNORED rows parse LITERAL (row:rows) = case (stripPrefix literalEnd row) of Just row' -> "" : (parse IGNORED rows) Nothing -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows) parse TEXTDOC (row:rows) = case (stripPrefix docEnd row) of Just row' -> parse IGNORED rows Nothing -> row : (parse TEXTDOC rows) codeToMarkdown2 :: (Code, Code, Code, Code) -> Code -> Markdown codeToMarkdown2 (docBegin, docEnd, literalBegin, literalEnd) = converter where converter :: Code -> Markdown converter = unlines . (parse IGNORED) . lines parse :: ParseModus -> [Code] -> [Markdown] parse IGNORED [] = [] parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++ literalBegin ++ " without a corrsponding " ++ literalEnd ++ " that closes the LITERAL block.") parse TEXTDOC [] = error ("Source code terminated in TEXTDOC mode. There is a " ++ docBegin ++ " without a corresponding " ++ docEnd ++ " that closes the TEXTDOC block.") parse IGNORED (row:rows) = case (stripPrefix literalBegin row) of Just row' -> "" : (parse LITERAL rows) Nothing -> case (stripPrefix docBegin row) of Just _ -> parse TEXTDOC rows Nothing -> case (stripPrefix docEnd row) of Just _ -> error ("The " ++ docEnd ++ " was not opened before.") Nothing -> case (stripPrefix literalEnd row) of Just _ -> error ("The " ++ literalEnd ++ " was not opened before.") Nothing -> parse IGNORED rows parse LITERAL (row:rows) = case (stripPrefix literalEnd row) of Just row' -> "" : (parse IGNORED rows) Nothing -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows) parse TEXTDOC (row:rows) = case (stripPrefix docEnd row) of Just row' -> parse IGNORED rows Nothing -> row : (parse TEXTDOC rows) codeToMarkdown1 :: (Code, Code, Code) -> Code -> Markdown codeToMarkdown1 (docLine, literalBegin, literalEnd) = converter where converter :: Code -> Markdown converter = unlines . (parse IGNORED) . lines parse :: ParseModus -> [Code] -> [Markdown] parse IGNORED [] = [] parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++ literalBegin ++ " without a corrsponding " ++ literalEnd ++ " that closes the LITERAL block.") parse IGNORED (row:rows) = case (stripPrefix docLine row) of Just row' -> row' : (parse IGNORED rows) Nothing -> case (stripPrefix literalBegin row) of Just row' -> "" : (parse LITERAL rows) Nothing -> case (stripPrefix literalEnd row) of Just _ -> error ("The " ++ literalEnd ++ " was not opened before.") Nothing -> parse IGNORED rows parse LITERAL (row:rows) = case (stripPrefix literalEnd row) of Just row' -> if (all isSpace row') then "" : (parse IGNORED rows) else "" : row' : (parse IGNORED rows) Nothing -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows)
data ParseBlock = DOCBLOCK [Markdown] | CODEBLOCK [Code] parseBlocks :: Markdown -> [ParseBlock] parseBlocks = parse . lines where parse :: [Markdown] -> [ParseBlock] parse [] = [] parse rows = let (quoteRows, rows') = span isQuote rows in if (null quoteRows) then let (nonQuoteRows, rows') = span (not . isQuote) rows in if (null nonQuoteRows) then [] else (DOCBLOCK nonQuoteRows) : (parse rows') else if (all isLiteralQuote quoteRows) then (CODEBLOCK (map unquoteLiteral quoteRows)) : (parse rows') else let (nonQuoteRows, rows'') = span (not . isQuote) rows' in (DOCBLOCK (quoteRows ++ nonQuoteRows)) : (parse rows'') isQuote :: Markdown -> Bool isQuote row = (not (null row)) && (head row == '>') isLiteralQuote :: Markdown -> Bool isLiteralQuote row = isJust (stripPrefix literalMarkdownPrefix row) unquoteLiteral :: Markdown -> Code unquoteLiteral row = fromJust (stripPrefix literalMarkdownPrefix row) markdownToPureCode :: Markdown -> Code markdownToPureCode = unlines . pureCode . parseBlocks where pureCode :: [ParseBlock] -> [Code] pureCode [] = [] pureCode ((DOCBLOCK rows) : blocks) = pureCode blocks pureCode ((CODEBLOCK rows) : blocks) = rows ++ (pureCode blocks) markdownToCode :: TypeOfCodeName -> Markdown -> Code markdownToCode str = let toc = case typeOfCode str of Just toc -> toc Nothing -> error (str ++ " is not a defined TypeOfCodeName") :: TypeOfCode s = codeSymbolism toc :: CodeSymbolism t = type123 toc :: Int in if t == 3 then markdownToCode3 (docLine s, docBegin s, docEnd s, literalBegin s, literalEnd s) else if t == 2 then markdownToCode2 (docBegin s, docEnd s, literalBegin s, literalEnd s) else if t == 1 then markdownToCode1 (docLine s, literalBegin s, literalEnd s) else error "Undefined type123" markdownToCode1 :: (String, String, String) -> Markdown -> Code markdownToCode1 (docLine, literalBegin, literalEnd) = unlines . convert . parseBlocks where convert :: [ParseBlock] -> [Code] convert [] = [] convert ((DOCBLOCK rows):parseBlocks) = (map (docLine ++) rows) ++ (convert parseBlocks) convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks) markdownToCode2 :: (String, String, String, String) -> Markdown -> Code markdownToCode2 (docBegin, docEnd, literalBegin, literalEnd) = unlines . convert . parseBlocks where convert :: [ParseBlock] -> [Code] convert [] = [] convert ((DOCBLOCK rows):parseBlocks) = [docBegin] ++ rows ++ [docEnd] ++ (convert parseBlocks) convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks) markdownToCode3 :: (String, String, String, String, String) -> Markdown -> Code markdownToCode3 (docLine, docBegin, docEnd, literalBegin, literalEnd) = unlines . convert . parseBlocks where convert :: [ParseBlock] -> [Code] convert [] = [] convert ((DOCBLOCK rows):parseBlocks) = if (length rows > 1) then [docBegin] ++ rows ++ [docEnd] ++ (convert parseBlocks) else (map (docLine ++) rows) ++ (convert parseBlocks) convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks)
readFrom :: [FilePath] -> IO String readFrom files = if null files then getContents else do stringList <- mapM readFile files let string = concat stringList return string writeTo :: Maybe FilePath -> String -> IO () writeTo maybeFile string = case maybeFile of Just outFile -> writeFile outFile string Nothing -> putStrLn string toAction :: (String -> String) -> [FilePath] -> Maybe FilePath -> IO () toAction converter inFiles optionalOutFile = do inputString <- readFrom inFiles let outputString = converter inputString writeTo optionalOutFile outputString
pureCodeToMarkdownIO :: [FilePath] -> Maybe FilePath -> IO () pureCodeToMarkdownIO = toAction pureCodeToMarkdown codeToMarkdownIO :: TypeOfCodeName -> [FilePath] -> Maybe FilePath -> IO () codeToMarkdownIO toc = toAction (codeToMarkdown toc) markdownToPureCodeIO :: [FilePath] -> Maybe FilePath -> IO () markdownToPureCodeIO = toAction markdownToPureCode markdownToCodeIO :: TypeOfCodeName -> [FilePath] -> Maybe FilePath -> IO () markdownToCodeIO toc = toAction (markdownToCode toc)
showSymbolismTable :: String showSymbolismTable = unlines textFrame where listOfCodes = [minBound .. maxBound] :: [TypeOfCode] headRow = [["Type of", "code"], ["commment", "line"], ["comment", "begin"], ["comment", "end"], ["doc", "line"], ["doc", "begin"], ["doc", "end"], ["literal", " begin"], ["literal", "end"]] headRow' = map correctTextFrame headRow :: [TextFrame] oneTableRow toc = let cs = codeSymbolism toc in map (\ cell -> [cell]) [show toc, commentLine cs, commentBegin cs, commentEnd cs, docLine cs, docBegin cs, docEnd cs, literalBegin cs, literalEnd cs] :: [TextFrame] allRows = [headRow] ++ (map oneTableRow listOfCodes) textFrame = gridMerge (normalTextFrameTable allRows) printSymbolismTable :: IO () printSymbolismTable = putStr showSymbolismTable showCodeHelp :: TypeOfCodeName -> String showCodeHelp tocName | typeNumber == 1 = showHelp1 | typeNumber == 2 = showHelp2 | typeNumber == 3 = showHelp3 where toc :: TypeOfCode toc = case typeOfCode tocName of Just toc -> toc Nothing -> error (tocName ++ " is not a well--defined TypeOfCodeName") typeNumber :: Int typeNumber = type123 toc symb :: CodeSymbolism symb = codeSymbolism toc lineText :: String lineText = unlines $ textFrameBox [(docLine symb) ++ "... one line of Markdown text ..."] blockText :: String blockText = unlines $ textFrameBox $ correctTextFrame [docBegin symb, "... lines of Markdown text ...", docEnd symb] literalText :: String literalText = unlines $ textFrameBox $ correctTextFrame [literalBegin symb, "... lines of " ++ tocName ++ " code ...", literalEnd symb] showHelp3 :: String showHelp3 = "Markdown document lines in " ++ tocName ++ ":\n" ++ lineText ++ "Markdown document blocks in " ++ tocName ++ ":\n" ++ blockText ++ "Literal " ++ tocName ++ " code blocks:\n" ++ literalText showHelp2 :: String showHelp2 = "Markdown document blocks in " ++ tocName ++ ":\n" ++ blockText ++ "Literal " ++ tocName ++ " code blocks:\n" ++ literalText showHelp1 :: String showHelp1 = "Markdown document lines in " ++ tocName ++ ":\n" ++ lineText ++ "Literal " ++ tocName ++ " code blocks:\n" ++ literalText printCodeHelp :: TypeOfCodeName -> IO () printCodeHelp = putStrLn . showCodeHelp