The CoreCodeDown.hs module

Notes

To add a new code language:

  1. add the new entry to the TypeOfCode and
  2. add CodeSymbolism for it to the codeSymbolism function.

Optionally,

  1. add an entry to the codeFileType for default extensions of filenames in the PandocCodeDown module.

Everything else (including the generation of all the helper text) is done automatically.

The Implementation

Module header (including exports and Haddock documentation)

module CoreCodeDown (
  -- * Basic types
  Markdown,
  Code,
  TypeOfCode (..),
  TypeOfCodeName,
  typeOfCodeNameList,
  typeOfCode,
  -- * Code to Markdown conversion
  pureCodeToMarkdown,
  codeToMarkdown,
  -- * Markdown to Code conversions
  markdownToPureCode,
  markdownToCode,
  -- * Auxiliary IO functions
  readFrom,
  writeTo,
  toAction,
  -- * IO versions of the converters
  codeToMarkdownIO,
  pureCodeToMarkdownIO,
  markdownToPureCodeIO,
  markdownToCodeIO,
  -- * Showing and printing help messages
  showSymbolismTable,
  printSymbolismTable,
  showCodeHelp,
  printCodeHelp,
) where

Imports

  import Data.Maybe (isJust, fromJust)
  import Data.Char (isSpace, toUpper, toLower)
  import Data.List (stripPrefix)
  import System.FilePath.Posix
  import TextDisplay

The Markdown and Code type synonyms

  type Markdown = String
  type Code = String

The TypeOfCode and its CodeSymbolism

  data TypeOfCode
    = SCHEME                 -- type 1, inline comment ;
    | BASH                   -- type 1, inline comment #
    | LATEX_CODE             -- type 1, inline comment %
    | PERL                   -- type 1, inline comment #
    | PYTHON                 -- type 1, inline comment #
    | RUBY                   -- type 1, inline comment #
    | SML                    -- type 2, block comment (* and *)
    | OCAML                  -- type 2, block comment (* and *)
    | SQL                    -- type 2, block comment /* and */
    | HTML_CODE              -- type 2, block comment <!-- and -->
    | XML_CODE               -- type 2, block comment <!-- and -->
    | C                      -- type 3, inline comment // and block comment /* and */
    | CPP                    -- type 3, inline comment // and block comment /* and */
    | JAVA                   -- type 3, inline comment // and block comment /* and */
    | SCALA                  -- type 3, inline comment // and block comment /* and */
    | JAVASCRIPT             -- type 3, inline comment // and block comment /* and */
    | PHP                    -- type 3, inline comment // and block comment /* and */
    | HASKELL                -- type 3, inline comment -- and block comment {-- and --}
    | LISP                   -- type 3, inline comment ; and block comment #| and |#
    deriving (Show, Read, Eq, Ord, Enum, Bounded)

  data CodeSymbolism = CodeSymbolism {
    -- original comment structure of the language:
    commentLine   :: Code,           -- the string that precedes inline comments
    commentBegin  :: Code,           -- the string that opens a block comment
    commentEnd    :: Code,           -- the string that closes a block comment
    -- derived symbols for CodeDown
    docLine       :: Code,           -- the string at the beginning of a markdown document line
    docBegin      :: Code,           -- the string that opens a markdown document block
    docEnd        :: Code,           -- the string that terminates a markdown document block
    literalBegin  :: Code,           -- the comment that opens a literal code block
    literalEnd    :: Code            -- the comment that closes a literal code block
  } deriving (Show, Read, Eq, Ord)

  codeSymbolism :: TypeOfCode -> CodeSymbolism
  codeSymbolism toc = case toc of
    SCHEME ->  CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = ";"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "; ; "  ,
      literalBegin = ";;;BEGIN;;;"   , literalEnd = ";;;END;;;"
    }
    BASH -> CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = "#"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "# # "  ,
      literalBegin = "###BEGIN###"   , literalEnd = "###END###"
    }
    LATEX_CODE -> CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = "%"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "% % "  ,
      literalBegin = "%%%BEGIN%%%"   , literalEnd = "%%%END%%%"
    }
    PERL -> CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = "#"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "# # "  ,
      literalBegin = "###BEGIN###"   , literalEnd = "###END###"
    }
    RUBY -> CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = "#"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "# # "  ,
      literalBegin = "###BEGIN###"   , literalEnd = "###END###"
    }
    PYTHON -> CodeSymbolism {
      commentBegin = ""              , commentEnd = ""            , commentLine = "#"     ,
      docBegin     = ""              , docEnd     = ""            , docLine     = "# # "  ,
      literalBegin = "###BEGIN###"   , literalEnd = "###END###"
    }
    SML -> CodeSymbolism {
      commentBegin = "(*"            , commentEnd = "*)"          , commentLine = ""      ,
      docBegin     = "(***"          , docEnd     = "***)"        , docLine     = ""      ,
      literalBegin = "(***BEGIN***)" , literalEnd = "(***END***)"
    }
    OCAML -> CodeSymbolism {
      commentBegin = "(*"            , commentEnd = "*)"          , commentLine = ""      ,
      docBegin     = "(***"          , docEnd     = "***)"        , docLine     = ""      ,
      literalBegin = "(***BEGIN***)" , literalEnd = "(***END***)"
    }
    SQL -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = ""      ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = ""      ,
      literalBegin = "/***BEGIN***/" , literalEnd = "/***END***/"
    }
    HTML_CODE -> CodeSymbolism {
      commentBegin = "<!--"          , commentEnd = "-->"         , commentLine = ""      ,
      docBegin     = "<!-- --"       , docEnd     = "-- -->"      , docLine     = ""      ,
      literalBegin = "<!--BEGIN-->"  , literalEnd = "<!--END-->"
    }
    XML_CODE -> CodeSymbolism {
      commentBegin = "<!--"          , commentEnd = "-->"         , commentLine = ""      ,
      docBegin     = "<!-- -- "      , docEnd     = "-- -->"      , docLine     = ""      ,
      literalBegin = "<!--BEGIN-->"  , literalEnd = "<!--END-->"
    }
    C -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    CPP -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    JAVA -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    SCALA -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    JAVASCRIPT -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    PHP -> CodeSymbolism {
      commentBegin = "/*"            , commentEnd = "*/"          , commentLine = "//"     ,
      docBegin     = "/***"          , docEnd     = "***/"        , docLine     = "// // " ,
      literalBegin = "///BEGIN///"   , literalEnd = "///END///"
    }
    HASKELL -> CodeSymbolism {
      commentBegin = "{-"            , commentEnd = "-}"          , commentLine = "--"     ,
      docBegin     = "{---"          , docEnd     = "---}"        , docLine     = "-- -- " ,
      literalBegin = "---BEGIN---"   , literalEnd = "---END---"
    }
    LISP -> CodeSymbolism {
      commentBegin = "#|"            , commentEnd = "|#"          , commentLine = ";"     ,
      docBegin     = "#|||"          , docEnd     = "|||#"        , docLine     = "; ; "  ,
      literalBegin = ";;;BEGIN;;;"   , literalEnd = ";;;END;;;"
    }

type123

Let toc be a TypeOfCode, then type123 toc is either one of the following numbers:

  type123 :: TypeOfCode -> Int
  type123 toc = let s = codeSymbolism toc
                in if (null (commentBegin s) && null (commentEnd s))
                   then 1
                   else if (null (commentLine s))
                        then 2
                        else 3

TypeOfCodeName

  type TypeOfCodeName = String

  typeOfCode :: TypeOfCodeName -> Maybe TypeOfCode
  typeOfCode str = let str' = map toUpper str
                   in if str' `elem` typeOfCodeNameList
                      then Just (read str')
                      else Nothing

  typeOfCodeNameList :: [TypeOfCodeName]
  typeOfCodeNameList = map show typeOfCodeList
    where typeOfCodeList = [minBound..maxBound] :: [TypeOfCode]

The literal markdown prefix

  literalMarkdownPrefix :: String
  literalMarkdownPrefix = ">     "   -- i.e. a greater-than symbol plus five spaces

Code-to-Markdown conversion

  pureCodeToMarkdown :: Code -> Markdown
  pureCodeToMarkdown = unlines . (map (literalMarkdownPrefix ++)) . lines

  codeToMarkdown :: TypeOfCodeName -> Code -> Markdown
  codeToMarkdown str = let toc = case typeOfCode str of
                                   Just toc -> toc
                                   Nothing  -> error (str ++ " is not a defined TypeOfCodeName") :: TypeOfCode
                           s   = codeSymbolism toc                                               :: CodeSymbolism
                           t   = type123 toc                                                     :: Int
                       in if t == 3
                          then codeToMarkdown3 (docLine s, docBegin s, docEnd s, literalBegin s, literalEnd s)
                          else if t == 2
                               then codeToMarkdown2 (docBegin s, docEnd s, literalBegin s, literalEnd s)
                               else if t == 1
                                    then codeToMarkdown1 (docLine s, literalBegin s, literalEnd s)
                                    else error "Undefined type123"

  data ParseModus = TEXTDOC | LITERAL | IGNORED

  codeToMarkdown3 :: (Code, Code, Code, Code, Code) -> Code -> Markdown
  codeToMarkdown3 (docLine, docBegin, docEnd, literalBegin, literalEnd) = converter
    where converter :: Code -> Markdown
          converter = unlines . (parse IGNORED) . lines
          parse :: ParseModus -> [Code] -> [Markdown]
          parse IGNORED [] = []
          parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++
                                    literalBegin ++ " without a corrsponding " ++
                                    literalEnd ++ " that closes the LITERAL block.")
          parse TEXTDOC [] = error ("Source code terminated in TEXTDOC mode. There is a " ++
                                    docBegin ++ " without a corresponding " ++
                                    docEnd ++ " that closes the TEXTDOC block.")
          parse IGNORED (row:rows) =
            case (stripPrefix docLine row) of
              Just row' -> row' : (parse IGNORED rows)
              Nothing -> case (stripPrefix docBegin row) of
                Just _ -> parse TEXTDOC rows
                Nothing -> case (stripPrefix literalBegin row) of
                  Just row' -> "" : (parse LITERAL rows)
                  Nothing -> case (stripPrefix docEnd row) of
                    Just _  -> error ("The " ++ docEnd ++ " was not opened before.")
                    Nothing -> case (stripPrefix literalEnd row) of
                      Just _  -> error ("The " ++ literalEnd ++ " was not opened before.")
                      Nothing -> parse IGNORED rows
          parse LITERAL (row:rows) =
            case (stripPrefix literalEnd row) of
              Just row' -> "" : (parse IGNORED rows)
              Nothing   -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows)
          parse TEXTDOC (row:rows) =
            case (stripPrefix docEnd row) of
              Just row' -> parse IGNORED rows
              Nothing   -> row : (parse TEXTDOC rows)

  codeToMarkdown2 :: (Code, Code, Code, Code) -> Code -> Markdown
  codeToMarkdown2 (docBegin, docEnd, literalBegin, literalEnd) = converter
    where converter :: Code -> Markdown
          converter = unlines . (parse IGNORED) . lines
          parse :: ParseModus -> [Code] -> [Markdown]
          parse IGNORED [] = []
          parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++
                                    literalBegin ++ " without a corrsponding " ++
                                    literalEnd ++ " that closes the LITERAL block.")
          parse TEXTDOC [] = error ("Source code terminated in TEXTDOC mode. There is a " ++
                                    docBegin ++ " without a corresponding " ++
                                    docEnd ++ " that closes the TEXTDOC block.")
          parse IGNORED (row:rows) =
            case (stripPrefix literalBegin row) of
              Just row' -> "" : (parse LITERAL rows)
              Nothing -> case (stripPrefix docBegin row) of
                Just _ -> parse TEXTDOC rows
                Nothing -> case (stripPrefix docEnd row) of
                  Just _  -> error ("The " ++ docEnd ++ " was not opened before.")
                  Nothing -> case (stripPrefix literalEnd row) of
                    Just _  -> error ("The " ++ literalEnd ++ " was not opened before.")
                    Nothing -> parse IGNORED rows
          parse LITERAL (row:rows) =
            case (stripPrefix literalEnd row) of
              Just row' -> "" : (parse IGNORED rows)
              Nothing   -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows)
          parse TEXTDOC (row:rows) =
            case (stripPrefix docEnd row) of
              Just row' -> parse IGNORED rows
              Nothing   -> row : (parse TEXTDOC rows)

  codeToMarkdown1 :: (Code, Code, Code) -> Code -> Markdown
  codeToMarkdown1 (docLine, literalBegin, literalEnd) = converter
    where converter :: Code -> Markdown
          converter = unlines . (parse IGNORED) . lines
          parse :: ParseModus -> [Code] -> [Markdown]
          parse IGNORED [] = []
          parse LITERAL [] = error ("Source code terminated in LITERAL mode. There is a " ++
                                    literalBegin ++ " without a corrsponding " ++
                                    literalEnd ++ " that closes the LITERAL block.")
          parse IGNORED (row:rows) =
            case (stripPrefix docLine row) of
              Just row' -> row' : (parse IGNORED rows)
              Nothing -> case (stripPrefix literalBegin row) of
                Just row' -> "" : (parse LITERAL rows)
                Nothing -> case (stripPrefix literalEnd row) of
                  Just _  -> error ("The " ++ literalEnd ++ " was not opened before.")
                  Nothing -> parse IGNORED rows
          parse LITERAL (row:rows) =
            case (stripPrefix literalEnd row) of
              Just row' -> if (all isSpace row')
                           then "" : (parse IGNORED rows)
                           else "" : row' : (parse IGNORED rows)
              Nothing   -> (literalMarkdownPrefix ++ row) : (parse LITERAL rows)

Markdown-to-Code conversion

  data ParseBlock = DOCBLOCK [Markdown] | CODEBLOCK [Code]

  parseBlocks :: Markdown -> [ParseBlock]
  parseBlocks = parse . lines
    where parse :: [Markdown] -> [ParseBlock]
          parse [] = []
          parse rows = let (quoteRows, rows') = span isQuote rows
                       in if (null quoteRows)
                          then let (nonQuoteRows, rows') = span (not . isQuote) rows
                               in if (null nonQuoteRows)
                                  then []
                                  else (DOCBLOCK nonQuoteRows) : (parse rows')
                          else if (all isLiteralQuote quoteRows)
                               then (CODEBLOCK (map unquoteLiteral quoteRows)) : (parse rows')
                               else let (nonQuoteRows, rows'') = span (not . isQuote) rows'
                                    in (DOCBLOCK (quoteRows ++ nonQuoteRows)) : (parse rows'')
          isQuote :: Markdown -> Bool
          isQuote row = (not (null row)) && (head row == '>')
          isLiteralQuote :: Markdown -> Bool
          isLiteralQuote row = isJust (stripPrefix literalMarkdownPrefix row)
          unquoteLiteral :: Markdown -> Code
          unquoteLiteral row = fromJust (stripPrefix literalMarkdownPrefix row)

  markdownToPureCode :: Markdown -> Code
  markdownToPureCode = unlines . pureCode . parseBlocks
    where pureCode :: [ParseBlock] -> [Code]
          pureCode [] = []
          pureCode ((DOCBLOCK  rows) : blocks) = pureCode blocks
          pureCode ((CODEBLOCK rows) : blocks) = rows ++ (pureCode blocks)

  markdownToCode :: TypeOfCodeName -> Markdown -> Code
  markdownToCode str = let toc = case typeOfCode str of
                                   Just toc -> toc
                                   Nothing  -> error (str ++ " is not a defined TypeOfCodeName") :: TypeOfCode
                           s   = codeSymbolism toc                                               :: CodeSymbolism
                           t   = type123 toc                                                     :: Int
                       in if t == 3
                          then markdownToCode3 (docLine s, docBegin s, docEnd s, literalBegin s, literalEnd s)
                          else if t == 2
                               then markdownToCode2 (docBegin s, docEnd s, literalBegin s, literalEnd s)
                               else if t == 1
                                    then markdownToCode1 (docLine s, literalBegin s, literalEnd s)
                                    else error "Undefined type123"

  markdownToCode1 :: (String, String, String) -> Markdown -> Code
  markdownToCode1 (docLine, literalBegin, literalEnd) = unlines . convert . parseBlocks
    where convert :: [ParseBlock] -> [Code]
          convert []                             = []
          convert ((DOCBLOCK  rows):parseBlocks) = (map (docLine ++) rows) ++ (convert parseBlocks)
          convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks)

  markdownToCode2 :: (String, String, String, String) -> Markdown -> Code
  markdownToCode2 (docBegin, docEnd, literalBegin, literalEnd) = unlines . convert . parseBlocks
    where convert :: [ParseBlock] -> [Code]
          convert []                             = []
          convert ((DOCBLOCK  rows):parseBlocks) = [docBegin] ++ rows ++ [docEnd] ++ (convert parseBlocks)
          convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks)

  markdownToCode3 :: (String, String, String, String, String) -> Markdown -> Code
  markdownToCode3 (docLine, docBegin, docEnd, literalBegin, literalEnd) = unlines . convert . parseBlocks
    where convert :: [ParseBlock] -> [Code]
          convert []                             = []
          convert ((DOCBLOCK  rows):parseBlocks) = if (length rows > 1)
                                                   then [docBegin] ++ rows ++ [docEnd] ++ (convert parseBlocks)
                                                   else (map (docLine ++) rows) ++ (convert parseBlocks)
          convert ((CODEBLOCK rows):parseBlocks) = [literalBegin] ++ rows ++ [literalEnd] ++ (convert parseBlocks)

Auxiliary IO functions

  readFrom :: [FilePath] -> IO String
  readFrom files = if null files
                   then getContents
                   else do stringList <- mapM readFile files
                           let string = concat stringList
                           return string

  writeTo :: Maybe FilePath -> String -> IO ()
  writeTo maybeFile string = case maybeFile of
                               Just outFile -> writeFile outFile string
                               Nothing      -> putStrLn string

  toAction :: (String -> String) -> [FilePath] -> Maybe FilePath -> IO ()
  toAction converter inFiles optionalOutFile =
    do inputString  <- readFrom inFiles
       let outputString = converter inputString
       writeTo optionalOutFile outputString

IO versions of the converters

  pureCodeToMarkdownIO :: [FilePath] -> Maybe FilePath -> IO ()
  pureCodeToMarkdownIO = toAction pureCodeToMarkdown

  codeToMarkdownIO :: TypeOfCodeName -> [FilePath] -> Maybe FilePath -> IO ()
  codeToMarkdownIO toc = toAction (codeToMarkdown toc)

  markdownToPureCodeIO :: [FilePath] -> Maybe FilePath -> IO ()
  markdownToPureCodeIO = toAction markdownToPureCode

  markdownToCodeIO :: TypeOfCodeName -> [FilePath] -> Maybe FilePath -> IO ()
  markdownToCodeIO toc = toAction (markdownToCode toc)

Showing and printing help messages

  showSymbolismTable :: String
  showSymbolismTable = unlines textFrame
    where listOfCodes = [minBound .. maxBound] :: [TypeOfCode]
          headRow = [["Type of", "code"], ["commment", "line"], ["comment", "begin"], ["comment", "end"],
                     ["doc", "line"], ["doc", "begin"], ["doc", "end"], ["literal", " begin"], ["literal", "end"]]
          headRow' = map correctTextFrame headRow :: [TextFrame]
          oneTableRow toc = let cs = codeSymbolism toc
                            in map (\ cell -> [cell])
                                   [show toc, commentLine cs, commentBegin cs, commentEnd cs,
                                    docLine cs, docBegin cs, docEnd cs, literalBegin cs, literalEnd cs] :: [TextFrame]
          allRows = [headRow] ++ (map oneTableRow listOfCodes)
          textFrame = gridMerge (normalTextFrameTable allRows)

  printSymbolismTable :: IO ()
  printSymbolismTable = putStr showSymbolismTable

  showCodeHelp :: TypeOfCodeName -> String
  showCodeHelp tocName | typeNumber == 1 = showHelp1
                       | typeNumber == 2 = showHelp2
                       | typeNumber == 3 = showHelp3
    where
      toc :: TypeOfCode
      toc = case typeOfCode tocName of
              Just toc -> toc
              Nothing  -> error (tocName ++ " is not a well--defined TypeOfCodeName")
      typeNumber :: Int
      typeNumber = type123 toc
      symb :: CodeSymbolism
      symb = codeSymbolism toc
      lineText :: String
      lineText = unlines $ textFrameBox
                 [(docLine symb) ++ "... one line of Markdown text ..."]
      blockText :: String
      blockText = unlines $ textFrameBox $ correctTextFrame
                  [docBegin symb, "... lines of Markdown text ...", docEnd symb]
      literalText :: String
      literalText = unlines $ textFrameBox $ correctTextFrame
                    [literalBegin symb, "... lines of " ++ tocName ++ " code ...", literalEnd symb]
      showHelp3 :: String
      showHelp3 = "Markdown document lines in " ++ tocName ++ ":\n" ++
                  lineText ++
                  "Markdown document blocks in " ++ tocName ++ ":\n" ++
                  blockText ++
                  "Literal " ++ tocName ++ " code blocks:\n" ++
                  literalText
      showHelp2 :: String
      showHelp2 = "Markdown document blocks in " ++ tocName ++ ":\n" ++
                  blockText ++
                  "Literal " ++ tocName ++ " code blocks:\n" ++
                  literalText
      showHelp1 :: String
      showHelp1 = "Markdown document lines in " ++ tocName ++ ":\n" ++
                  lineText ++
                  "Literal " ++ tocName ++ " code blocks:\n" ++
                  literalText

  printCodeHelp :: TypeOfCodeName -> IO ()
  printCodeHelp = putStrLn . showCodeHelp