diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..7caa388beb1440210e1649ba3a8de945088ca91a --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +Copyright Author name here (c) 2021 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Author name here nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/P.csv b/P.csv deleted file mode 100644 index b33bda1b93503ff9aa2ebb1751bb1a752a95692d..0000000000000000000000000000000000000000 --- a/P.csv +++ /dev/null @@ -1,4 +0,0 @@ -1 ,5 ,4 , -2 , ,2 , -3 ,7 ,1 ,2 -4 ,8 , , \ No newline at end of file diff --git a/P42.csv b/P42.csv deleted file mode 100644 index ae56f619e71099db05328103bf595edc62b35956..0000000000000000000000000000000000000000 --- a/P42.csv +++ /dev/null @@ -1,5 +0,0 @@ -David , Beckham -Pele , -Diego , Maradona -Cristiano, Ronaldo -Ronaldinho , \ No newline at end of file diff --git a/Q.csv b/Q.csv deleted file mode 100644 index e3281094c0264d8786678c0faa5f7cff334dd6f8..0000000000000000000000000000000000000000 --- a/Q.csv +++ /dev/null @@ -1,4 +0,0 @@ -1 ,6 ,4 ,7 -2 ,8 ,5 ,3 -2 , , ,1 -4 , ,2 ,3 \ No newline at end of file diff --git a/R.csv b/R.csv deleted file mode 100644 index 5f2c21269165f15da2188e526c4461a0317518f9..0000000000000000000000000000000000000000 --- a/R.csv +++ /dev/null @@ -1,4 +0,0 @@ -1 ,5 ,4 ,0 -2 , 0,2 ,0 -3 ,7 ,1 ,2 -4 ,8 , 0,0 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ceb251861ca17653e68439541f0342459ed98e90 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# csvql diff --git a/S.csv b/S.csv deleted file mode 100644 index b3697e56f2298286b201563d547e42babce6ef7c..0000000000000000000000000000000000000000 --- a/S.csv +++ /dev/null @@ -1,4 +0,0 @@ -1 ,6 ,4 ,7 -2 ,8 ,5 ,3 -2 ,0,0,1 -4 ,0,2 ,3 \ No newline at end of file diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000000000000000000000000000000000000..9a994af677b0dfd41b4e3b76b3e7e604003d64e1 --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000000000000000000000000000000000000..bee36a843f7b36930fccdd94facd5c303607c98d --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,20 @@ +module Main where + +import Interpreter +import Control.Exception +import System.Environment + + + +main :: IO () + + +main = do + args <- getArgs + case args of + --("default":_) -> catch (interpret "/home/patryk/dev/plc/extra-problems/ex1.cql") (errorFunc) + (srcname:_) -> catch (interpret srcname) (errorFunc) + _ -> printErr "No source file specified." + +errorFunc :: ErrorCall -> IO () +errorFunc = printErr.show \ No newline at end of file diff --git a/csvql.cabal b/csvql.cabal new file mode 100644 index 0000000000000000000000000000000000000000..7f8c938ab0666af9b168fa23fc28cfe698ecf0ea --- /dev/null +++ b/csvql.cabal @@ -0,0 +1,64 @@ +cabal-version: 1.12 + +-- This file has been generated from package.yaml by hpack version 0.34.4. +-- +-- see: https://github.com/sol/hpack +-- +-- hash: 3fe644e4d9e0509e20b1fd83f282a8b9332a2b4de3947af78367746e7406ca53 + +name: csvql +version: 0.1.0.0 +description: Please see the README on GitHub at <https://github.com/githubuser/csvql#readme> +author: Jeffrey,Patryk and Maram +copyright: 2021 Jeffrey,Patryk and Maram +license: BSD3 +license-file: LICENSE +build-type: Simple +extra-source-files: + README.md + ChangeLog.md + +library + exposed-modules: + CSV + Debug + Eval + Interpreter + Lexer + Parser + Types + other-modules: + Paths_csvql + hs-source-dirs: + src + build-depends: + array + , base >=4.7 && <5 + default-language: Haskell2010 + +executable csvql-exe + main-is: Main.hs + other-modules: + Paths_csvql + hs-source-dirs: + app + ghc-options: -threaded -rtsopts -with-rtsopts=-N + build-depends: + array + , base >=4.7 && <5 + , csvql + default-language: Haskell2010 + +test-suite csvql-test + type: exitcode-stdio-1.0 + main-is: Spec.hs + other-modules: + Paths_csvql + hs-source-dirs: + test + ghc-options: -threaded -rtsopts -with-rtsopts=-N + build-depends: + array + , base >=4.7 && <5 + , csvql + default-language: Haskell2010 diff --git a/package.yaml b/package.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef202285a72ed47be06bab90ae8520f0ebb86b2b --- /dev/null +++ b/package.yaml @@ -0,0 +1,50 @@ +name: csvql +version: 0.1.0.0 +#github: "https://git.soton.ac.uk/plc1/comp2212-cw-2021" +license: BSD3 +author: "Jeffrey,Patryk and Maram" +maintainer: "" +copyright: "2021 Jeffrey,Patryk and Maram" + +extra-source-files: +- README.md +- ChangeLog.md + +# Metadata used when publishing your package +# synopsis: Short description of your package +# category: Web + +# To avoid duplicated efforts in documentation and dealing with the +# complications of embedding Haddock markup inside cabal files, it is +# common to point users to the README.md file. +description: Please see the README on GitHub at <https://github.com/githubuser/csvql#readme> + +dependencies: +- base >= 4.7 && < 5 +- array + + +library: + source-dirs: src + +executables: + csvql-exe: + main: Main.hs + source-dirs: app + ghc-options: + - -threaded + - -rtsopts + - -with-rtsopts=-N + dependencies: + - csvql + +tests: + csvql-test: + main: Spec.hs + source-dirs: test + ghc-options: + - -threaded + - -rtsopts + - -with-rtsopts=-N + dependencies: + - csvql diff --git a/pr1.cql b/pr1.cql new file mode 100644 index 0000000000000000000000000000000000000000..f9bfb751ab7fe0abaacc7b74c0c67cd81b33533a --- /dev/null +++ b/pr1.cql @@ -0,0 +1,6 @@ +.in +A:2, # declare input files and their numbers of cols in .in section +B:2 + +.out # statements for the query are in .out section +A x B; # returns the cartesian product of the two sets (conjunction) \ No newline at end of file diff --git a/pr2.cql b/pr2.cql new file mode 100644 index 0000000000000000000000000000000000000000..7411d3213e48552103ca4e9a2e5b7cda555ae982 --- /dev/null +++ b/pr2.cql @@ -0,0 +1,6 @@ +.in +A:3 + +.out +filter( \(r) -> r[1] == r[2]); +map (\(r) -> r[3,1]); \ No newline at end of file diff --git a/pr3.cql b/pr3.cql new file mode 100644 index 0000000000000000000000000000000000000000..5dc784d5397ce53d453b08192bebe247592cdfa1 --- /dev/null +++ b/pr3.cql @@ -0,0 +1,9 @@ +.in +P: 4, +Q: 4 + +.out +P x Q; +filter(\(r) -> r[1] == r[5]); +let f = \(a,y) -> if (isEmpty(a)) then y else a; +map (\(r) -> [r[1], f(r[2], r[6]), f(r[3], r[7]), f(r[4], r[8])]); \ No newline at end of file diff --git a/pr4.cql b/pr4.cql new file mode 100644 index 0000000000000000000000000000000000000000..d8425281c1a50055894782a4635d172cf2c5f008 --- /dev/null +++ b/pr4.cql @@ -0,0 +1,5 @@ +.in +P42 :2 + +.out +filter (\(r) -> not(isEmpty(r[2]) ) ); \ No newline at end of file diff --git a/pr5.cql b/pr5.cql new file mode 100644 index 0000000000000000000000000000000000000000..f34636ec2f22bd74a8f39567878a1dec46232c78 --- /dev/null +++ b/pr5.cql @@ -0,0 +1,5 @@ +.in +A:1 + +.out +map{A}(\(r) -> [r[1],"0",r[1]] ); \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..7f71b12a90b2747d6d89b88d167b4606780087eb --- /dev/null +++ b/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +.stack-work/dist/x86_64-linux-tinfo6/Cabal-3.2.1.0/build/csvql-exe/csvql-exe $@ \ No newline at end of file diff --git a/src/CSV.hs b/src/CSV.hs new file mode 100644 index 0000000000000000000000000000000000000000..39d0c4a3773bb5306350dcddce03648ff6ad6d44 --- /dev/null +++ b/src/CSV.hs @@ -0,0 +1,42 @@ +module CSV where + +import System.IO +import Data.List + +readCSV :: FilePath -> IO [[String]] +readCSV fname = do + str <- readFile fname + return $ readCSVString str + +readCSVString :: String -> [[String]] +readCSVString whole = [splitElem ',' (line++" " ) | line <- splitElem '\n' whole] +splitElem :: Eq a => a -> [a] -> [[a]] +splitElem elem = split (/=elem) + +split :: (a -> Bool) -> [a] -> [[a]] +split p l = case span p l of + ([], _) -> [] + (match, []) -> [match] + (match, _:rem') -> match:split p rem' + +print2DList :: [[String]] -> IO () +print2DList = putStrLn.toCSVString + +toCSVString :: [[String]] -> String +--F: use the function lines! +toCSVString list = let lines = map (',' `join`) list in '\n' `join` lines + +join :: a -> [[a]] -> [a] +join _ [] = [] +join a l = foldr1 (\s1 s2 -> s1 ++ a:s2) l + +sort2DListLex :: [[String]] -> [[String]] +sort2DListLex = sort + +stripWhitespace = stripTrailingWhitespace.dropWhile (==' ') + +stripTrailingWhitespace (' ':xs) = let remainder = stripTrailingWhitespace xs in + if null remainder then [] else ' ':remainder + +stripTrailingWhitespace (x:xs) = x : stripTrailingWhitespace xs +stripTrailingWhitespace [] = [] \ No newline at end of file diff --git a/src/Debug.hs b/src/Debug.hs new file mode 100644 index 0000000000000000000000000000000000000000..6c66e0c069b2ce8c7fae62ccb78ef2ff8dc1ef94 --- /dev/null +++ b/src/Debug.hs @@ -0,0 +1,2 @@ +module Debug where +notImplemented = error "Not implemented yet" diff --git a/src/Eval.hs b/src/Eval.hs new file mode 100644 index 0000000000000000000000000000000000000000..a081f997b22b268a621178fcc8651fa5dae7a597 --- /dev/null +++ b/src/Eval.hs @@ -0,0 +1,123 @@ +module Eval where +import Types +import Debug +import Data.Maybe +import Data.List +import Debug.Trace + +eval, evalFull :: Environment -> Expr -> Expr --only focus on one expression +findVar :: Environment -> SymbolName -> Maybe Expr +findVar e v = lookup v e +addVar :: Environment -> SymbolName -> Expr -> Environment +addVar = notImplemented + +debug x = trace ("{{" ++ x ++ "}}") + +eval env expr = let (eval', evalFull') = (eval env, evalFull env) in case expr of -- evaluates expression to weak-head normal form (i.e. top level data structure is not a FuncCall) + FuncCall func inputSets args -> let (func', inputSets', args') = (eval' func, map eval' inputSets, map eval' args) in case func' of + (PredefFunc f) -> case f of + Filter -> case (inputSets', args') of + ([Set inputRecords], [predicate]) -> let func = (\r -> eval' $ FuncCall predicate [] [r]) in + Set $ filter ((==Boolean True).func) inputRecords + _ -> error "Filter argument error" + + + Map -> eval' $ case(inputSets', args') of + ([Set records], [lambda]) -> Set (map (\record -> FuncCall lambda [] [record]) records) + _ -> error "Map argument error" + + IsEqual -> case args' of -- TODO not sufficent. + [e1, e2] -> Boolean (e1 == e2) + _ -> error "isEqual argument error" + + XProduct -> eval' $ case inputSets' of + [Set l1, Set l2] -> Set $ [ x `concatRecord` y | x <- l1, y <- l2] + _ -> error "X product takes in two sets" + + (BooleanFunc op) -> case args' of + [Boolean e1, Boolean e2] -> Boolean (e1 `op` e2) + _ -> error "Boolean function takes in two arguments." + + RecordIndex -> eval' $ case args' of + [Record recordData,Int index] -> recordData `listAtIndex` index + RecordSelect -> case args' of + (Record recordData: indexes ) -> Record filteredList + where + indexesRaw = map checkInt indexes + --filteredList = map (eval'.(recordData !!)) indexesRaw + filteredList = map (eval'.listAtIndex recordData) indexesRaw + checkInt (Int i) = i + checkInt _ = error "Arguments to index record must be integers" + + + {-numberedElems = zip [1..] recordData :: [(Int, Expr)] + filtered = filter ((`elem` indexesRaw).fst) numberedElems :: [(Int, Expr)] + filteredList = map snd filtered :: [Expr]-} + + IsEmpty -> case head args' of + (String a) -> Boolean $ null a + _ -> error "IsEmpty operates on a string" + + NotEmpty -> case head args' of + (String a) -> Boolean $ (not.null) a + _ -> error "notEmpty operates on a string" + + Contains -> case args' of + [String a, String b] -> Boolean $ b `isSubList` a + _ -> error "Arguments to 'Contains' must be two strings." + + Plus -> case args' of + [String a, String b] -> String (a ++ b) + [Record a, Record b] -> Record (a ++ b) + _ -> error "Arguments to '+' must either be two strings or two records" + + Not -> case args' of + [Boolean exp1] -> Boolean $ not exp1 + (FuncDef setParams argParams body) -> eval newEnv body + where + newEnv = let (setEnv, argsEnv) = (zip setParams inputSets, zip argParams args) in setEnv ++ argsEnv ++ env + + (Var name) -> case findVar env name of + (Just value) -> eval' value + Nothing -> error $ "Variable " ++ name ++ " not found." + + + (Let _ _ expr) -> expr + + (If cond e1 e2) -> eval' $ case eval' cond of + (Boolean True) -> e1 + (Boolean False) -> e2 + + control@(Control lastResult exprs) -> if null exprs then lastResult else + let (newEnv, newControl) = evalControl1 env control in + eval newEnv newControl + + (Record exprs) -> Record $ map eval' exprs + (Set exprs) -> Set $ map eval' exprs + _ -> expr + +listAtIndex :: [a] -> Int -> a +listAtIndex recordData i | i > length recordData = error $ "Index " ++ show i ++ " too large." + | otherwise = recordData !! (i - 1) + +evalControl1 :: Environment -> Expr -> (Environment, Expr) +evalControl1 env (Control last (currentExpr:exprs)) = (newEnv, Control newLast exprs) + where + newLast = eval env $ case currentExpr of + (FuncCall func [] args) -> FuncCall func [last] args + (Let False _ _) -> last + _ -> currentExpr + + newEnv = case currentExpr of + (Let _ var expr) -> (var,expr):env + _ -> env + +evalFull = eval +--evalFull env (Set xs) = Set (map (eval env) xs) -- evaluates expression fully (not just weak head normal form) +--evalFull _ e = e +--TODO implement properly +concatRecord (Record r1) (Record r2) = Record (r1 ++ r2) + +listStartsWith, isSubList :: Eq a => [a] -> [a] -> Bool +listStartsWith = notImplemented -- check if first list starts with second list +isSubList main sub = sub `isInfixOf` main-- check if first list contains second list anywhere in it \ No newline at end of file diff --git a/src/Interpreter.hs b/src/Interpreter.hs new file mode 100644 index 0000000000000000000000000000000000000000..e9b516d07072f32432bba5438abc4e3847aaee3a --- /dev/null +++ b/src/Interpreter.hs @@ -0,0 +1,70 @@ +module Interpreter where +import Types +import Debug +import Eval +import System.IO +import CSV +import Lexer +import Parser +import Debug.Trace + +printErr s = hPutStrLn stderr ("[ERROR] " ++ s) + +parse :: String -> Program +parse = parseSource.alexScanTokens +--outline + +--main = do +-- args <- getArgs +-- case args of +-- --("default":_) -> catch (interpret "/home/patryk/dev/plc/extra-problems/ex1.cql") (errorFunc) +-- (srcname:_) -> catch (interpret srcname) (errorFunc) + --_ -> printErr "No source file specified." +-- _ -> catch (interpret "/home/patryk/dev/plc/extra-problems/ex1.cql") (errorFunc) + + + + +interpret :: FilePath -> IO () -- the main function, takes in file name, prints out result +interpret sourceFName = do + source <- readFile sourceFName + let program = parse source -- main abstract syntax tree + (env, mainExpr) <- prepare program + --print $ "Main expression " ++ show mainExpr + let finalOutput = eval env mainExpr + --print $ "Final outupt: " ++ show finalOutput + showFinal finalOutput + +prepare :: Program -> IO (Environment, Expr) -- takes in the AST for the program, reads the csv files and prepares the environment based on the CSV files +prepare (inputSets,instructions) = do + csvData <- loadInputFiles inputSets ::IO [Expr] + let env = zip inputSets csvData :: Environment + let mainExpr = Control (head csvData) instructions + return (env, mainExpr) + + where + loadInputFiles = mapM loadInputFile + + +evalFinal :: Expr -> Expr -- whnf set to ready set +evalFinal expr = trace ("Called with: " ++ show expr) (evalFull [] expr) + +showFinal :: Expr -> IO () +showFinal = (print2DList.sort2DListLex.setTo2DList) + +setTo2DList :: Expr -> [[String]] +setTo2DList (Set records) = traceShow records $ map (map (\(String s) -> s).(\(Record list) -> list)) records + + +-------------------------------------------- +loadInputFile :: SymbolName -> IO Expr +loadInputFile name = do + rawData <- readCSV (name ++ ".csv") :: IO [[String]] + let output = Set $ map toRecord rawData + return output + + where + toRecord stringList = Record $ map ((String).stripWhitespace) stringList + + + diff --git a/src/Lexer.x b/src/Lexer.x new file mode 100644 index 0000000000000000000000000000000000000000..1cab7571e7ce16a73be0c93358d4a0f99d13ab28 --- /dev/null +++ b/src/Lexer.x @@ -0,0 +1,141 @@ +{ +module Lexer where +import Data.List +} + +%wrapper "posn" +$digit = 0-9 +$posDigit = 1-9 +$alpha = [a-zA-Z] +$lower = [a-z] +$upper = [A-Z] + + +--\\map(\r -> r[1,2,3]) +tokens :- + +$white+ ; +\#.* ; +contains {\p s -> TokenContains p } +isEmpty {\p s -> TokenIsEmpty p } +filter {\p s -> TokenFilter p } +true {\p s -> TokenTrue p } +false {\p s -> TokenFalse p } +not {\p s -> TokenNot p } +let {\p s -> TokenLet p } +if {\p s -> TokenIf p } +else {\p s -> TokenElse p } +then {\p s -> TokenThen p } +\.in {\p s -> TokenInSet p } +\.out {\p s -> TokenOutSet p } +\[ {\p s -> TokenLeftSqBracket p } +\] {\p s -> TokenRightSqBracket p } +\{ {\p s -> TokenLeftBrace p } +\} {\p s -> TokenRightBrace p } +"->" {\p s -> TokenArrow p } +"==" {\p s -> TokenisEqual p } +"/=" {\p s -> TokenisNotEqual p } +"+" {\p s -> TokenPlus p } +\= {\p s -> TokenEqual p } +\( {\p s -> TokenLeftBracket p } +\) {\p s -> TokenRightBracket p } +\: {\p s -> TokenCol p } +\; {\p s -> TokenSemiCol p } +\\ {\p s -> TokenLambda p } +\, {\p s -> TokenComma p } +\. {\p s -> TokenFullStop p } +x {\p s -> TokenXProduct p } +xx {\p s -> TokenXXProduct p } +map {\p s -> TokenMap p } +$lower [$lower $digit \_ \']* {\p s -> TokenVarName p s } +$upper[$alpha $digit \_ \']* {\p s -> TokenSetName p s } +--$posDigit$digit* {\p s -> TokenPosNat p (read s) } +$digit+ {\p s -> TokenNat p (read s) } +\"[$alpha $digit]+\" {\p s -> TokenString p (init.tail $ s) } +AND {\p s -> TokenBoolAND p } +OR {\p s -> TokenBoolOR p } + + +{ +--token type: +data Token = + TokenFilter AlexPosn | + TokenIsEmpty AlexPosn | + TokenContains AlexPosn | + TokenSetName AlexPosn String | + TokenNat AlexPosn Int | + TokenPosNat AlexPosn Int | + TokenVarName AlexPosn String | + TokenTrue AlexPosn | + TokenFalse AlexPosn | + TokenNot AlexPosn | + TokenString AlexPosn String | + TokenLeftSqBracket AlexPosn | + TokenRightSqBracket AlexPosn | + TokenLeftBrace AlexPosn | + TokenRightBrace AlexPosn | + TokenArrow AlexPosn | + TokenisEqual AlexPosn | + TokenisNotEqual AlexPosn | + TokenPlus AlexPosn | + TokenLeftBracket AlexPosn | + TokenRightBracket AlexPosn | + TokenSemiCol AlexPosn | + TokenCol AlexPosn | + TokenLambda AlexPosn | + TokenComma AlexPosn | + TokenFullStop AlexPosn | + TokenInSet AlexPosn | + TokenXProduct AlexPosn | + TokenXXProduct AlexPosn | + TokenOutSet AlexPosn | + TokenMap AlexPosn | + TokenLet AlexPosn | + TokenIf AlexPosn | + TokenElse AlexPosn | + TokenThen AlexPosn | + TokenEqual AlexPosn | + TokenBoolAND AlexPosn | + TokenBoolOR AlexPosn + deriving (Eq, Show) + + +pos :: Token -> AlexPosn + +pos token = case token of + (TokenFilter p ) -> p + (TokenIsEmpty p ) -> p + (TokenContains p ) -> p + (TokenSetName p _) -> p + (TokenNat p _) -> p + (TokenPosNat p _) -> p + (TokenVarName p _) -> p + (TokenTrue p ) -> p + (TokenFalse p ) -> p + (TokenNot p ) -> p + (TokenString p _) -> p + (TokenLeftSqBracket p ) -> p + (TokenLeftBrace p ) -> p + (TokenRightSqBracket p ) -> p + (TokenArrow p ) -> p + (TokenisEqual p ) -> p + (TokenisNotEqual p ) -> p + (TokenPlus p ) -> p + (TokenLeftBracket p ) -> p + (TokenRightBracket p ) -> p + (TokenSemiCol p ) -> p + (TokenCol p ) -> p + (TokenLambda p ) -> p + (TokenComma p ) -> p + (TokenFullStop p ) -> p + (TokenInSet p ) -> p + (TokenXProduct p ) -> p + (TokenXXProduct p ) -> p + (TokenOutSet p ) -> p + (TokenMap p) -> p + (TokenLet p) -> p + (TokenElse p) -> p + (TokenIf p) -> p + (TokenThen p) -> p + (TokenEqual p) -> p +} \ No newline at end of file diff --git a/src/Parser.y b/src/Parser.y new file mode 100644 index 0000000000000000000000000000000000000000..39c3dafaac67d43faa0971b7b4828fbd9c2ac65a --- /dev/null +++ b/src/Parser.y @@ -0,0 +1,133 @@ +{ +module Parser where +import Lexer +import Types +import CSV +} + +%name parseSource +%tokentype {Token} +%error {parseError} + + +%token + filter { TokenFilter _ } + in { TokenInSet _ } + out { TokenOutSet _ } + SetName { TokenSetName _ $$ } + Nat { TokenNat _ $$ } + PosNat { TokenPosNat _ $$ } + VarName { TokenVarName _ $$ } + true { TokenTrue _ } + false { TokenFalse _ } + Str { TokenString _ $$ } + '[' { TokenLeftSqBracket _ } + '{' { TokenLeftBrace _ } + '}' { TokenRightBrace _ } + ']' { TokenRightSqBracket _ } + "->" { TokenArrow _ } + "==" { TokenisEqual _ } + "/=" { TokenisNotEqual _ } + '(' { TokenLeftBracket _ } + ')' { TokenRightBracket _ } + ';' { TokenSemiCol _ } + ':' { TokenCol _ } + '\\' { TokenLambda _ } + ',' { TokenComma _ } + '.' { TokenFullStop _ } + '+' { TokenPlus _ } + x { TokenXProduct _ } + map { TokenMap _ } + xx { TokenXXProduct _ } +-- mapr { TokenMapr _ } + not { TokenNot _ } +-- zip { TokenZip _ } + contains { TokenContains _ } +-- isSubstring { TokenIsSubstring _ } + isEmpty { TokenIsEmpty _ } + let { TokenLet _} + if { TokenIf _} + else { TokenElse _} + then { TokenThen _} + '=' { TokenEqual _} + or { TokenBoolAND _ } + and { TokenBoolOR _ } + + +%left FUNCCALL +%right INDEX +%right map filter FUNCCALL + +%left '+' +%left '==' +%left or +%left and + +%% + +Prog : in SetDecls out Instructions {($2,$4)} + +SetDecl : SetName':'Nat {$1} +SetDecls : SetDecl {[$1]} + | SetDecls','SetDecl {$3:$1} + +Instructions : Expr ';' {[$1]} + | Expr';' Instructions {$1:$3} + +Expr : + Expr '('Exprs')' %prec FUNCCALL {FuncCall $1 [] $3} + | Expr '{' Exprs '}' '('Exprs')' {FuncCall $1 $3 $6} + | Expr "==" Expr {FuncCall (PredefFunc IsEqual) [] [$1, $3]} +-- | Expr "/=" Expr {FuncCall (PredefFunc IsNotEqual) [] [$1, $3]} + | Expr x Expr {FuncCall (PredefFunc XProduct) [$1, $3] []} + | Expr '+' Expr {FuncCall (PredefFunc Plus) [$1, $3] []} + | Expr'['Expr']' %prec INDEX {FuncCall (PredefFunc RecordIndex) [] [$1, $3]} + | Expr'['Expr','Exprs']' {FuncCall (PredefFunc RecordSelect) [] ($1:$3:$5) } + | '['Exprs']' {Record $2} + | Str {Types.String $ stripWhitespace $1} + | '\\' '(' VarNames ')' "->" Expr { FuncDef [] $3 $6 } + | if '('Expr')' then Expr else Expr {If $3 $6 $8} + | let SetName '=' Expr {Let True $2 $4} + | let VarName '=' Expr {Let False $2 $4} + | VarName {Var $1} + | SetName {Var $1} + | Nat {Types.Int $1} + | true {Boolean True} + | PredefFunc {PredefFunc $1} + | Expr or Expr {booleanExpr (||) $1 $3} + | Expr and Expr {booleanExpr (&&) $1 $3} + +PredefFunc : isEmpty {IsEmpty} + | filter {Filter} + | contains {Contains} + | isEmpty {IsEmpty} +-- | nEmpty {IsEmpty} + | map {Map} + | not {Not} +-- | and {And} +-- | or {Or} +-- | zip {Zip} +-- | Mapr + +SetNames : SetName {[$1]} + | SetName ',' SetNames { $1:$3 } + + +VarNames : VarName {[$1]} + | VarName ',' VarNames {$1:$3} + +Exprs : Expr {[$1]} + | Expr','Exprs {$1:$3} + +Nats : Nat {[$1]} + | Nat ',' Nats {$1:$3} + +{ + +parseError :: [Token] -> a +parseError tokens = error $ "Parse error: " ++ (show.pos.head) tokens + +tokenPosn :: Token -> String +tokenPosn t = let (AlexPn _ line col) = pos t in (show line) ++ ':' : (show col) + +} diff --git a/src/Types.hs b/src/Types.hs new file mode 100644 index 0000000000000000000000000000000000000000..9e5d4bc63b6fbfdda69415d9187cc54b7e1e0103 --- /dev/null +++ b/src/Types.hs @@ -0,0 +1,70 @@ +module Types where +type Program = (InputSymbols,Output) + +type SymbolName = String +type InputSymbols = [String] -- .in section +type Environment = [(SymbolName, Expr)] -- (result of last calculation, other bindings e.g. let statements) + +--data OutputLine = Let SymbolName Expr | Expr -- Expr always a function call for a set function, but not enforced by AST. +type Output = [Expr] +data PredefFunc = XProduct | XXProduct | IsEqual | IsNotEqual | Plus --operators + | Map | Filter + | RecordIndex -- [] operator + | RecordSelect + | IsEmpty | NotEmpty | Contains -- string functions + | Length + | Not + | BooleanFunc (Bool -> Bool -> Bool) + --TODO others:reverse +instance Show PredefFunc where + show XProduct = "XProduct" + show XXProduct = "XXProduct" + show IsEqual = "IsEqual" + show IsNotEqual = "IsNotEqual" + show Plus = "Plus" + show Map = "Map" + show Filter = "Filter" + show RecordIndex = "RecordIndex" + show RecordSelect = "RecordSelect" + show IsEmpty = "IsEmpty" + show NotEmpty = "NotEmpty" + show Contains = "Contains" + show Length = "Length" + show Not = "Not" + show (BooleanFunc _) = "BooleanFunc" + + +--n.b. these definitions do not enforce type checking! The use of any function or operator will be associated with a FuncCall. +--Therefore, it is possible to have ASTs with ridiculous things where e.g. a XX product is applied to and int and a String, or a function call has an Int as a function! +--These issues can be checked via the type checker after the AST has been built. + +-- filter [A] (\(r) -> r[1] == "hello") +-- FuncCall (PredefFunc Filter) [Var "A"] [FuncDef [] ["r"] [(FuncCall (PredefFunc IsEqual) [] (r[1]) ("hello"))]] + +data Expr = Control Expr [Expr] -- result of last computation, sequence of instructions + | FuncCall {func::Expr, inputSets::[Expr], args::[Expr]} -- args: function to call, input sets, extra argument. Applicable for all kinds of functions: the set functions e.g. filter, map or simple functions or even the operators e.g. ==, /= ! We don't define separate expr for each operator + | FuncDef {inputSetNames::[SymbolName], argsNames::[SymbolName], body::Expr} -- function definition + | If Expr Expr Expr + | Set [Expr] + | Tuple [Expr] + | Record [Expr] + | PredefFunc PredefFunc + | Var SymbolName + | Int Int + | String String + | Boolean Bool + | Let Bool SymbolName Expr + +-- | Nat Int + deriving (Show) +booleanExpr func e1 e2 = FuncCall (PredefFunc $ BooleanFunc func) [] [e1,e2] + +instance Eq Expr where + --(Record r1) == (Record r2) = and (zipWith (==) r1 r2) + --(String s1) == (String s2) = and (zipWith (==) s1 s2) + (Record r1) == (Record r2) = r1 == r2; + (String s1) == (String s2) = s1 == s2 + (Boolean b1) == (Boolean b2) = b1 == b2 + (Int v1) == (Int v2) = v1 == v2 + _ == _ = error "Comparison of incompatible types" +data Parameter = NamedParam SymbolName | TupleMatch [SymbolName] \ No newline at end of file diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05d682bd1cf62ea68739357688829a6dcbc8cf8c --- /dev/null +++ b/stack.yaml @@ -0,0 +1,67 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# https://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# +# The location of a snapshot can be provided as a file or url. Stack assumes +# a snapshot provided as a file might change, whereas a url resource does not. +# +# resolver: ./custom-snapshot.yaml +# resolver: https://example.com/snapshots/2018-01-01.yaml +resolver: + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/10.yaml + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# subdirs: +# - auto-update +# - wai +packages: +- . +# Dependency packages to be pulled from upstream that are not in the resolver. +# These entries can reference officially published versions as well as +# forks / in-progress versions pinned to a git hash. For example: +# +# extra-deps: +# - acme-missiles-0.3 +# - git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# +# extra-deps: [] + +# Override default flag values for local packages and extra-deps +# flags: {} + +# Extra package databases containing global packages +# extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=2.5" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor diff --git a/stack.yaml.lock b/stack.yaml.lock new file mode 100644 index 0000000000000000000000000000000000000000..12aa4254388d7d0ec32833a2cb30a1e26988a65e --- /dev/null +++ b/stack.yaml.lock @@ -0,0 +1,13 @@ +# This file was autogenerated by Stack. +# You should not edit this file by hand. +# For more information, please see the documentation at: +# https://docs.haskellstack.org/en/stable/lock_files + +packages: [] +snapshots: +- completed: + size: 567241 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/10.yaml + sha256: 321b3b9f0c7f76994b39e0dabafdc76478274b4ff74cc5e43d410897a335ad3b + original: + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/10.yaml diff --git a/test/Spec.hs b/test/Spec.hs new file mode 100644 index 0000000000000000000000000000000000000000..cd4753fc9c10722ad5c3ec4fd34de99972243b6c --- /dev/null +++ b/test/Spec.hs @@ -0,0 +1,2 @@ +main :: IO () +main = putStrLn "Test suite not yet implemented"