From 74461d9898eb3a5c5a0a7a1fac593f3dec3251af Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 1 May 2019 11:40:05 +0100 Subject: [PATCH 01/12] Add SAS lexer and accompanying files --- lib/rouge/demos/sas | 13 + lib/rouge/lexers/sas.rb | 568 ++++++++++++++++++++++++++++++++++++++++ spec/lexers/sas_spec.rb | 20 ++ spec/visual/samples/sas | 71 +++++ 4 files changed, 672 insertions(+) create mode 100644 lib/rouge/demos/sas create mode 100644 lib/rouge/lexers/sas.rb create mode 100644 spec/lexers/sas_spec.rb create mode 100644 spec/visual/samples/sas diff --git a/lib/rouge/demos/sas b/lib/rouge/demos/sas new file mode 100644 index 0000000000..24ac5aca0c --- /dev/null +++ b/lib/rouge/demos/sas @@ -0,0 +1,13 @@ +data sim; + do i = 1 to 100; + x1 = rand("Normal"); + x2 = rand("Binomial", 0.5, 100); + output; + end; +run; + +proc means data=sashelp.class; + class sex; + var height weight; + output out = mean_by_sex; +run; \ No newline at end of file diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb new file mode 100644 index 0000000000..4d01df30d3 --- /dev/null +++ b/lib/rouge/lexers/sas.rb @@ -0,0 +1,568 @@ +# -*- coding: utf-8 -*- # + +module Rouge + module Lexers + class SAS < RegexLexer + title "SAS" + desc "SAS (Statistical Analysis Software)" + tag 'sas' + filenames '*.sas' + mimetypes *%w( + application/x-sas + application/x-stat-sas + application/x-sas-syntax + ) + + def self.data_step_statements + # from Data step statements - SAS 9.4 Statements reference + # http://support.sas.com/documentation/cdl/en/lestmtsref/68024/PDF/default/lestmtsref.pdf + @keywords ||= %w( + ABORT ARRAY ATTRIB BY CALL CARDS CARDS4 CATNAME CHECKPOINT + EXECUTE_ALWAYS CONTINUE DATA DATALINES DATALINES4 DELETE DESCRIBE + DISPLAY DM DO UNTIL WHILE DROP END ENDSAS ERROR EXECUTE FILE FILENAME + FOOTNOTE FORMAT GO TO IF THEN ELSE INFILE INFORMAT INPUT + KEEP LABEL LEAVE LENGTH LIBNAME LINK LIST LOCK LOSTCARD MERGE + MISSING MODIFY OPTIONS OUTPUT PAGE PUT PUTLOG REDIRECT REMOVE RENAME + REPLACE RESETLINE RETAIN RETURN RUN SASFILE SELECT SET SKIP STOP + SYSECHO TITLE UPDATE WHERE WINDOW X + ) + # label: + # Sum + end + + def self.sas_functions + # from SAS 9.4 Functions and CALL Routines reference + # http://support.sas.com/documentation/cdl/en/lefunctionsref/67960/PDF/default/lefunctionsref.pdf + @sasfns ||= %w( + ABS ADDR ADDRLONG AIRY ALLCOMB ALLPERM ANYALNUM ANYALPHA ANYCNTRL + ANYDIGIT ANYFIRST ANYGRAPH ANYLOWER ANYNAME ANYPRINT ANYPUNCT + ANYSPACE ANYUPPER ANYXDIGIT ARCOS ARCOSH ARSIN ARSINH ARTANH ATAN + ATAN2 ATTRC ATTRN BAND BETA BETAINV BLACKCLPRC BLACKPTPRC + BLKSHCLPRC BLKSHPTPRC BLSHIFT BNOT BOR BRSHIFT BXOR BYTE CAT CATQ + CATS CATT CATX CDF CEIL CEILZ CEXIST CHAR CHOOSEC CHOOSEN CINV + CLOSE CMISS CNONCT COALESCE COALESCEC COLLATE COMB COMPARE COMPBL + COMPFUZZ COMPGED COMPLEV COMPOUND COMPRESS CONSTANT CONVX CONVXP + COS COSH COT COUNT COUNTC COUNTW CSC CSS CUMIPMT CUMPRINC CUROBS + CV DACCDB DACCDBSL DACCSL DACCSYD DACCTAB DAIRY DATDIF DATE + DATEJUL DATEPART DATETIME DAY DCLOSE DCREATE DEPDB DEPDBSL DEPSL + DEPSYD DEPTAB DEQUOTE DEVIANCE DHMS DIF DIGAMMA DIM DINFO DIVIDE + DNUM DOPEN DOPTNAME DOPTNUM DOSUBL DREAD DROPNOTE DSNAME + DSNCATLGD DUR DURP EFFRATE ENVLEN ERF ERFC EUCLID EXIST EXP FACT + FAPPEND FCLOSE FCOL FCOPY FDELETE FETCH FETCHOBS FEXIST FGET + FILEEXIST FILENAME FILEREF FINANCE FIND FINDC FINDW FINFO FINV + FIPNAME FIPNAMEL FIPSTATE FIRST FLOOR FLOORZ FMTINFO FNONCT FNOTE + FOPEN FOPTNAME FOPTNUM FPOINT FPOS FPUT FREAD FREWIND FRLEN FSEP + FUZZ FWRITE GAMINV GAMMA GARKHCLPRC GARKHPTPRC GCD GEODIST + GEOMEAN GEOMEANZ GETOPTION GETVARC GETVARN GRAYCODE HARMEAN + HARMEANZ HBOUND HMS HOLIDAY HOLIDAYCK HOLIDAYCOUNT HOLIDAYNAME + HOLIDAYNX HOLIDAYNY HOLIDAYTEST HOUR HTMLDECODE HTMLENCODE + IBESSEL IFC IFN INDEX INDEXC INDEXW INPUT INPUTC INPUTN INT + INTCINDEX INTCK INTCYCLE INTFIT INTFMT INTGET INTINDEX INTNX + INTRR INTSEAS INTSHIFT INTTEST INTZ IORCMSG IPMT IQR IRR JBESSEL + JULDATE JULDATE7 KURTOSIS LAG LARGEST LBOUND LCM LCOMB LEFT + LENGTH LENGTHC LENGTHM LENGTHN LEXCOMB LEXCOMBI LEXPERK LEXPERM + LFACT LGAMMA LIBNAME LIBREF LOG LOG1PX LOG10 LOG2 LOGBETA LOGCDF + LOGISTIC LOGPDF LOGSDF LOWCASE LPERM LPNORM MAD MARGRCLPRC + MARGRPTPRC MAX MD5 MDY MEAN MEDIAN MIN MINUTE MISSING MOD + MODEXIST MODULE MODULEC MODULEN MODZ MONTH MOPEN MORT MSPLINT + MVALID N NETPV NLITERAL NMISS NOMRATE NORMAL NOTALNUM NOTALPHA + NOTCNTRL NOTDIGIT NOTE NOTFIRST NOTGRAPH NOTLOWER NOTNAME + NOTPRINT NOTPUNCT NOTSPACE NOTUPPER NOTXDIGIT NPV NVALID NWKDOM + OPEN ORDINAL PATHNAME PCTL PDF PEEK PEEKC PEEKCLONG PEEKLONG PERM + PMT POINT POISSON PPMT PROBBETA PROBBNML PROBBNRM PROBCHI PROBF + PROBGAM PROBHYPR PROBIT PROBMC PROBNEGB PROBNORM PROBT PROPCASE + PRXCHANGE PRXMATCH PRXPAREN PRXPARSE PRXPOSN PTRLONGADD PUT PUTC + PUTN PVP QTR QUANTILE QUOTE RANBIN RANCAU RAND RANEXP RANGAM + RANGE RANK RANNOR RANPOI RANTBL RANTRI RANUNI RENAME REPEAT + RESOLVE REVERSE REWIND RIGHT RMS ROUND ROUNDE ROUNDZ SAVING + SAVINGS SCAN SDF SEC SECOND SHA256 SHA256HEX SHA256HMACHEX SIGN + SIN SINH SKEWNESS SLEEP SMALLEST SOAPWEB SOAPWEBMETA + SOAPWIPSERVICE SOAPWIPSRS SOAPWS SOAPWSMETA SOUNDEX SPEDIS SQRT + SQUANTILE STD STDERR STFIPS STNAME STNAMEL STRIP SUBPAD SUBSTR + SUBSTRN SUM SUMABS SYMEXIST SYMGET SYMGLOBL SYMLOCAL SYSEXIST + SYSGET SYSMSG SYSPARM SYSPROCESSID SYSPROCESSNAME SYSPROD SYSRC + SYSTEM TAN TANH TIME TIMEPART TIMEVALUE TINV TNONCT TODAY + TRANSLATE TRANSTRN TRANWRD TRIGAMMA TRIM TRIMN TRUNC TSO TYPEOF + TZONEID TZONENAME TZONEOFF TZONES2U TZONEU2S UNIFORM UPCASE + URLDECODE URLENCODE USS UUIDGEN VAR VARFMT VARINFMT VARLABEL + VARLEN VARNAME VARNUM VARRAY VARRAYX VARTYPE VERIFY VFORMAT + VFORMATD VFORMATDX VFORMATN VFORMATNX VFORMATW VFORMATWX VFORMATX + VINARRAY VINARRAYX VINFORMAT VINFORMATD VINFORMATDX VINFORMATN + VINFORMATNX VINFORMATW VINFORMATWX VINFORMATX VLABEL VLABELX + VLENGTH VLENGTHX VNAME VNAMEX VTYPE VTYPEX VVALUE VVALUEX WEEK + WEEKDAY WHICHC WHICHN WTO YEAR YIELDP YRDIF YYQ ZIPCITY + ZIPCITYDISTANCE ZIPFIPS ZIPNAME ZIPNAMEL ZIPSTATE + ) + end + + def self.sas_macro_statements + # from SAS 9.4 Macro Language Reference + # Chapter 12 + + @macro_sts ||= %w( + %COPY %DISPLAY %GLOBAL %INPUT %LET %MACRO %PUT %SYMDEL %SYSCALL + %SYSEXEC %SYSLPUT %SYSMACDELETE %SYSMSTORECLEAR %SYSRPUT %WINDOW + %ABORT %DO %TO %UNTIL %WHILE %END %GOTO %IF %THEN %ELSE %LOCAL + %RETURN + %INCLUDE %LIST %RUN + ) + # Omitted: + # %label: Identifies the destination of a %GOTO statement. + # %MEND + end + + def self.sas_macro_functions + # from SAS 9.4 Macro Language Reference + # Chapter 12 + + @macro_fns ||= %w( + %BQUOTE %NRBQUOTE %EVAL %INDEX %LENGTH %QUOTE %NRQUOTE %SCAN + %QSCAN %STR %NRSTR %SUBSTR %QSUBSTR %SUPERQ %SYMEXIST %SYMGLOBL + %SYMLOCAL %SYSEVALF %SYSFUNC %QSYSFUNC %SYSGET %SYSMACEXEC + %SYSMACEXIST %SYSMEXECDEPTH %SYSMEXECNAME %SYSPROD %UNQUOTE + %UPCASE %QUPCASE + ) + end + + def self.sas_auto_macro_vars + # from SAS 9.4 Macro Language Reference + # Chapter 12 + + @macro_auto_vars ||= %w( + &SYSADDRBITS &SYSBUFFR &SYSCC &SYSCHARWIDTH &SYSCMD &SYSDATASTEPPHASE &SYSDATE + &SYSDATE9 &SYSDAY &SYSDEVIC &SYSDMG &SYSDSN &SYSENCODING &SYSENDIAN &SYSENV + &SYSERR &SYSERRORTEXT &SYSFILRC &SYSHOSTINFOLONG &SYSHOSTNAME &SYSINDEX + &SYSINFO &SYSJOBID &SYSLAST &SYSLCKRC &SYSLIBRC &SYSLOGAPPLNAME &SYSMACRONAME + &SYSMENV &SYSMSG &SYSNCPU &SYSNOBS &SYSODSESCAPECHAR &SYSODSPATH &SYSPARM + &SYSPBUFF &SYSPRINTTOLIST &SYSPRINTTOLOG &SYSPROCESSID &SYSPROCESSMODE + &SYSPROCESSNAME &SYSPROCNAME &SYSRC &SYSSCP &SYSSCPL &SYSSITE &SYSSIZEOFLONG + &SYSSIZEOFPTR &SYSSIZEOFUNICODE &SYSSTARTID &SYSSTARTNAME &SYSTCPIPHOSTNAME + &SYSTIME &SYSTIMEZONE &SYSTIMEZONEIDENT &SYSTIMEZONEOFFSET &SYSUSERID &SYSVER + &SYSVLONG &SYSVLONG4 &SYSWARNINGTEXT + ) + end + + # Create a hash with keywords for common PROCs, keyed by PROC name + PROC_KEYWORDS = Hash.new + + PROC_KEYWORDS["sql"] = %w( + ALTER TABLE CONNECT CREATE INDEX VIEW DELETE DESCRIBE DISCONNECT DROP EXECUTE + INSERT RESET SELECT UPDATE VALIDATE ADD CONSTRAINT DROP FOREIGN KEY PRIMARY + MODIFY LIKE AS ORDER BY USING FROM INTO SET VALUES RESET DISTINCT UNIQUE + WHERE GROUP HAVING LEFT RIGHT INNER JOIN ON + ) + # from SAS 9.4 SQL Procedure User's Guide + + PROC_KEYWORDS["means"] = %w( + BY CLASS FREQ ID OUTPUT OUT TYPES VAR WAYS WEIGHT + ATTRIB FORMAT LABEL WHERE + DESCENDING NOTSORTED + NOTHREADS NOTRAP PCTLDEF SUMSIZE THREADS CLASSDATA COMPLETETYPES + EXCLUSIVE MISSING FW MAXDEC NONOBS NOPRINT ORDER FORMATTED FREQ + UNFORMATTED PRINT PRINTALLTYPES PRINTIDVARS STACKODSOUTPUT + CHARTYPE DESCENDTYPES IDMIN + ALPHA EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF + CLM CSS CV KURTOSIS KURT LCLM MAX MEAN MIN MODE N + NMISS RANGE SKEWNESS SKEW STDDEV STD STDERR SUM SUMWGT UCLM USS VAR + MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 + P80 QRANGE + PROBT PRT T + ASCENDING GROUPINTERNAL MLF PRELOADFMT + MAXID AUTOLABEL AUTONAME KEEPLEN LEVELS NOINHERIT + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["datasets"] = %w( + AGE APPEND ATTRIB AUDIT CHANGE CONTENTS COPY DELETE EXCHANGE + EXCLUDE FORMAT IC CREATE DELETE REACTIVATE INDEX CENTILES INFORMAT + INITIATE LABEL LOG MODIFY REBUILD RENAME REPAIR RESUME SAVE SELECT + SUSPEND TERMINATE USER_VAR XATTR ADD OPTIONS REMOVE SET + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["sort"] = %w( + BY DESCENDING KEY ASCENDING ASC DESC DATECOPY FORCE OVERWRITE + PRESORTED SORTSIZE TAGSORT DUPOUT OUT UNIQUEOUT NODUPKEY NOUNIQUEKEY + NOTHREADS THREADS EQUALS NOEQUALS + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["print"] = %w( + BY DESCENDING NOTSORTED PAGEBY SUMBY ID STYLE SUM VAR CONTENTS DATA + GRANDTOTAL_LABEL HEADING LABEL SPLIT SUMLABEL NOSUMLABEL + BLANKLINE COUNT DOUBLE N NOOBS OBS ROUND + ROWS UNIFORM WIDTH + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["append"] = %w( + BASE APPENDVER DATA ENCRYPTKEY FORCE GETSORT NOWARN + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["transpose"] = %w( + DELIMITER LABEL LET NAME OUT PREFIX SUFFIX BY DESCENDING NOTSORTED + COPY ID IDLABEL VAR INDB + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["freq"] = %w( + BY EXACT OUTPUT TABLES TEST WEIGHT + COMPRESS DATA FORMCHAR NLEVELS NOPRINT ORDER PAGE FORMATTED FREQ + INTERNAL + AGREE BARNARD BINOMIAL BIN CHISQ COMOR EQOR ZELEN FISHER JT KAPPA + KENTB TAUB LRCHI MCNEM MEASURES MHCHI OR ODDSRATIO PCHI PCORR RELRISK + RISKDIFF SCORR SMDCR SMDRC STUTC TAUC TREND WTKAP WTKAPPA + OUT AJCHI ALL BDCHI CMH CMH1 CMH2 CMHCOR CMHGA CMHRMS COCHQ CONTGY + CRAMV EQKAP EQWKP GAMMA GS GAILSIMON LAMCR LAMDAS LAMRC LGOR LGRRC1 + LGRRC2 MHOR MHRRC1 MHRRC2 N NMISS PHI PLCORR RDIF1 RDIF2 RISKDIFF1 + RISKDIFF2 RRC1 RELRISK1 RRC2 RELRISK2 RSK1 RISK1 RSK11 RISK11 RSK12 + RISK12 RSK21 RISK21 RSK22 RISK22 TSYMM BOWKER U UCR URC + CELLCHI2 CUMCOL DEVIATION EXPECTED MISSPRINT PEARSONREF PRINTWKTS + SCOROUT SPARSE STDRES TOTPCT + CONTENTS CROSSLIST FORMAT LIST MAXLEVELS NOCOL NOCUM NOFREQ NOPERCENT + NOPRINT NOROW NOSPARSE NOWARN PLOTS OUT OUTCUM OUTEXPECT OUTPCT + ZEROS + ) + # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition + + PROC_KEYWORDS["corr"] = %w( + BY FREQ ID PARTIAL VAR WEIGHT WITH + DATA OUTH OUTK OUTP OUTPLC OUTPLS OUTS + EXCLNPWGHT FISHER HOEFFDING KENDALL NOMISS PEARSON POLYCHORIC + POLYSERIAL ALPHA COV CSSCP SINGULAR SSCP VARDEF PLOTS MATRIX SCATTER + BEST NOCORR NOPRINT NOPROB NOSIMPLE RANK + ) + # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition + + PROC_KEYWORDS["report"] = %w( + BREAK BY DESCENDING NOTSORTED COLUMN COMPUTE STYLE LINE ENDCOMP + CALL DEFINE _ROW_ FREQ RBREAK WEIGHT + ATTRIB FORMAT LABEL WHERE + DATA NOALIAS NOCENTER NOCOMPLETECOLS NOCOMPLETEROWS NOTHREADS + NOWINDOWS OUT PCTLDEF THREADS WINDOWS COMPLETECOLS NOCOMPLETECOLS + COMPLETEROWS NOCOMPLETEROWS CONTENTS SPANROWS COMMAND HELP PROMPT + BOX BYPAGENO CENTER NOCENTER COLWIDTH FORMCHAR LS MISSING PANELS PS + PSPACE SHOWALL SPACING WRAP EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF + NAMED NOHEADER SPLIT HEADLINE HEADSKIP LIST NOEXEC OUTREPT PROFILE + REPORT + COLOR DOL DUL OL PAGE SKIP SUMMARIZE SUPPRESS UL + BLINK COMMAND HIGHLIGHT RVSVIDEO MERGE REPLACE URL URLBP URLP + AFTER BEFORE _PAGE_ LEFT RIGHT CHARACTER LENGTH + EXCLUSIVE MISSING MLF ORDER DATA FORMATTED FREQ INTERNAL PRELOADFMT + WIDTH + ACROSS ANALYSIS COMPUTED DISPLAY GROUP ORDER + CONTENTS FLOW ID NOPRINT NOZERO PAGE + CSS CV MAX MEAN MIN MODE N NMISS PCTN PCTSUM RANGE STD STDERR SUM + SUMWGT USS VAR + MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 + P80 QRANGE + PROBT PRT T + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + PROC_KEYWORDS["metalib"] = %w( + OMR DBAUTH DBUSER DBPASSWORD EXCLUDE SELECT READ FOLDER FOLDERID + IMPACT_LIMIT NOEXEC PREFIX REPORT UPDATE_RULE DELETE NOADD NODELDUP + NOUPDATE + LIBID LIBRARY LIBURI + TYPE DETAIL SUMMARY + ) + # from SAS 9.4 Language Interfaces to Metadata, Third Edition + + PROC_KEYWORDS["gchart"] = %w( + DATA ANNOTATE GOUT IMAGEMAP BLOCK HBAR HBAR3D VBAR VBAR3D PIE PIE3D + DONUT STAR ANNO + BY NOTE FORMAT LABEL WHERE + BLOCKMAX CAXIS COUTLINE CTEXT LEGEND NOHEADING NOLEGEND PATTERNID + GROUP MIDPOINT SUBGROUP WOUTLINE DESCRIPTION NAME DISCRETE LEVELS + OLD MISSING HTML_LEGEND HTML URL FREQ G100 SUMVAR TYPE + CAUTOREF CERROR CFRAME CLM CREF FRAME NOFRAME GSPACE IFRAME + IMAGESTYLE TILE FIT LAUTOREF NOSYMBOL PATTERNID SHAPE SPACE + SUBOUTSIDE WAUTOREF WIDTH WOUTLINE WREF + ASCENDING AUTOREF CLIPREF DESCENDING FRONTREF GAXIS MAXIS MINOR + NOAXIS NOBASEREF NOZERO RANGE AXIS REF CFREQ CFREQLABEL NONE CPERCENT + CPERCENTLABEL ERRORBAR BARS BOTH TOP FREQLABEL INSIDE MEAN MEANLABEL + NOSTATS OUTSIDE PERCENT PERCENTLABEL PERCENTSUM SUM + CFILL COUTLINE DETAIL_RADIUS EXPLODE FILL SOLID X INVISIBLE NOHEADING + RADIUS WOUTLINE DETAIL_THRESHOLD DETAIL_PERCENT DETAIL_SLICE + DETAIL_VALUE DONUTPCT LABEL ACROSS DOWN GROUP NOGROUPHEADING SUBGROUP + MATCHCOLOR OTHERCOLOR OTHERLABEL PERCENT ARROW PLABEL PPERCENT SLICE + VALUE + ANGLE ASCENDING CLOCKWISE DESCENDING JSTYLE + NOCONNECT STARMAX STARMIN + ) + # from SAS GRAPH 9.4 Reference, Fourth Edition + + PROC_KEYWORDS["gplot"] = %w( + DATA ANNOTATE GOUT IMAGEMAP UNIFORM BUBBLE BUBBLE2 PLOT PLOT2 + BCOLOR BFILL BFONT BLABEL BSCALE AREA RADIUS BSIZE DESCRIPTION NAME + AUTOHREF CAUTOHREF CHREF HAXIS HMINOR HREF HREVERSE HZERO LAUTOHREF + LHREF WAUTOHREF WHREF HTML URL + CAXIS CFRAME CTEXT DATAORDER FRAME NOFRAME FRONTREF GRID IFRAME + IMAGESTYLE TILE FIT NOAXIS + AUTOVREF CAUTOVREF CVREF LAUTOVREF LVREF VAXIS VMINOR VREF VREVERSE + VZERO WAUTOVREF WVREF + CBASELINE COUTLINE + AREAS GRID LEGEND NOLASTAREA NOLEGEND OVERLAY REGEQN SKIPMISS + ) + # from SAS GRAPH 9.4 Reference, Fourth Edition + + PROC_KEYWORDS["reg"] = %w( + MODEL BY FREQ ID VAR WEIGHT ADD CODE DELETE MTEST OUTPUT PAINT + PLOT PRINT REFIT RESTRICT REWEIGHT STORE TEST + ) + # from SAS/STAT 15.1 User's Guide + + PROC_KEYWORDS["sgplot"] = %w( + STYLEATTRS BAND X Y UPPER LOWER BLOCK BUBBLE DENSITY DOT DROPLINE + ELLIPSE ELLIPSEPARM FRINGE GRADLEGEND HBAR HBARBASIC HBARPARM + HBOX HEATMAP HEATMAPPARM HIGHLOW HISTOGRAM HLINE INSET KEYLEGEND + LINEPARM LOESS NEEDLE PBSPLINE POLYGON REFLINE REG SCATTER SERIES + SPLINE STEP SYMBOLCHAR SYMBOLIMAGE TEXT VBAR VBARBASIC VBARPARM + VBOX VECTOR VLINE WATERFALL XAXIS X2AXIS XAXISTABLE YAXIS Y2AXIS + YAXISTABLE + ) + # from ODS Graphics: Procedures Guide, Sixth Edition + + def self.sas_proc_names + # from SAS Procedures by Name + # http://support.sas.com/documentation/cdl/en/allprodsproc/68038/HTML/default/viewer.htm#procedures.htm + + @proc_names ||= %w( + ACCESS ACECLUS ADAPTIVEREG ALLELE ANOM ANOVA APPEND APPSRV ARIMA + AUTHLIB AUTOREG BCHOICE BOM BOXPLOT BTL BUILD CALENDAR CALIS CALLRFC + CANCORR CANDISC CAPABILITY CASECONTROL CATALOG CATMOD CDISC CDISC + CHART CIMPORT CLP CLUSTER COMPARE COMPILE COMPUTAB CONTENTS CONVERT + COPULA COPY CORR CORRESP COUNTREG CPM CPORT CUSUM CV2VIEW DATEKEYS + DATASETS DATASOURCE DB2EXT DB2UTIL DBCSTAB DBF DBLOAD DELETE DIF + DISCRIM DISPLAY DISTANCE DMSRVADM DMSRVDATASVC DMSRVPROCESSSVC + DOCUMENT DOWNLOAD DQLOCLST DQMATCH DQSCHEME DS2 DTREE ENTROPY ESM + EXPAND EXPLODE EXPORT FACTEX FACTOR FAMILY FASTCLUS FCMP FEDSQL FMM + FONTREG FORECAST FORMAT FORMS FREQ FSBROWSE FSEDIT FSLETTER FSLIST + FSVIEW G3D G3GRID GA GAM GAMPL GANNO GANTT GAREABAR GBARLINE GCHART + GCONTOUR GDEVICE GEE GENESELECT GENMOD GEOCODE GFONT GINSIDE GIS GKPI + GLIMMIX GLM GLMMOD GLMPOWER GLMSELECT GMAP GOPTIONS GPLOT GPROJECT + GRADAR GREDUCE GREMOVE GREPLAY GROOVY GSLIDE GTILE HADOOP HAPLOTYPE + HDMD HPBIN HPCANDISC HPCDM HPCOPULA HPCORR HPCOUNTREG HPDMDB HPDS2 + HPFMM HPGENSELECT HPIMPUTE HPLMIXED HPLOGISTIC HPMIXED HPNLMOD + HPPANEL HPPLS HPPRINCOMP HPQUANTSELECT HPQLIM HPREG HPSAMPLE + HPSEVERITY HPSPLIT HPSUMMARY HTSNP HTTP ICLIFETEST ICPHREG IML IMPORT + IMSTAT IMXFER INBREED INFOMAPS INTPOINT IOMOPERATE IRT ISHIKAWA ITEMS + JAVAINFO JSON KDE KRIGE2D LASR LATTICE LIFEREG LIFETEST LOAN + LOCALEDATA LOESS LOGISTIC LP LUA MACONTROL MAPIMPORT MCMC MDC MDDB + MDS MEANS METADATA METALIB METAOPERATE MI MIANALYZE MIGRATE MIXED + MODECLUS MODEL MSCHART MULTTEST MVPDIAGNOSE MVPMODEL MVPMONITOR + NESTED NETDRAW NETFLOW NLIN NLMIXED NLP NPAR1WAY ODSLIST ODSTABLE + ODSTEXT OLAP OLAPCONTENTS OLAPOPERATE OPERATE OPTEX OPTGRAPH OPTIONS + OPTLOAD OPTLP OPTLSO OPTMILP OPTMODEL OPTNET OPTQP OPTSAVE ORTHOREG + PANEL PARETO PDLREG PDS PDSCOPY PHREG PLAN PLM PLOT PLS PM PMENU + POWER PRESENV PRINCOMP PRINQUAL PRINT PRINTTO PROBIT PROTO PRTDEF + PRTEXP PSMOOTH PWENCODE QDEVICE QLIM QUANTLIFE QUANTREG QUANTSELECT + QUEST RANK RAREEVENTS RDC RDPOOL RDSEC RECOMMEND REG REGISTRY RELEASE + RELIABILITY REPORT RISK ROBUSTREG RSREG SCAPROC SCORE SEQDESIGN + SEQTEST SERVER SEVERITY SGDESIGN SGPANEL SGPLOT SGRENDER SGSCATTER + SHEWHART SIM2D SIMILARITY SIMLIN SIMNORMAL SOAP SORT SOURCE SPECTRA + SPP SQL SQOOP SSM STANDARD STATESPACE STDIZE STDRATE STEPDISC STP + STREAM SUMMARY SURVEYFREQ SURVEYIMPUTE SURVEYLOGISTIC SURVEYMEANS + SURVEYPHREG SURVEYREG SURVEYSELECT SYSLIN TABULATE TAPECOPY TAPELABEL + TEMPLATE TIMEDATA TIMEID TIMEPLOT TIMESERIES TPSPLINE TRANSPOSE + TRANSREG TRANTAB TREE TSCSREG TTEST UCM UNIVARIATE UPLOAD VARCLUS + VARCOMP VARIOGRAM VARMAX VASMP X11 X12 X13 XSL + ) + end + + state :basics do + # Rules to be parsed before the keywords (which are different depending + # on the context) + + rule /\s+/m, Text + + # Single-line comments (between * and ;) - these can actually go onto multiple lines + # case 1 - where it starts a line + rule /^\s*%?\*[^;]*;/m, Comment::Single + # case 2 - where it follows the previous statement on the line (after a semicolon) + rule /(;)(\s*)(%?\*[^;]*;)/m do + groups Punctuation, Text, Comment::Single + end + + # True multiline comments! + rule %r(/[*].*?[*]/)m, Comment::Multiline + + # date/time constants (Language Reference pp91-2) + rule /'[0-9a-z]+?'d/i, Literal::Date + rule /'.+?'dt/i, Literal::Date + rule /'[0-9:]+?([a|p]m)?'t/i, Literal::Date + + rule /'/, Str::Single, :single_string + rule /"/, Str::Double, :double_string + rule /&[a-z0-9_&.]+/i, Name::Variable + + # numeric constants (Language Reference p91) + rule /\d[0-9a-f]*x/i, Num::Hex + rule /\d[0-9e\-.]+/i, Num # scientific notation + + # auto variables from DATA step (Language Reference p46, p37) + rule /\b(_n_|_error_|_file_|_infile_|_msg_|_iorc_|_cmd_)\b/i, Name::Builtin::Pseudo + + # auto variable list names + rule /\b(_character_|_numeric_|_all_)\b/i, Name::Builtin + + # datalines/cards etc + rule /\b(datalines|cards)(\s*)(;)/i do + groups Keyword, Text, Punctuation + push :datalines + end + rule /\b(datalines4|cards4)(\s*)(;)/i do + groups Keyword, Text, Punctuation + push :datalines4 + end + + + # operators (Language Reference p96) + rule %r(\*\*|[\*/\+-]), Operator + rule /[^¬~]?=:?|[<>]=?:?/, Operator + rule /\b(eq|ne|gt|lt|ge|le|in)\b/i, Operator::Word + rule /[&|!¦¬∘~]/, Operator + rule /\b(and|or|not)\b/i, Operator::Word + rule /[<>|><]/, Operator # min/max + rule /\|\|/, Operator # concatenation + + # The OF operator should also be highlighted (Language Reference p49) + rule /\b(of)\b/i, Operator::Word + rule /\b(like)\b/i, Operator::Word # Language Ref p181 + + rule /\d+/, Num::Integer + + rule /\$/, Keyword::Type + + # Macro definitions + rule /(%macro|%mend)(\s*)([\w\d]+)/i do + groups Keyword, Text, Name::Function + end + rule /%mend/, Keyword + + rule /%\w[\w\d]*/ do |m| + if self.class.sas_macro_statements.include? m[0].upcase + token Keyword + elsif self.class.sas_macro_functions.include? m[0].upcase + token Keyword + else + token Name + end + end + end + + state :basics2 do + # Rules to be parsed after the keywords (which are different depending + # on the context) + + # Missing values (Language Reference p81) + rule /\s\.[;\s]/, Keyword::Constant # missing + rule /\s\.[a-z_]/, Name::Constant # user-defined missing + + rule /[\(\),;:\{\}\[\]\\\.]/, Punctuation + + rule /@/, Str::Symbol # line hold specifiers + rule /\?/, Str::Symbol # used for format modifiers + + rule /.*/, Text # Fallback for anything we haven't matched so far + end + + + + state :root do + mixin :basics + + # PROC definitions + rule %r((proc)(\s+)(\w+))ix do |m| + @proc_name = m[0].split(' ')[1].downcase + puts " proc name: #{@proc_name}" if @debug + if self.class.sas_proc_names.include? @proc_name.upcase + groups Keyword, Text, Keyword + else + groups Keyword, Text, Name + end + + push :proc + end + + # Data step definitions + rule /(data)(\s+)([\w\d\.]+)/i do + groups Keyword, Text, Name::Variable + end + # Libname definitions + rule /(libname)(\s+)([\w\d]+)/i do + groups Keyword, Text, Name::Variable + end + + rule /\w[\w\d]*/ do |m| + if self.class.data_step_statements.include? m[0].upcase + token Keyword + elsif self.class.sas_functions.include? m[0].upcase + token Keyword + else + token Name + end + end + + mixin :basics2 + end + + + state :single_string do + rule /''/, Str::Escape + rule /'/, Str::Single, :pop! + rule /[^\\']+/, Str::Single + end + + state :double_string do + rule /&[a-z0-9_&]+\.?/i, Str::Interpol + rule /""/, Str::Escape + rule /"/, Str::Double, :pop! + + rule /[^&"]+/, Str::Double + # Allow & to be used as character if not already matched as macro variable + rule /&/, Str::Double + end + + state :datalines do + rule /[^;]/, Literal::String::Heredoc + rule /;/, Punctuation, :pop! + end + + state :datalines4 do + rule /;{4}/, Punctuation, :pop! + rule /[^;]/, Literal::String::Heredoc + rule /;{,3}/, Literal::String::Heredoc + end + + + # PROCS + state :proc do + rule /(quit|run)/i, Keyword, :pop! + + mixin :basics + rule /\w[\w\d]*/ do |m| + if self.class.data_step_statements.include? m[0].upcase + token Keyword + elsif self.class.sas_functions.include? m[0].upcase + token Keyword + elsif PROC_KEYWORDS.has_key?(@proc_name) and PROC_KEYWORDS[@proc_name].include? m[0].upcase + token Keyword + else + token Name + end + end + + mixin :basics2 + end + + + end #class SAS + end #module Lexers +end #module Rouge diff --git a/spec/lexers/sas_spec.rb b/spec/lexers/sas_spec.rb new file mode 100644 index 0000000000..84ccf605e3 --- /dev/null +++ b/spec/lexers/sas_spec.rb @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- # + +describe Rouge::Lexers::SAS do + let(:subject) { Rouge::Lexers::SAS.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.sas' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'application/x-sas' + assert_guess :mimetype => 'application/x-stat-sas' + assert_guess :mimetype => 'application/x-sas-syntax' + end + end +end + diff --git a/spec/visual/samples/sas b/spec/visual/samples/sas new file mode 100644 index 0000000000..d53e0218f2 --- /dev/null +++ b/spec/visual/samples/sas @@ -0,0 +1,71 @@ +* Example SAS script showing syntax highlighting; +data population; +/* +The first 10 lines of 2017 population data. Source: World Bank +https://data.worldbank.org/indicator/sp.pop.totl +Licence: CC BY-4.0 + +To demonstrate reading data in-line using datalines. +*/ +infile datalines dlm = '09'x; +input + country_name: $20. + country_code: $3. + pop2017; +datalines; +Aruba ABW 105264 +Afghanistan AFG 35530081 +Angola AGO 29784193 +Albania ALB 2873457 +Andorra AND 76965 +Arab World ARB 414491886 +United Arab Emirates ARE 9400145 +Argentina ARG 44271041 +Armenia ARM 2930450 +American Samoa ASM 55641 +; +run; + +* Print first 5 records with over 100k population; +proc print data=population (obs=10); + where pop2017 > 100000; +run; + +* Simulation via a SAS data step; +data sim; + do i = 1 to 100; + x1 = rand("Normal"); + x2 = rand("Binomial", 0.5, 100); + output; + end; +run; + +* Some analysis of sample data included with SAS; +proc means data=sashelp.class; + class sex; + var height weight; + output out = mean_by_sex; +run; + +proc freq data=sashelp.cars; + tables origin; +run; + +* Fit a regression model; +proc reg data=sashelp.cars; + model msrp = enginesize horsepower mpg_city; +run; + +* Plot Fisher's Iris data; +proc sgplot data=sashelp.iris; + scatter x = SepalLength y = SepalWidth / group = Species; +run; + +/* Example macro */ +%macro my_macro(n_iter); + %do i = 1 %to &n_iter; + %put Iteration &i.; + %end; +%mend; + +%my_macro(20); From 4d9d60d9a2be8f89b44c64412d15825c2bc2ebc0 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 1 May 2019 11:58:36 +0100 Subject: [PATCH 02/12] Use Bundler 2.0+ --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 700b0cea01..0cc0a92bd3 100644 --- a/Gemfile +++ b/Gemfile @@ -4,7 +4,7 @@ source 'http://rubygems.org' gemspec -gem 'bundler', '~> 1.15' +gem 'bundler', '~> 2.0' gem 'rake' gem 'minitest', '>= 5.0' From 3191ee40b262959258188a1c4661249f0daf3520 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 12 Jun 2019 22:07:44 +0100 Subject: [PATCH 03/12] Address feedback from pull request Pass string literals to mimetypes Remove excess whitespace Add link to CC-BY licence Add examples of hex numbers to visual sample --- lib/rouge/lexers/sas.rb | 9 +-------- spec/visual/samples/sas | 10 +++++++++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index 4d01df30d3..c8b7b83fb4 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -7,11 +7,7 @@ class SAS < RegexLexer desc "SAS (Statistical Analysis Software)" tag 'sas' filenames '*.sas' - mimetypes *%w( - application/x-sas - application/x-stat-sas - application/x-sas-syntax - ) + mimetypes 'application/x-sas', 'application/x-stat-sas', 'application/x-sas-syntax' def self.data_step_statements # from Data step statements - SAS 9.4 Statements reference @@ -473,8 +469,6 @@ def self.sas_proc_names rule /.*/, Text # Fallback for anything we haven't matched so far end - - state :root do mixin :basics @@ -562,7 +556,6 @@ def self.sas_proc_names mixin :basics2 end - end #class SAS end #module Lexers end #module Rouge diff --git a/spec/visual/samples/sas b/spec/visual/samples/sas index d53e0218f2..e99d0f1aaf 100644 --- a/spec/visual/samples/sas +++ b/spec/visual/samples/sas @@ -3,7 +3,7 @@ data population; /* The first 10 lines of 2017 population data. Source: World Bank https://data.worldbank.org/indicator/sp.pop.totl -Licence: CC BY-4.0 +Used under licence: CC BY-4.0 https://creativecommons.org/licenses/by/4.0 To demonstrate reading data in-line using datalines. */ @@ -69,3 +69,11 @@ run; %mend; %my_macro(20); + +* Hexadecimal and scientific format; +data my_hex; + hex1 = 1ax; + hex2 = 0bx; *SAS requires that hex numbers start with a digit; + sci1 = 3.2e5; + sci2 = 7E-4; +run; From 01c2cc4ec3f09e7ffb2f2ce7571f6324032d052f Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 12 Jun 2019 22:43:00 +0100 Subject: [PATCH 04/12] Clarify and improve handling of lists of keywords * Use consistent naming for method and instance variable * Use a Set for each list of keywords * Wrap the proc_keywords Hash in a method --- lib/rouge/lexers/sas.rb | 390 ++++++++++++++++++++-------------------- 1 file changed, 196 insertions(+), 194 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index c8b7b83fb4..2cadfa16bc 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -12,7 +12,7 @@ class SAS < RegexLexer def self.data_step_statements # from Data step statements - SAS 9.4 Statements reference # http://support.sas.com/documentation/cdl/en/lestmtsref/68024/PDF/default/lestmtsref.pdf - @keywords ||= %w( + @data_step_statements ||= Set.new %w( ABORT ARRAY ATTRIB BY CALL CARDS CARDS4 CATNAME CHECKPOINT EXECUTE_ALWAYS CONTINUE DATA DATALINES DATALINES4 DELETE DESCRIBE DISPLAY DM DO UNTIL WHILE DROP END ENDSAS ERROR EXECUTE FILE FILENAME @@ -29,7 +29,7 @@ def self.data_step_statements def self.sas_functions # from SAS 9.4 Functions and CALL Routines reference # http://support.sas.com/documentation/cdl/en/lefunctionsref/67960/PDF/default/lefunctionsref.pdf - @sasfns ||= %w( + @sas_functions ||= Set.new %w( ABS ADDR ADDRLONG AIRY ALLCOMB ALLPERM ANYALNUM ANYALPHA ANYCNTRL ANYDIGIT ANYFIRST ANYGRAPH ANYLOWER ANYNAME ANYPRINT ANYPUNCT ANYSPACE ANYUPPER ANYXDIGIT ARCOS ARCOSH ARSIN ARSINH ARTANH ATAN @@ -94,8 +94,7 @@ def self.sas_functions def self.sas_macro_statements # from SAS 9.4 Macro Language Reference # Chapter 12 - - @macro_sts ||= %w( + @sas_macro_statements ||= Set.new %w( %COPY %DISPLAY %GLOBAL %INPUT %LET %MACRO %PUT %SYMDEL %SYSCALL %SYSEXEC %SYSLPUT %SYSMACDELETE %SYSMSTORECLEAR %SYSRPUT %WINDOW %ABORT %DO %TO %UNTIL %WHILE %END %GOTO %IF %THEN %ELSE %LOCAL @@ -111,7 +110,7 @@ def self.sas_macro_functions # from SAS 9.4 Macro Language Reference # Chapter 12 - @macro_fns ||= %w( + @sas_macro_functions ||= Set.new %w( %BQUOTE %NRBQUOTE %EVAL %INDEX %LENGTH %QUOTE %NRQUOTE %SCAN %QSCAN %STR %NRSTR %SUBSTR %QSUBSTR %SUPERQ %SYMEXIST %SYMGLOBL %SYMLOCAL %SYSEVALF %SYSFUNC %QSYSFUNC %SYSGET %SYSMACEXEC @@ -124,7 +123,7 @@ def self.sas_auto_macro_vars # from SAS 9.4 Macro Language Reference # Chapter 12 - @macro_auto_vars ||= %w( + @sas_auto_macro_vars ||= Set.new %w( &SYSADDRBITS &SYSBUFFR &SYSCC &SYSCHARWIDTH &SYSCMD &SYSDATASTEPPHASE &SYSDATE &SYSDATE9 &SYSDAY &SYSDEVIC &SYSDMG &SYSDSN &SYSENCODING &SYSENDIAN &SYSENV &SYSERR &SYSERRORTEXT &SYSFILRC &SYSHOSTINFOLONG &SYSHOSTNAME &SYSINDEX @@ -138,198 +137,201 @@ def self.sas_auto_macro_vars ) end - # Create a hash with keywords for common PROCs, keyed by PROC name - PROC_KEYWORDS = Hash.new - - PROC_KEYWORDS["sql"] = %w( - ALTER TABLE CONNECT CREATE INDEX VIEW DELETE DESCRIBE DISCONNECT DROP EXECUTE - INSERT RESET SELECT UPDATE VALIDATE ADD CONSTRAINT DROP FOREIGN KEY PRIMARY - MODIFY LIKE AS ORDER BY USING FROM INTO SET VALUES RESET DISTINCT UNIQUE - WHERE GROUP HAVING LEFT RIGHT INNER JOIN ON - ) - # from SAS 9.4 SQL Procedure User's Guide - - PROC_KEYWORDS["means"] = %w( - BY CLASS FREQ ID OUTPUT OUT TYPES VAR WAYS WEIGHT - ATTRIB FORMAT LABEL WHERE - DESCENDING NOTSORTED - NOTHREADS NOTRAP PCTLDEF SUMSIZE THREADS CLASSDATA COMPLETETYPES - EXCLUSIVE MISSING FW MAXDEC NONOBS NOPRINT ORDER FORMATTED FREQ - UNFORMATTED PRINT PRINTALLTYPES PRINTIDVARS STACKODSOUTPUT - CHARTYPE DESCENDTYPES IDMIN - ALPHA EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF - CLM CSS CV KURTOSIS KURT LCLM MAX MEAN MIN MODE N - NMISS RANGE SKEWNESS SKEW STDDEV STD STDERR SUM SUMWGT UCLM USS VAR - MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 - P80 QRANGE - PROBT PRT T - ASCENDING GROUPINTERNAL MLF PRELOADFMT - MAXID AUTOLABEL AUTONAME KEEPLEN LEVELS NOINHERIT - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["datasets"] = %w( - AGE APPEND ATTRIB AUDIT CHANGE CONTENTS COPY DELETE EXCHANGE - EXCLUDE FORMAT IC CREATE DELETE REACTIVATE INDEX CENTILES INFORMAT - INITIATE LABEL LOG MODIFY REBUILD RENAME REPAIR RESUME SAVE SELECT - SUSPEND TERMINATE USER_VAR XATTR ADD OPTIONS REMOVE SET - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["sort"] = %w( - BY DESCENDING KEY ASCENDING ASC DESC DATECOPY FORCE OVERWRITE - PRESORTED SORTSIZE TAGSORT DUPOUT OUT UNIQUEOUT NODUPKEY NOUNIQUEKEY - NOTHREADS THREADS EQUALS NOEQUALS - ATTRIB FORMAT LABEL WHERE - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["print"] = %w( - BY DESCENDING NOTSORTED PAGEBY SUMBY ID STYLE SUM VAR CONTENTS DATA - GRANDTOTAL_LABEL HEADING LABEL SPLIT SUMLABEL NOSUMLABEL - BLANKLINE COUNT DOUBLE N NOOBS OBS ROUND - ROWS UNIFORM WIDTH - ATTRIB FORMAT LABEL WHERE - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["append"] = %w( - BASE APPENDVER DATA ENCRYPTKEY FORCE GETSORT NOWARN - ATTRIB FORMAT LABEL WHERE - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["transpose"] = %w( - DELIMITER LABEL LET NAME OUT PREFIX SUFFIX BY DESCENDING NOTSORTED - COPY ID IDLABEL VAR INDB - ATTRIB FORMAT LABEL WHERE - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["freq"] = %w( - BY EXACT OUTPUT TABLES TEST WEIGHT - COMPRESS DATA FORMCHAR NLEVELS NOPRINT ORDER PAGE FORMATTED FREQ - INTERNAL - AGREE BARNARD BINOMIAL BIN CHISQ COMOR EQOR ZELEN FISHER JT KAPPA - KENTB TAUB LRCHI MCNEM MEASURES MHCHI OR ODDSRATIO PCHI PCORR RELRISK - RISKDIFF SCORR SMDCR SMDRC STUTC TAUC TREND WTKAP WTKAPPA - OUT AJCHI ALL BDCHI CMH CMH1 CMH2 CMHCOR CMHGA CMHRMS COCHQ CONTGY - CRAMV EQKAP EQWKP GAMMA GS GAILSIMON LAMCR LAMDAS LAMRC LGOR LGRRC1 - LGRRC2 MHOR MHRRC1 MHRRC2 N NMISS PHI PLCORR RDIF1 RDIF2 RISKDIFF1 - RISKDIFF2 RRC1 RELRISK1 RRC2 RELRISK2 RSK1 RISK1 RSK11 RISK11 RSK12 - RISK12 RSK21 RISK21 RSK22 RISK22 TSYMM BOWKER U UCR URC - CELLCHI2 CUMCOL DEVIATION EXPECTED MISSPRINT PEARSONREF PRINTWKTS - SCOROUT SPARSE STDRES TOTPCT - CONTENTS CROSSLIST FORMAT LIST MAXLEVELS NOCOL NOCUM NOFREQ NOPERCENT - NOPRINT NOROW NOSPARSE NOWARN PLOTS OUT OUTCUM OUTEXPECT OUTPCT - ZEROS - ) - # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition - - PROC_KEYWORDS["corr"] = %w( - BY FREQ ID PARTIAL VAR WEIGHT WITH - DATA OUTH OUTK OUTP OUTPLC OUTPLS OUTS - EXCLNPWGHT FISHER HOEFFDING KENDALL NOMISS PEARSON POLYCHORIC - POLYSERIAL ALPHA COV CSSCP SINGULAR SSCP VARDEF PLOTS MATRIX SCATTER - BEST NOCORR NOPRINT NOPROB NOSIMPLE RANK - ) - # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition - - PROC_KEYWORDS["report"] = %w( - BREAK BY DESCENDING NOTSORTED COLUMN COMPUTE STYLE LINE ENDCOMP - CALL DEFINE _ROW_ FREQ RBREAK WEIGHT - ATTRIB FORMAT LABEL WHERE - DATA NOALIAS NOCENTER NOCOMPLETECOLS NOCOMPLETEROWS NOTHREADS - NOWINDOWS OUT PCTLDEF THREADS WINDOWS COMPLETECOLS NOCOMPLETECOLS - COMPLETEROWS NOCOMPLETEROWS CONTENTS SPANROWS COMMAND HELP PROMPT - BOX BYPAGENO CENTER NOCENTER COLWIDTH FORMCHAR LS MISSING PANELS PS - PSPACE SHOWALL SPACING WRAP EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF - NAMED NOHEADER SPLIT HEADLINE HEADSKIP LIST NOEXEC OUTREPT PROFILE - REPORT - COLOR DOL DUL OL PAGE SKIP SUMMARIZE SUPPRESS UL - BLINK COMMAND HIGHLIGHT RVSVIDEO MERGE REPLACE URL URLBP URLP - AFTER BEFORE _PAGE_ LEFT RIGHT CHARACTER LENGTH - EXCLUSIVE MISSING MLF ORDER DATA FORMATTED FREQ INTERNAL PRELOADFMT - WIDTH - ACROSS ANALYSIS COMPUTED DISPLAY GROUP ORDER - CONTENTS FLOW ID NOPRINT NOZERO PAGE - CSS CV MAX MEAN MIN MODE N NMISS PCTN PCTSUM RANGE STD STDERR SUM - SUMWGT USS VAR - MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 - P80 QRANGE - PROBT PRT T - ) - # from BASE SAS 9.4 Procedures Guide, Fifth Edition - - PROC_KEYWORDS["metalib"] = %w( - OMR DBAUTH DBUSER DBPASSWORD EXCLUDE SELECT READ FOLDER FOLDERID - IMPACT_LIMIT NOEXEC PREFIX REPORT UPDATE_RULE DELETE NOADD NODELDUP - NOUPDATE - LIBID LIBRARY LIBURI - TYPE DETAIL SUMMARY - ) - # from SAS 9.4 Language Interfaces to Metadata, Third Edition - - PROC_KEYWORDS["gchart"] = %w( - DATA ANNOTATE GOUT IMAGEMAP BLOCK HBAR HBAR3D VBAR VBAR3D PIE PIE3D - DONUT STAR ANNO - BY NOTE FORMAT LABEL WHERE - BLOCKMAX CAXIS COUTLINE CTEXT LEGEND NOHEADING NOLEGEND PATTERNID - GROUP MIDPOINT SUBGROUP WOUTLINE DESCRIPTION NAME DISCRETE LEVELS - OLD MISSING HTML_LEGEND HTML URL FREQ G100 SUMVAR TYPE - CAUTOREF CERROR CFRAME CLM CREF FRAME NOFRAME GSPACE IFRAME - IMAGESTYLE TILE FIT LAUTOREF NOSYMBOL PATTERNID SHAPE SPACE - SUBOUTSIDE WAUTOREF WIDTH WOUTLINE WREF - ASCENDING AUTOREF CLIPREF DESCENDING FRONTREF GAXIS MAXIS MINOR - NOAXIS NOBASEREF NOZERO RANGE AXIS REF CFREQ CFREQLABEL NONE CPERCENT - CPERCENTLABEL ERRORBAR BARS BOTH TOP FREQLABEL INSIDE MEAN MEANLABEL - NOSTATS OUTSIDE PERCENT PERCENTLABEL PERCENTSUM SUM - CFILL COUTLINE DETAIL_RADIUS EXPLODE FILL SOLID X INVISIBLE NOHEADING - RADIUS WOUTLINE DETAIL_THRESHOLD DETAIL_PERCENT DETAIL_SLICE - DETAIL_VALUE DONUTPCT LABEL ACROSS DOWN GROUP NOGROUPHEADING SUBGROUP - MATCHCOLOR OTHERCOLOR OTHERLABEL PERCENT ARROW PLABEL PPERCENT SLICE - VALUE - ANGLE ASCENDING CLOCKWISE DESCENDING JSTYLE - NOCONNECT STARMAX STARMIN - ) - # from SAS GRAPH 9.4 Reference, Fourth Edition - - PROC_KEYWORDS["gplot"] = %w( - DATA ANNOTATE GOUT IMAGEMAP UNIFORM BUBBLE BUBBLE2 PLOT PLOT2 - BCOLOR BFILL BFONT BLABEL BSCALE AREA RADIUS BSIZE DESCRIPTION NAME - AUTOHREF CAUTOHREF CHREF HAXIS HMINOR HREF HREVERSE HZERO LAUTOHREF - LHREF WAUTOHREF WHREF HTML URL - CAXIS CFRAME CTEXT DATAORDER FRAME NOFRAME FRONTREF GRID IFRAME - IMAGESTYLE TILE FIT NOAXIS - AUTOVREF CAUTOVREF CVREF LAUTOVREF LVREF VAXIS VMINOR VREF VREVERSE - VZERO WAUTOVREF WVREF - CBASELINE COUTLINE - AREAS GRID LEGEND NOLASTAREA NOLEGEND OVERLAY REGEQN SKIPMISS - ) - # from SAS GRAPH 9.4 Reference, Fourth Edition - - PROC_KEYWORDS["reg"] = %w( - MODEL BY FREQ ID VAR WEIGHT ADD CODE DELETE MTEST OUTPUT PAINT - PLOT PRINT REFIT RESTRICT REWEIGHT STORE TEST - ) - # from SAS/STAT 15.1 User's Guide - - PROC_KEYWORDS["sgplot"] = %w( - STYLEATTRS BAND X Y UPPER LOWER BLOCK BUBBLE DENSITY DOT DROPLINE - ELLIPSE ELLIPSEPARM FRINGE GRADLEGEND HBAR HBARBASIC HBARPARM - HBOX HEATMAP HEATMAPPARM HIGHLOW HISTOGRAM HLINE INSET KEYLEGEND - LINEPARM LOESS NEEDLE PBSPLINE POLYGON REFLINE REG SCATTER SERIES - SPLINE STEP SYMBOLCHAR SYMBOLIMAGE TEXT VBAR VBARBASIC VBARPARM - VBOX VECTOR VLINE WATERFALL XAXIS X2AXIS XAXISTABLE YAXIS Y2AXIS - YAXISTABLE - ) - # from ODS Graphics: Procedures Guide, Sixth Edition + def self.proc_keywords + # Create a hash with keywords for common PROCs, keyed by PROC name + @proc_keywords = Hash.new + + @proc_keywords["sql"] ||= Set.new %w( + ALTER TABLE CONNECT CREATE INDEX VIEW DELETE DESCRIBE DISCONNECT DROP EXECUTE + INSERT RESET SELECT UPDATE VALIDATE ADD CONSTRAINT DROP FOREIGN KEY PRIMARY + MODIFY LIKE AS ORDER BY USING FROM INTO SET VALUES RESET DISTINCT UNIQUE + WHERE GROUP HAVING LEFT RIGHT INNER JOIN ON + ) + # from SAS 9.4 SQL Procedure User's Guide + + @proc_keywords["means"] ||= Set.new %w( + BY CLASS FREQ ID OUTPUT OUT TYPES VAR WAYS WEIGHT + ATTRIB FORMAT LABEL WHERE + DESCENDING NOTSORTED + NOTHREADS NOTRAP PCTLDEF SUMSIZE THREADS CLASSDATA COMPLETETYPES + EXCLUSIVE MISSING FW MAXDEC NONOBS NOPRINT ORDER FORMATTED FREQ + UNFORMATTED PRINT PRINTALLTYPES PRINTIDVARS STACKODSOUTPUT + CHARTYPE DESCENDTYPES IDMIN + ALPHA EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF + CLM CSS CV KURTOSIS KURT LCLM MAX MEAN MIN MODE N + NMISS RANGE SKEWNESS SKEW STDDEV STD STDERR SUM SUMWGT UCLM USS VAR + MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 + P80 QRANGE + PROBT PRT T + ASCENDING GROUPINTERNAL MLF PRELOADFMT + MAXID AUTOLABEL AUTONAME KEEPLEN LEVELS NOINHERIT + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["datasets"] ||= Set.new %w( + AGE APPEND ATTRIB AUDIT CHANGE CONTENTS COPY DELETE EXCHANGE + EXCLUDE FORMAT IC CREATE DELETE REACTIVATE INDEX CENTILES INFORMAT + INITIATE LABEL LOG MODIFY REBUILD RENAME REPAIR RESUME SAVE SELECT + SUSPEND TERMINATE USER_VAR XATTR ADD OPTIONS REMOVE SET + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["sort"] ||= Set.new %w( + BY DESCENDING KEY ASCENDING ASC DESC DATECOPY FORCE OVERWRITE + PRESORTED SORTSIZE TAGSORT DUPOUT OUT UNIQUEOUT NODUPKEY NOUNIQUEKEY + NOTHREADS THREADS EQUALS NOEQUALS + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["print"] ||= Set.new %w( + BY DESCENDING NOTSORTED PAGEBY SUMBY ID STYLE SUM VAR CONTENTS DATA + GRANDTOTAL_LABEL HEADING LABEL SPLIT SUMLABEL NOSUMLABEL + BLANKLINE COUNT DOUBLE N NOOBS OBS ROUND + ROWS UNIFORM WIDTH + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["append"] ||= Set.new %w( + BASE APPENDVER DATA ENCRYPTKEY FORCE GETSORT NOWARN + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["transpose"] ||= Set.new %w( + DELIMITER LABEL LET NAME OUT PREFIX SUFFIX BY DESCENDING NOTSORTED + COPY ID IDLABEL VAR INDB + ATTRIB FORMAT LABEL WHERE + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["freq"] ||= Set.new %w( + BY EXACT OUTPUT TABLES TEST WEIGHT + COMPRESS DATA FORMCHAR NLEVELS NOPRINT ORDER PAGE FORMATTED FREQ + INTERNAL + AGREE BARNARD BINOMIAL BIN CHISQ COMOR EQOR ZELEN FISHER JT KAPPA + KENTB TAUB LRCHI MCNEM MEASURES MHCHI OR ODDSRATIO PCHI PCORR RELRISK + RISKDIFF SCORR SMDCR SMDRC STUTC TAUC TREND WTKAP WTKAPPA + OUT AJCHI ALL BDCHI CMH CMH1 CMH2 CMHCOR CMHGA CMHRMS COCHQ CONTGY + CRAMV EQKAP EQWKP GAMMA GS GAILSIMON LAMCR LAMDAS LAMRC LGOR LGRRC1 + LGRRC2 MHOR MHRRC1 MHRRC2 N NMISS PHI PLCORR RDIF1 RDIF2 RISKDIFF1 + RISKDIFF2 RRC1 RELRISK1 RRC2 RELRISK2 RSK1 RISK1 RSK11 RISK11 RSK12 + RISK12 RSK21 RISK21 RSK22 RISK22 TSYMM BOWKER U UCR URC + CELLCHI2 CUMCOL DEVIATION EXPECTED MISSPRINT PEARSONREF PRINTWKTS + SCOROUT SPARSE STDRES TOTPCT + CONTENTS CROSSLIST FORMAT LIST MAXLEVELS NOCOL NOCUM NOFREQ NOPERCENT + NOPRINT NOROW NOSPARSE NOWARN PLOTS OUT OUTCUM OUTEXPECT OUTPCT + ZEROS + ) + # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition + + @proc_keywords["corr"] ||= Set.new %w( + BY FREQ ID PARTIAL VAR WEIGHT WITH + DATA OUTH OUTK OUTP OUTPLC OUTPLS OUTS + EXCLNPWGHT FISHER HOEFFDING KENDALL NOMISS PEARSON POLYCHORIC + POLYSERIAL ALPHA COV CSSCP SINGULAR SSCP VARDEF PLOTS MATRIX SCATTER + BEST NOCORR NOPRINT NOPROB NOSIMPLE RANK + ) + # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition + + @proc_keywords["report"] ||= Set.new %w( + BREAK BY DESCENDING NOTSORTED COLUMN COMPUTE STYLE LINE ENDCOMP + CALL DEFINE _ROW_ FREQ RBREAK WEIGHT + ATTRIB FORMAT LABEL WHERE + DATA NOALIAS NOCENTER NOCOMPLETECOLS NOCOMPLETEROWS NOTHREADS + NOWINDOWS OUT PCTLDEF THREADS WINDOWS COMPLETECOLS NOCOMPLETECOLS + COMPLETEROWS NOCOMPLETEROWS CONTENTS SPANROWS COMMAND HELP PROMPT + BOX BYPAGENO CENTER NOCENTER COLWIDTH FORMCHAR LS MISSING PANELS PS + PSPACE SHOWALL SPACING WRAP EXCLNPWGT QMARKERS QMETHOD QNTLDEF VARDEF + NAMED NOHEADER SPLIT HEADLINE HEADSKIP LIST NOEXEC OUTREPT PROFILE + REPORT + COLOR DOL DUL OL PAGE SKIP SUMMARIZE SUPPRESS UL + BLINK COMMAND HIGHLIGHT RVSVIDEO MERGE REPLACE URL URLBP URLP + AFTER BEFORE _PAGE_ LEFT RIGHT CHARACTER LENGTH + EXCLUSIVE MISSING MLF ORDER DATA FORMATTED FREQ INTERNAL PRELOADFMT + WIDTH + ACROSS ANALYSIS COMPUTED DISPLAY GROUP ORDER + CONTENTS FLOW ID NOPRINT NOZERO PAGE + CSS CV MAX MEAN MIN MODE N NMISS PCTN PCTSUM RANGE STD STDERR SUM + SUMWGT USS VAR + MEDIAN P50 Q1 P25 Q3 P75 P1 P90 P5 P95 P10 P99 P20 P30 P40 P60 P70 + P80 QRANGE + PROBT PRT T + ) + # from BASE SAS 9.4 Procedures Guide, Fifth Edition + + @proc_keywords["metalib"] ||= Set.new %w( + OMR DBAUTH DBUSER DBPASSWORD EXCLUDE SELECT READ FOLDER FOLDERID + IMPACT_LIMIT NOEXEC PREFIX REPORT UPDATE_RULE DELETE NOADD NODELDUP + NOUPDATE + LIBID LIBRARY LIBURI + TYPE DETAIL SUMMARY + ) + # from SAS 9.4 Language Interfaces to Metadata, Third Edition + + @proc_keywords["gchart"] ||= Set.new %w( + DATA ANNOTATE GOUT IMAGEMAP BLOCK HBAR HBAR3D VBAR VBAR3D PIE PIE3D + DONUT STAR ANNO + BY NOTE FORMAT LABEL WHERE + BLOCKMAX CAXIS COUTLINE CTEXT LEGEND NOHEADING NOLEGEND PATTERNID + GROUP MIDPOINT SUBGROUP WOUTLINE DESCRIPTION NAME DISCRETE LEVELS + OLD MISSING HTML_LEGEND HTML URL FREQ G100 SUMVAR TYPE + CAUTOREF CERROR CFRAME CLM CREF FRAME NOFRAME GSPACE IFRAME + IMAGESTYLE TILE FIT LAUTOREF NOSYMBOL PATTERNID SHAPE SPACE + SUBOUTSIDE WAUTOREF WIDTH WOUTLINE WREF + ASCENDING AUTOREF CLIPREF DESCENDING FRONTREF GAXIS MAXIS MINOR + NOAXIS NOBASEREF NOZERO RANGE AXIS REF CFREQ CFREQLABEL NONE CPERCENT + CPERCENTLABEL ERRORBAR BARS BOTH TOP FREQLABEL INSIDE MEAN MEANLABEL + NOSTATS OUTSIDE PERCENT PERCENTLABEL PERCENTSUM SUM + CFILL COUTLINE DETAIL_RADIUS EXPLODE FILL SOLID X INVISIBLE NOHEADING + RADIUS WOUTLINE DETAIL_THRESHOLD DETAIL_PERCENT DETAIL_SLICE + DETAIL_VALUE DONUTPCT LABEL ACROSS DOWN GROUP NOGROUPHEADING SUBGROUP + MATCHCOLOR OTHERCOLOR OTHERLABEL PERCENT ARROW PLABEL PPERCENT SLICE + VALUE + ANGLE ASCENDING CLOCKWISE DESCENDING JSTYLE + NOCONNECT STARMAX STARMIN + ) + # from SAS GRAPH 9.4 Reference, Fourth Edition + + @proc_keywords["gplot"] ||= Set.new %w( + DATA ANNOTATE GOUT IMAGEMAP UNIFORM BUBBLE BUBBLE2 PLOT PLOT2 + BCOLOR BFILL BFONT BLABEL BSCALE AREA RADIUS BSIZE DESCRIPTION NAME + AUTOHREF CAUTOHREF CHREF HAXIS HMINOR HREF HREVERSE HZERO LAUTOHREF + LHREF WAUTOHREF WHREF HTML URL + CAXIS CFRAME CTEXT DATAORDER FRAME NOFRAME FRONTREF GRID IFRAME + IMAGESTYLE TILE FIT NOAXIS + AUTOVREF CAUTOVREF CVREF LAUTOVREF LVREF VAXIS VMINOR VREF VREVERSE + VZERO WAUTOVREF WVREF + CBASELINE COUTLINE + AREAS GRID LEGEND NOLASTAREA NOLEGEND OVERLAY REGEQN SKIPMISS + ) + # from SAS GRAPH 9.4 Reference, Fourth Edition + + @proc_keywords["reg"] ||= Set.new %w( + MODEL BY FREQ ID VAR WEIGHT ADD CODE DELETE MTEST OUTPUT PAINT + PLOT PRINT REFIT RESTRICT REWEIGHT STORE TEST + ) + # from SAS/STAT 15.1 User's Guide + + @proc_keywords["sgplot"] ||= Set.new %w( + STYLEATTRS BAND X Y UPPER LOWER BLOCK BUBBLE DENSITY DOT DROPLINE + ELLIPSE ELLIPSEPARM FRINGE GRADLEGEND HBAR HBARBASIC HBARPARM + HBOX HEATMAP HEATMAPPARM HIGHLOW HISTOGRAM HLINE INSET KEYLEGEND + LINEPARM LOESS NEEDLE PBSPLINE POLYGON REFLINE REG SCATTER SERIES + SPLINE STEP SYMBOLCHAR SYMBOLIMAGE TEXT VBAR VBARBASIC VBARPARM + VBOX VECTOR VLINE WATERFALL XAXIS X2AXIS XAXISTABLE YAXIS Y2AXIS + YAXISTABLE + ) + # from ODS Graphics: Procedures Guide, Sixth Edition + return @proc_keywords + end def self.sas_proc_names # from SAS Procedures by Name # http://support.sas.com/documentation/cdl/en/allprodsproc/68038/HTML/default/viewer.htm#procedures.htm - @proc_names ||= %w( + @sas_proc_names ||= Set.new %w( ACCESS ACECLUS ADAPTIVEREG ALLELE ANOM ANOVA APPEND APPSRV ARIMA AUTHLIB AUTOREG BCHOICE BOM BOXPLOT BTL BUILD CALENDAR CALIS CALLRFC CANCORR CANDISC CAPABILITY CASECONTROL CATALOG CATMOD CDISC CDISC @@ -546,7 +548,7 @@ def self.sas_proc_names token Keyword elsif self.class.sas_functions.include? m[0].upcase token Keyword - elsif PROC_KEYWORDS.has_key?(@proc_name) and PROC_KEYWORDS[@proc_name].include? m[0].upcase + elsif self.class.proc_keywords.has_key?(@proc_name) and self.class.proc_keywords[@proc_name].include? m[0].upcase token Keyword else token Name From 004c8453247957e0b7dd4d5326187eec340b7d95 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Fri, 5 Jul 2019 11:16:29 +0100 Subject: [PATCH 05/12] Only do fallback matching to end of word --- lib/rouge/lexers/sas.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index 2cadfa16bc..169b8221cd 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -468,7 +468,7 @@ def self.sas_proc_names rule /@/, Str::Symbol # line hold specifiers rule /\?/, Str::Symbol # used for format modifiers - rule /.*/, Text # Fallback for anything we haven't matched so far + rule /[^\s]+/, Text # Fallback for anything we haven't matched so far end state :root do From d15314166644805e0684d21be4372b53e9c3371b Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Fri, 5 Jul 2019 11:16:58 +0100 Subject: [PATCH 06/12] Backslash doesn't escape single quotes in SAS --- lib/rouge/lexers/sas.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index 169b8221cd..a0601fbbd2 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -513,7 +513,7 @@ def self.sas_proc_names state :single_string do rule /''/, Str::Escape rule /'/, Str::Single, :pop! - rule /[^\\']+/, Str::Single + rule /[^']+/, Str::Single end state :double_string do From ac2f4bd053e6ce89d97485fd4cf34e807193bbd4 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Fri, 5 Jul 2019 13:57:12 +0100 Subject: [PATCH 07/12] Add newline to end of demos/sas --- lib/rouge/demos/sas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/demos/sas b/lib/rouge/demos/sas index 24ac5aca0c..1eee876ed8 100644 --- a/lib/rouge/demos/sas +++ b/lib/rouge/demos/sas @@ -10,4 +10,4 @@ proc means data=sashelp.class; class sex; var height weight; output out = mean_by_sex; -run; \ No newline at end of file +run; From 3903eb5aef3117d3b432f6c38ef4897d17dbb86e Mon Sep 17 00:00:00 2001 From: tomsutch Date: Fri, 5 Jul 2019 14:10:08 +0100 Subject: [PATCH 08/12] Use ||= for proc_keywords Hash Co-Authored-By: Ashwin Maroli --- lib/rouge/lexers/sas.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index a0601fbbd2..eaf38b683d 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -139,7 +139,7 @@ def self.sas_auto_macro_vars def self.proc_keywords # Create a hash with keywords for common PROCs, keyed by PROC name - @proc_keywords = Hash.new + @proc_keywords ||= {} @proc_keywords["sql"] ||= Set.new %w( ALTER TABLE CONNECT CREATE INDEX VIEW DELETE DESCRIBE DISCONNECT DROP EXECUTE From b1f3ae3e67387bc6750aa1eac34228c3a6dd5d04 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Fri, 5 Jul 2019 14:21:21 +0100 Subject: [PATCH 09/12] Remove redundancy in matching character classes --- lib/rouge/lexers/sas.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index eaf38b683d..1bacecabff 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -439,12 +439,12 @@ def self.sas_proc_names rule /\$/, Keyword::Type # Macro definitions - rule /(%macro|%mend)(\s*)([\w\d]+)/i do + rule /(%macro|%mend)(\s*)(\w+)/i do groups Keyword, Text, Name::Function end rule /%mend/, Keyword - rule /%\w[\w\d]*/ do |m| + rule /%\w+/ do |m| if self.class.sas_macro_statements.include? m[0].upcase token Keyword elsif self.class.sas_macro_functions.include? m[0].upcase @@ -488,15 +488,15 @@ def self.sas_proc_names end # Data step definitions - rule /(data)(\s+)([\w\d\.]+)/i do + rule /(data)(\s+)([\w\.]+)/i do groups Keyword, Text, Name::Variable end # Libname definitions - rule /(libname)(\s+)([\w\d]+)/i do + rule /(libname)(\s+)(\w+)/i do groups Keyword, Text, Name::Variable end - rule /\w[\w\d]*/ do |m| + rule /\w+/ do |m| if self.class.data_step_statements.include? m[0].upcase token Keyword elsif self.class.sas_functions.include? m[0].upcase @@ -543,7 +543,7 @@ def self.sas_proc_names rule /(quit|run)/i, Keyword, :pop! mixin :basics - rule /\w[\w\d]*/ do |m| + rule /\w+/ do |m| if self.class.data_step_statements.include? m[0].upcase token Keyword elsif self.class.sas_functions.include? m[0].upcase From f0a0871123d7062290d9734d26a0fd66c6fd4fc1 Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 17 Jul 2019 09:52:42 +0100 Subject: [PATCH 10/12] Detect proc name more efficiently No need to re-split text as we are capturing terms separately Use upper-case keys in proc_keywords for simplicity --- lib/rouge/lexers/sas.rb | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index 1bacecabff..a807f8e285 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -141,7 +141,7 @@ def self.proc_keywords # Create a hash with keywords for common PROCs, keyed by PROC name @proc_keywords ||= {} - @proc_keywords["sql"] ||= Set.new %w( + @proc_keywords["SQL"] ||= Set.new %w( ALTER TABLE CONNECT CREATE INDEX VIEW DELETE DESCRIBE DISCONNECT DROP EXECUTE INSERT RESET SELECT UPDATE VALIDATE ADD CONSTRAINT DROP FOREIGN KEY PRIMARY MODIFY LIKE AS ORDER BY USING FROM INTO SET VALUES RESET DISTINCT UNIQUE @@ -149,7 +149,7 @@ def self.proc_keywords ) # from SAS 9.4 SQL Procedure User's Guide - @proc_keywords["means"] ||= Set.new %w( + @proc_keywords["MEANS"] ||= Set.new %w( BY CLASS FREQ ID OUTPUT OUT TYPES VAR WAYS WEIGHT ATTRIB FORMAT LABEL WHERE DESCENDING NOTSORTED @@ -168,7 +168,7 @@ def self.proc_keywords ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["datasets"] ||= Set.new %w( + @proc_keywords["DATASETS"] ||= Set.new %w( AGE APPEND ATTRIB AUDIT CHANGE CONTENTS COPY DELETE EXCHANGE EXCLUDE FORMAT IC CREATE DELETE REACTIVATE INDEX CENTILES INFORMAT INITIATE LABEL LOG MODIFY REBUILD RENAME REPAIR RESUME SAVE SELECT @@ -176,7 +176,7 @@ def self.proc_keywords ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["sort"] ||= Set.new %w( + @proc_keywords["SORT"] ||= Set.new %w( BY DESCENDING KEY ASCENDING ASC DESC DATECOPY FORCE OVERWRITE PRESORTED SORTSIZE TAGSORT DUPOUT OUT UNIQUEOUT NODUPKEY NOUNIQUEKEY NOTHREADS THREADS EQUALS NOEQUALS @@ -184,7 +184,7 @@ def self.proc_keywords ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["print"] ||= Set.new %w( + @proc_keywords["PRINT"] ||= Set.new %w( BY DESCENDING NOTSORTED PAGEBY SUMBY ID STYLE SUM VAR CONTENTS DATA GRANDTOTAL_LABEL HEADING LABEL SPLIT SUMLABEL NOSUMLABEL BLANKLINE COUNT DOUBLE N NOOBS OBS ROUND @@ -193,20 +193,20 @@ def self.proc_keywords ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["append"] ||= Set.new %w( + @proc_keywords["APPEND"] ||= Set.new %w( BASE APPENDVER DATA ENCRYPTKEY FORCE GETSORT NOWARN ATTRIB FORMAT LABEL WHERE ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["transpose"] ||= Set.new %w( + @proc_keywords["TRANSPOSE"] ||= Set.new %w( DELIMITER LABEL LET NAME OUT PREFIX SUFFIX BY DESCENDING NOTSORTED COPY ID IDLABEL VAR INDB ATTRIB FORMAT LABEL WHERE ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["freq"] ||= Set.new %w( + @proc_keywords["FREQ"] ||= Set.new %w( BY EXACT OUTPUT TABLES TEST WEIGHT COMPRESS DATA FORMCHAR NLEVELS NOPRINT ORDER PAGE FORMATTED FREQ INTERNAL @@ -226,7 +226,7 @@ def self.proc_keywords ) # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition - @proc_keywords["corr"] ||= Set.new %w( + @proc_keywords["CORR"] ||= Set.new %w( BY FREQ ID PARTIAL VAR WEIGHT WITH DATA OUTH OUTK OUTP OUTPLC OUTPLS OUTS EXCLNPWGHT FISHER HOEFFDING KENDALL NOMISS PEARSON POLYCHORIC @@ -235,7 +235,7 @@ def self.proc_keywords ) # from Base SAS 9.4 Procedures Guide: Statistical Procedures, Fourth Edition - @proc_keywords["report"] ||= Set.new %w( + @proc_keywords["REPORT"] ||= Set.new %w( BREAK BY DESCENDING NOTSORTED COLUMN COMPUTE STYLE LINE ENDCOMP CALL DEFINE _ROW_ FREQ RBREAK WEIGHT ATTRIB FORMAT LABEL WHERE @@ -261,7 +261,7 @@ def self.proc_keywords ) # from BASE SAS 9.4 Procedures Guide, Fifth Edition - @proc_keywords["metalib"] ||= Set.new %w( + @proc_keywords["METALIB"] ||= Set.new %w( OMR DBAUTH DBUSER DBPASSWORD EXCLUDE SELECT READ FOLDER FOLDERID IMPACT_LIMIT NOEXEC PREFIX REPORT UPDATE_RULE DELETE NOADD NODELDUP NOUPDATE @@ -270,7 +270,7 @@ def self.proc_keywords ) # from SAS 9.4 Language Interfaces to Metadata, Third Edition - @proc_keywords["gchart"] ||= Set.new %w( + @proc_keywords["GCHART"] ||= Set.new %w( DATA ANNOTATE GOUT IMAGEMAP BLOCK HBAR HBAR3D VBAR VBAR3D PIE PIE3D DONUT STAR ANNO BY NOTE FORMAT LABEL WHERE @@ -294,7 +294,7 @@ def self.proc_keywords ) # from SAS GRAPH 9.4 Reference, Fourth Edition - @proc_keywords["gplot"] ||= Set.new %w( + @proc_keywords["GPLOT"] ||= Set.new %w( DATA ANNOTATE GOUT IMAGEMAP UNIFORM BUBBLE BUBBLE2 PLOT PLOT2 BCOLOR BFILL BFONT BLABEL BSCALE AREA RADIUS BSIZE DESCRIPTION NAME AUTOHREF CAUTOHREF CHREF HAXIS HMINOR HREF HREVERSE HZERO LAUTOHREF @@ -308,13 +308,13 @@ def self.proc_keywords ) # from SAS GRAPH 9.4 Reference, Fourth Edition - @proc_keywords["reg"] ||= Set.new %w( + @proc_keywords["REG"] ||= Set.new %w( MODEL BY FREQ ID VAR WEIGHT ADD CODE DELETE MTEST OUTPUT PAINT PLOT PRINT REFIT RESTRICT REWEIGHT STORE TEST ) # from SAS/STAT 15.1 User's Guide - @proc_keywords["sgplot"] ||= Set.new %w( + @proc_keywords["SGPLOT"] ||= Set.new %w( STYLEATTRS BAND X Y UPPER LOWER BLOCK BUBBLE DENSITY DOT DROPLINE ELLIPSE ELLIPSEPARM FRINGE GRADLEGEND HBAR HBARBASIC HBARPARM HBOX HEATMAP HEATMAPPARM HIGHLOW HISTOGRAM HLINE INSET KEYLEGEND @@ -476,9 +476,9 @@ def self.sas_proc_names # PROC definitions rule %r((proc)(\s+)(\w+))ix do |m| - @proc_name = m[0].split(' ')[1].downcase + @proc_name = m[3].upcase puts " proc name: #{@proc_name}" if @debug - if self.class.sas_proc_names.include? @proc_name.upcase + if self.class.sas_proc_names.include? @proc_name groups Keyword, Text, Keyword else groups Keyword, Text, Name From 10d2c0a60ad7f2e7c1fa10d9188953dd1114118c Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 17 Jul 2019 09:56:39 +0100 Subject: [PATCH 11/12] Replace regex delimiters to prevent errors Co-Authored-By: Ashwin Maroli --- lib/rouge/lexers/sas.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index a807f8e285..b7ca5118dc 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -427,7 +427,7 @@ def self.sas_proc_names rule /\b(eq|ne|gt|lt|ge|le|in)\b/i, Operator::Word rule /[&|!¦¬∘~]/, Operator rule /\b(and|or|not)\b/i, Operator::Word - rule /[<>|><]/, Operator # min/max + rule /(<>|><)/, Operator # min/max rule /\|\|/, Operator # concatenation # The OF operator should also be highlighted (Language Reference p49) @@ -475,7 +475,7 @@ def self.sas_proc_names mixin :basics # PROC definitions - rule %r((proc)(\s+)(\w+))ix do |m| + rule %r!(proc)(\s+)(\w+)!ix do |m| @proc_name = m[3].upcase puts " proc name: #{@proc_name}" if @debug if self.class.sas_proc_names.include? @proc_name From 2faee3becc6ff73b35b0a472dc34dec33becd4db Mon Sep 17 00:00:00 2001 From: Tom Sutch Date: Wed, 17 Jul 2019 10:38:29 +0100 Subject: [PATCH 12/12] Resolve Rubocop AmbiguousRegexpLiteral offenses --- lib/rouge/lexers/sas.rb | 104 ++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/lib/rouge/lexers/sas.rb b/lib/rouge/lexers/sas.rb index b7ca5118dc..6095854d63 100644 --- a/lib/rouge/lexers/sas.rb +++ b/lib/rouge/lexers/sas.rb @@ -378,13 +378,13 @@ def self.sas_proc_names # Rules to be parsed before the keywords (which are different depending # on the context) - rule /\s+/m, Text + rule %r/\s+/m, Text # Single-line comments (between * and ;) - these can actually go onto multiple lines # case 1 - where it starts a line - rule /^\s*%?\*[^;]*;/m, Comment::Single + rule %r/^\s*%?\*[^;]*;/m, Comment::Single # case 2 - where it follows the previous statement on the line (after a semicolon) - rule /(;)(\s*)(%?\*[^;]*;)/m do + rule %r/(;)(\s*)(%?\*[^;]*;)/m do groups Punctuation, Text, Comment::Single end @@ -392,30 +392,30 @@ def self.sas_proc_names rule %r(/[*].*?[*]/)m, Comment::Multiline # date/time constants (Language Reference pp91-2) - rule /'[0-9a-z]+?'d/i, Literal::Date - rule /'.+?'dt/i, Literal::Date - rule /'[0-9:]+?([a|p]m)?'t/i, Literal::Date + rule %r/'[0-9a-z]+?'d/i, Literal::Date + rule %r/'.+?'dt/i, Literal::Date + rule %r/'[0-9:]+?([a|p]m)?'t/i, Literal::Date - rule /'/, Str::Single, :single_string - rule /"/, Str::Double, :double_string - rule /&[a-z0-9_&.]+/i, Name::Variable + rule %r/'/, Str::Single, :single_string + rule %r/"/, Str::Double, :double_string + rule %r/&[a-z0-9_&.]+/i, Name::Variable # numeric constants (Language Reference p91) - rule /\d[0-9a-f]*x/i, Num::Hex - rule /\d[0-9e\-.]+/i, Num # scientific notation + rule %r/\d[0-9a-f]*x/i, Num::Hex + rule %r/\d[0-9e\-.]+/i, Num # scientific notation # auto variables from DATA step (Language Reference p46, p37) - rule /\b(_n_|_error_|_file_|_infile_|_msg_|_iorc_|_cmd_)\b/i, Name::Builtin::Pseudo + rule %r/\b(_n_|_error_|_file_|_infile_|_msg_|_iorc_|_cmd_)\b/i, Name::Builtin::Pseudo # auto variable list names - rule /\b(_character_|_numeric_|_all_)\b/i, Name::Builtin + rule %r/\b(_character_|_numeric_|_all_)\b/i, Name::Builtin # datalines/cards etc - rule /\b(datalines|cards)(\s*)(;)/i do + rule %r/\b(datalines|cards)(\s*)(;)/i do groups Keyword, Text, Punctuation push :datalines end - rule /\b(datalines4|cards4)(\s*)(;)/i do + rule %r/\b(datalines4|cards4)(\s*)(;)/i do groups Keyword, Text, Punctuation push :datalines4 end @@ -423,28 +423,28 @@ def self.sas_proc_names # operators (Language Reference p96) rule %r(\*\*|[\*/\+-]), Operator - rule /[^¬~]?=:?|[<>]=?:?/, Operator - rule /\b(eq|ne|gt|lt|ge|le|in)\b/i, Operator::Word - rule /[&|!¦¬∘~]/, Operator - rule /\b(and|or|not)\b/i, Operator::Word - rule /(<>|><)/, Operator # min/max - rule /\|\|/, Operator # concatenation + rule %r/[^¬~]?=:?|[<>]=?:?/, Operator + rule %r/\b(eq|ne|gt|lt|ge|le|in)\b/i, Operator::Word + rule %r/[&|!¦¬∘~]/, Operator + rule %r/\b(and|or|not)\b/i, Operator::Word + rule %r/(<>|><)/, Operator # min/max + rule %r/\|\|/, Operator # concatenation # The OF operator should also be highlighted (Language Reference p49) - rule /\b(of)\b/i, Operator::Word - rule /\b(like)\b/i, Operator::Word # Language Ref p181 + rule %r/\b(of)\b/i, Operator::Word + rule %r/\b(like)\b/i, Operator::Word # Language Ref p181 - rule /\d+/, Num::Integer + rule %r/\d+/, Num::Integer - rule /\$/, Keyword::Type + rule %r/\$/, Keyword::Type # Macro definitions - rule /(%macro|%mend)(\s*)(\w+)/i do + rule %r/(%macro|%mend)(\s*)(\w+)/i do groups Keyword, Text, Name::Function end - rule /%mend/, Keyword + rule %r/%mend/, Keyword - rule /%\w+/ do |m| + rule %r/%\w+/ do |m| if self.class.sas_macro_statements.include? m[0].upcase token Keyword elsif self.class.sas_macro_functions.include? m[0].upcase @@ -460,15 +460,15 @@ def self.sas_proc_names # on the context) # Missing values (Language Reference p81) - rule /\s\.[;\s]/, Keyword::Constant # missing - rule /\s\.[a-z_]/, Name::Constant # user-defined missing + rule %r/\s\.[;\s]/, Keyword::Constant # missing + rule %r/\s\.[a-z_]/, Name::Constant # user-defined missing - rule /[\(\),;:\{\}\[\]\\\.]/, Punctuation + rule %r/[\(\),;:\{\}\[\]\\\.]/, Punctuation - rule /@/, Str::Symbol # line hold specifiers - rule /\?/, Str::Symbol # used for format modifiers + rule %r/@/, Str::Symbol # line hold specifiers + rule %r/\?/, Str::Symbol # used for format modifiers - rule /[^\s]+/, Text # Fallback for anything we haven't matched so far + rule %r/[^\s]+/, Text # Fallback for anything we haven't matched so far end state :root do @@ -488,15 +488,15 @@ def self.sas_proc_names end # Data step definitions - rule /(data)(\s+)([\w\.]+)/i do + rule %r/(data)(\s+)([\w\.]+)/i do groups Keyword, Text, Name::Variable end # Libname definitions - rule /(libname)(\s+)(\w+)/i do + rule %r/(libname)(\s+)(\w+)/i do groups Keyword, Text, Name::Variable end - rule /\w+/ do |m| + rule %r/\w+/ do |m| if self.class.data_step_statements.include? m[0].upcase token Keyword elsif self.class.sas_functions.include? m[0].upcase @@ -511,39 +511,39 @@ def self.sas_proc_names state :single_string do - rule /''/, Str::Escape - rule /'/, Str::Single, :pop! - rule /[^']+/, Str::Single + rule %r/''/, Str::Escape + rule %r/'/, Str::Single, :pop! + rule %r/[^']+/, Str::Single end state :double_string do - rule /&[a-z0-9_&]+\.?/i, Str::Interpol - rule /""/, Str::Escape - rule /"/, Str::Double, :pop! + rule %r/&[a-z0-9_&]+\.?/i, Str::Interpol + rule %r/""/, Str::Escape + rule %r/"/, Str::Double, :pop! - rule /[^&"]+/, Str::Double + rule %r/[^&"]+/, Str::Double # Allow & to be used as character if not already matched as macro variable - rule /&/, Str::Double + rule %r/&/, Str::Double end state :datalines do - rule /[^;]/, Literal::String::Heredoc - rule /;/, Punctuation, :pop! + rule %r/[^;]/, Literal::String::Heredoc + rule %r/;/, Punctuation, :pop! end state :datalines4 do - rule /;{4}/, Punctuation, :pop! - rule /[^;]/, Literal::String::Heredoc - rule /;{,3}/, Literal::String::Heredoc + rule %r/;{4}/, Punctuation, :pop! + rule %r/[^;]/, Literal::String::Heredoc + rule %r/;{,3}/, Literal::String::Heredoc end # PROCS state :proc do - rule /(quit|run)/i, Keyword, :pop! + rule %r/(quit|run)/i, Keyword, :pop! mixin :basics - rule /\w+/ do |m| + rule %r/\w+/ do |m| if self.class.data_step_statements.include? m[0].upcase token Keyword elsif self.class.sas_functions.include? m[0].upcase