summaryrefslogtreecommitdiff
path: root/apps/codestyle/codestyle-sql.py
blob: 7d7511323c885e091962fd0c7edf67bc3133b539 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import io
import os
import sys
import re
import glob

# Locate the pending-update directories relative to the current working directory
base_dir = os.getcwd()
pattern = os.path.join(base_dir, 'data/sql/updates/pending_db_*')
src_directory = glob.glob(pattern)

# Shared state: overall failure flag and a per-check status table.
# Every check starts as "Passed"; a check function flips its own entry to "Failed".
error_handler = False
results = dict.fromkeys(
    (
        "Multiple blank lines check",
        "Trailing whitespace check",
        "SQL codestyle check",
        "INSERT & DELETE safety usage check",
        "Missing semicolon check",
        "Backtick check",
    ),
    "Passed",
)

# Collect all files in all directories
def collect_files_from_directories(directories: list) -> list:
    """Return the paths of all files below *directories*, excluding ``.sh`` files.

    Each directory is walked recursively with ``os.walk``; every returned
    path is the walk root joined with the file name.
    """
    collected = []
    for directory in directories:
        for parent, _subdirs, names in os.walk(directory):
            collected.extend(
                os.path.join(parent, name)
                for name in names
                if not name.endswith('.sh')  # Skip .sh files
            )
    return collected

# Main function to parse all the files of the project
def parsing_file(files: list) -> None:
    """Run every codestyle check on each path in *files* and print a summary.

    Exits the process with status 1 when a file cannot be decoded as UTF-8
    or when any check has set the module-level ``error_handler`` flag.
    """
    banner = (
        "Starting AzerothCore SQL Codestyle check...",
        " ",
        "Please read the SQL Standards for AzerothCore:",
        "https://www.azerothcore.org/wiki/sql-standards",
        " ",
    )
    for banner_line in banner:
        print(banner_line)

    # Iterate over all files
    for file_path in files:
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                # Each check rewinds the handle itself, so one open file is shared
                checks = (
                    multiple_blank_lines_check,
                    trailing_whitespace_check,
                    sql_check,
                    insert_delete_safety_check,
                    semicolon_check,
                    backtick_check,
                )
                for run_check in checks:
                    run_check(file, file_path)
        except UnicodeDecodeError:
            print(f"\n❌ Could not decode file {file_path}")
            sys.exit(1)

    # Output the results
    print("\n ")
    for check, result in results.items():
        print(f"{check} : {result}")
    print("\n ")
    if not error_handler:
        print(f"\n✅ Everything looks good")
        return
    print("\n❌ Please fix the codestyle issues above.")
    sys.exit(1)

# Codestyle patterns checking for multiple blank lines
def multiple_blank_lines_check(file: io, file_path: str) -> None:
    """Report runs of two or more blank lines and blank lines ending the file.

    A line counts as blank when it contains only whitespace. On any finding
    the module-level ``error_handler`` flag is set and the matching entry in
    ``results`` is marked "Failed".
    """
    global error_handler, results
    file.seek(0)  # Reset file pointer to the beginning
    found_issue = False
    blank_run = 0
    # Parse all the file
    for line_number, text in enumerate(file, start=1):
        if text.strip():
            blank_run = 0
            continue
        blank_run += 1
        if blank_run >= 2:
            # Report the line before the current one, i.e. the previous blank line
            print(f"❌ Multiple blank lines found in {file_path} at line {line_number - 1}")
            found_issue = True
    # Additional check for the end of the file
    if blank_run > 0:
        print(f"❌ Multiple blank lines found at the end of: {file_path}")
        found_issue = True
    # Handle the script error and update the result output
    if not found_issue:
        return
    error_handler = True
    results["Multiple blank lines check"] = "Failed"

# Codestyle patterns checking for whitespace at the end of the lines
def trailing_whitespace_check(file: io, file_path: str) -> None:
    """Report every line that ends in whitespace before its line terminator.

    Any trailing whitespace character counts (spaces and tabs alike), and
    the final line is checked even when it has no terminating newline.
    On any finding the module-level ``error_handler`` flag is set and the
    matching entry in ``results`` is marked "Failed".
    """
    global error_handler, results
    file.seek(0)  # Reset file pointer to the beginning
    check_failed = False
    # Parse all the file
    for line_number, line in enumerate(file, start=1):
        # Drop only the line terminator so it is not mistaken for trailing whitespace
        content = line.rstrip('\r\n')
        if content != content.rstrip():
            print(f"❌ Trailing whitespace found: {file_path} at line {line_number}")
            check_failed = True
    if check_failed:
        error_handler = True
        results["Trailing whitespace check"] = "Failed"

# Codestyle patterns checking for various codestyle issues
def sql_check(file: io, file_path: str) -> None:
    """Run the line-level SQL codestyle checks on *file*.

    Reported per line:
      * any mention of ``broadcast_text``
      * the ``EntryOrGuid`` spelling (``entryorguid`` is expected)
      * a double semicolon (``;;``)
      * a tab at the very start of the line
      * a line that is not terminated by a newline (only the final line
        of a file can be in that state)

    On any finding the module-level ``error_handler`` flag is set and the
    matching entry in ``results`` is marked "Failed".
    """
    global error_handler, results
    file.seek(0)  # Reset file pointer to the beginning
    check_failed = False

    # Parse all the file
    for line_number, line in enumerate(file, start=1):
        if 'broadcast_text' in line:
            print(
                f"❌ DON'T EDIT broadcast_text TABLE UNLESS YOU KNOW WHAT YOU ARE DOING!\nThis error can safely be ignored if the changes are approved to be sniffed: {file_path} at line {line_number}")
            check_failed = True
        if "EntryOrGuid" in line:
            print(
                f"❌ Please use entryorguid syntax instead of EntryOrGuid in {file_path} at line {line_number}\nWe recommend to use keira to have the right syntax in auto-query generation")
            check_failed = True
        if ';;' in line:
            print(
                f"❌ Double semicolon (;;) found in {file_path} at line {line_number}")
            check_failed = True
        # Only a tab at the start of the line is reported
        if line.startswith("\t"):
            print(
                f"❌ Tab found! Replace it to 4 spaces: {file_path} at line {line_number}")
            check_failed = True

        # Test for the newline itself: looking at the last character alone
        # would miss a final line that ends in a space or tab.
        if not line.endswith('\n'):
            print(
                f"❌ The last line is not a newline. Please add a newline: {file_path}")
            check_failed = True

    # Handle the script error and update the result output
    if check_failed:
        error_handler = True
        results["SQL codestyle check"] = "Failed"

def insert_delete_safety_check(file: io, file_path: str) -> None:
    """Check that INSERTs follow a DELETE and that template tables are not deleted from.

    Lines starting with ``--`` are ignored. An ``INSERT`` is reported when
    the previous non-comment line does not contain ``DELETE``. A line that
    starts with ``DELETE FROM`` (case-insensitive) is reported when its
    backticked table name is one of the protected template tables.
    On any finding the module-level ``error_handler`` flag is set and the
    matching entry in ``results`` is marked "Failed".
    """
    global error_handler, results
    file.seek(0)  # Reset file pointer to the beginning
    protected_tables = ("creature_template", "gameobject_template", "item_template", "quest_template")
    delete_from = re.compile(r"DELETE FROM\s+`([^`]+)`", re.IGNORECASE)
    found_issue = False
    last_seen = ""

    # Parse all the file
    for line_number, current in enumerate(file, start=1):
        if current.startswith("--"):
            continue

        # Evaluate against the previous line before it is replaced below
        insert_without_delete = "INSERT" in current and "DELETE" not in last_seen
        last_seen = current
        if insert_without_delete:
            print(f"❌ No DELETE keyword found before the INSERT in {file_path} at line {line_number}\nIf this error is intended, please notify a maintainer")
            found_issue = True

        hit = delete_from.match(current)
        if hit is None:
            continue
        table_name = hit.group(1)
        if table_name in protected_tables:
            print(
                f"❌ Entries from {table_name} should not be deleted! {file_path} at line {line_number}\nIf this error is intended, please notify a maintainer")
            found_issue = True

    # Handle the script error and update the result output
    if found_issue:
        error_handler = True
        results["INSERT & DELETE safety usage check"] = "Failed"

def semicolon_check(file: io, file_path: str) -> None:
    """Report statements missing a terminating semicolon or a row-separating comma.

    The file is scanned line by line with a small state machine:

    * ``--`` comment lines and the contents of ``/* ... */`` block comments
      are ignored. A block comment may open and close on the same line.
    * A line starting with ``SET`` must end with ``;``.
    * After an ``INSERT``/``REPLACE ... VALUES`` line that is not already
      terminated by ``;``, every row line starting with ``(`` must end with
      ``,`` when another row follows, otherwise with ``;``.
    * Outside a VALUES block, an open query that reaches the last line of
      the file must end with ``;``.

    On any finding the module-level ``error_handler`` flag is set and the
    matching entry in ``results`` is marked "Failed".
    """
    global error_handler, results

    file.seek(0)  # Reset file pointer to the start
    check_failed = False

    query_open = False
    in_block_comment = False
    inside_values_block = False

    lines = file.readlines()
    total_lines = len(lines)

    def get_next_non_blank_line(start):
        """ Get the next non-blank, non-comment line starting from `start` """
        for idx in range(start, total_lines):
            next_line = lines[idx].strip()
            if next_line and not next_line.startswith(('--', '/*')):
                return next_line
        return None

    for line_number, line in enumerate(lines, start=1):
        stripped_line = line.strip()

        # Skip single-line comments
        if stripped_line.startswith('--'):
            continue

        # Handle block comments: first leave a comment opened on an earlier line
        if in_block_comment:
            if '*/' not in stripped_line:
                continue
            in_block_comment = False
            stripped_line = stripped_line.split('*/', 1)[1].strip()

        # Then remove comments that start on this line
        if '/*' in stripped_line:
            query_open = False  # Reset query state at start of block comment
            while '/*' in stripped_line:
                before, _, after = stripped_line.partition('/*')
                if '*/' not in after:
                    # Comment continues on the following lines
                    in_block_comment = True
                    stripped_line = before
                    break
                # Comment closes on this line: keep the text on both sides of it,
                # and do not carry the "inside comment" state to the next line
                stripped_line = before + ' ' + after.split('*/', 1)[1]
            stripped_line = stripped_line.strip()

        # Skip empty lines (unless inside values block)
        if not stripped_line and not inside_values_block:
            continue

        # Remove inline comments after SQL
        stripped_line = stripped_line.split('--', 1)[0].strip()
        upper_line = stripped_line.upper()

        if upper_line.startswith("SET") and not stripped_line.endswith(";"):
            print(f"❌ Missing semicolon in {file_path} at line {line_number}")
            check_failed = True

        # Detect query start
        if not query_open and any(keyword in upper_line for keyword in ("SELECT", "INSERT", "UPDATE", "DELETE", "REPLACE")):
            query_open = True

        # Detect start of multi-line VALUES block
        if "VALUES" in upper_line and any(kw in upper_line for kw in ("INSERT", "REPLACE")):
            query_open = True  # Ensure query is marked open too
            # A statement already terminated on its own line opens no row block.
            # Entering the block there would leave it open and hide missing
            # semicolons in the statements that follow.
            if inside_values_block or not stripped_line.endswith(';'):
                inside_values_block = True

        if inside_values_block:
            if not stripped_line:
                continue  # Allow blank lines inside VALUES block

            if stripped_line.startswith('('):
                # Get next non-blank line to detect if we're at the last row.
                # line_number is 1-based, so it is the 0-based index of the next line.
                next_line = get_next_non_blank_line(line_number)

                if next_line and next_line.startswith('('):
                    # Expect comma if another row follows
                    if not stripped_line.endswith(','):
                        print(f"❌ Missing comma in {file_path} at line {line_number}")
                        check_failed = True
                else:
                    # Expect semicolon if this is the final row
                    if not stripped_line.endswith(';'):
                        print(f"❌ Missing semicolon in {file_path} at line {line_number}")
                        check_failed = True
                        inside_values_block = False
                        query_open = False
                    else:
                        inside_values_block = False  # Close block if semicolon was found

        elif query_open:
            # Normal query handling (outside multi-row VALUES block)
            if line_number == total_lines and not stripped_line.endswith(';'):
                print(f"❌ Missing semicolon in {file_path} at the last line {line_number}")
                check_failed = True
                query_open = False
            elif stripped_line.endswith(';'):
                query_open = False

    if check_failed:
        error_handler = True
        results["Missing semicolon check"] = "Failed"

def backtick_check(file: io, file_path: str) -> None:
    """Report identifiers in SQL clauses that are not wrapped in backticks.

    Lines starting with ``--`` are skipped. Quoted string values are removed
    from each remaining line first. The text following each recognised
    clause keyword is then scanned for bare words; a word that is neither a
    listed SQL keyword nor preceded by ``@`` must appear backticked somewhere
    in that same clause text. On any finding the module-level
    ``error_handler`` flag is set and the matching entry in ``results`` is
    marked "Failed".
    """
    global error_handler, results
    file.seek(0)
    found_issue = False

    # Find SQL clauses
    clause_re = re.compile(
        r'\b(SELECT|FROM|JOIN|WHERE|GROUP BY|ORDER BY|DELETE FROM|UPDATE|INSERT INTO|SET|REPLACE|REPLACE INTO)\s+(.*?)(?=;$|(?=\b(?:WHERE|SET|VALUES)\b)|$)',
        re.IGNORECASE | re.DOTALL
    )

    # Make sure to ignore values enclosed in single- and doublequotes
    quoted_re = re.compile(r"'(?:\\'|[^'])*'|\"(?:\\\"|[^\"])*\"")

    # Find all words and exclude @variables
    identifier_re = re.compile(r'\b(?<!@)([a-zA-Z_][a-zA-Z0-9_]*)\b')

    # MySQL keywords that never need backticks
    sql_keywords = frozenset((
        "SELECT", "FROM", "JOIN", "WHERE", "GROUP", "BY", "ORDER",
        "DELETE", "UPDATE", "INSERT", "INTO", "SET", "VALUES", "AND",
        "IN", "OR", "REPLACE", "NOT", "BETWEEN",
        "DISTINCT", "HAVING", "LIMIT", "OFFSET", "AS", "ON", "INNER",
        "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "NATURAL",
        "EXISTS", "LIKE", "IS", "NULL", "UNION", "ALL", "ASC", "DESC",
        "CASE", "WHEN", "THEN", "ELSE", "END", "CREATE", "TABLE",
        "ALTER", "DROP", "DATABASE", "INDEX", "VIEW", "TRIGGER",
        "PROCEDURE", "FUNCTION", "PRIMARY", "KEY", "FOREIGN", "REFERENCES",
        "CONSTRAINT", "DEFAULT", "AUTO_INCREMENT", "UNIQUE", "CHECK",
        "SHOW", "DESCRIBE", "EXPLAIN", "USE", "GRANT", "REVOKE",
        "BEGIN", "COMMIT", "ROLLBACK", "SAVEPOINT", "LOCK", "UNLOCK",
        "WITH", "RECURSIVE", "COLUMN", "ENGINE", "CHARSET", "COLLATE",
        "IF", "ELSEIF", "LOOP", "WHILE", "DO", "HANDLER", "LEAVE",
        "ITERATE", "DECLARE", "CURSOR", "FETCH", "OPEN", "CLOSE",
    ))

    for line_number, raw_line in enumerate(file, start=1):
        # Ignore comments
        if raw_line.startswith('--'):
            continue

        # Sanitize single- and doublequotes to prevent false positives
        unquoted = quoted_re.sub('', raw_line)

        for _clause, body in clause_re.findall(unquoted):
            for word in identifier_re.findall(body):
                # Skip MySQL keywords
                if word.upper() in sql_keywords:
                    continue

                # Make sure the word is enclosed in backticks
                if re.search(rf'`{re.escape(word)}`', body):
                    continue
                print(f"❌ Missing backticks around ({word}). {file_path} at line {line_number}")
                found_issue = True

    if found_issue:
        error_handler = True
        results["Backtick check"] = "Failed"

# Collect every candidate file from the pending_db_* directories matched above
all_files = collect_files_from_directories(src_directory)

# Run every check over the collected files; parsing_file() prints the summary
# and exits with status 1 when a check fails or a file cannot be decoded
parsing_file(all_files)