COBOLのソースコードをPythonで字句解析

 

COBOLのソースコードを字句解析


パターン1:ソースコード全体をトークンに分割


✏スクリプト

import re

# COBOL source code to tokenize
cobol_code = """
       IDENTIFICATION DIVISION.
       PROGRAM-ID. SAMPLE.
       DATA DIVISION.
       WORKING-STORAGE SECTION.
       01 NUM1 PIC 9(3) VALUE 100.
       01 NUM2 PIC 9(3) VALUE 200.
       01 RESULT PIC 9(3).
       PROCEDURE DIVISION.
           ADD NUM1 TO NUM2 GIVING RESULT.
           DISPLAY "The result is " RESULT.
           STOP RUN.
"""

# Token pattern. Alternatives are tried left to right, so the more specific
# forms come first. The whole pattern is a single capturing group, which makes
# re.findall return a flat list of token strings.
pattern = (
    r"("
    r'"[^"\n]*"'            # double-quoted literal, kept as one token
    r"|'[^'\n]*'"           # single-quoted literal, kept as one token
    # Picture repetition such as 9(3). No trailing \b here: ")" is not a word
    # character, so a \b after it fails whenever a space or "." follows.
    r"|\b[0-9]+\([0-9]+\)"
    r"|\b[0-9]+\b"          # unsigned integer (level number, numeric literal)
    r"|\b[A-Z0-9-]+\b"      # reserved word or user-defined name
    r"|[-+*/.:<>=()]"       # single-character operator or separator
    r")"
)

# Extract every token that matches the pattern (case-insensitively)
tokens = re.findall(pattern, cobol_code, re.IGNORECASE)

# Print the extracted tokens, one per line
for token in tokens:
    print(token)




パターン2:指定した文字列を抽出


✏スクリプト

import re

# COBOL source code to search
code = """
   IF XXX_CD1 = '5' OR XXX_CDA = 'A' 
      OR XXX_CD2 = XXXX
      AND TEST = 'ABC'
   ELSE IF AAAA-XXX_CD1 = '&&'
      NEXT SENTENCE
   ELSE IF AAAA_XXX_CD1-BBB = '&&'
      NEXT SENTENCE
   END

"""

target_phrase = "XXX_CD"
target_value = "A"


def identifier_pattern(phrase):
    """Return a regex for identifiers that contain *phrase*.

    The phrase must be followed directly by a digit, hyphen or underscore;
    any run of letters, digits, hyphens and underscores may surround it.
    The phrase is escaped, so regex metacharacters in it match literally.
    """
    # The hyphen is placed last in each class so it is unambiguously literal.
    return f"[A-Z0-9_-]*{re.escape(phrase)}[0-9_-][A-Z0-9_-]*"


def quoted_value_pattern(value):
    """Return a regex for *value* enclosed in matching quotes.

    Both '...' and "..." are accepted, but the opening and closing delimiter
    must be the same character. The value is escaped so it matches literally.
    The group is non-capturing, so re.findall returns the whole match.
    """
    literal = re.escape(value)
    return f"(?:'{literal}'|\"{literal}\")"


# Find the identifiers that contain the target phrase
pattern = identifier_pattern(target_phrase)
tokens = re.findall(pattern, code, re.IGNORECASE)

for token in tokens:
    print(token)

print("------------")

# Find the quoted occurrences of the target value
pattern = quoted_value_pattern(target_value)
tokens = re.findall(pattern, code, re.IGNORECASE)

for token in tokens:
    print(token)



コメント

このブログの人気の投稿

【論文メモ】A systematic literature review on source code similarity measurement and clone detection: techniques, applications, and challenges

【論文】A Survey on Causal Inference<2021>

【論文】Treatment Effect Estimation with Data-Driven Variable Decomposition