fix: 修复tx源歌词与翻译错位的问题

2025-05-23 19:17:41 +08:00 · 2024-10-04 12:34:51 +08:00 · 2024-10-04 12:34:51 +08:00 · 3a2da26245
commit 3a2da26245
parent 193618b34e
1 changed file with 97 additions and 88 deletions
--- a/modules/tx/lyric.py
+++ b/modules/tx/lyric.py
@@ -26,70 +26,66 @@ class ParseTools:
            'timeLabelFixRxp': re.compile(r'(?:\.0+|0+)$'),
        }
-    def ms_format(self, time_ms):
+    def msFormat(self, timeMs):
-        if not time_ms:
+        if isinstance(timeMs, float) and timeMs.is_nan():
            return ''
-        ms = time_ms % 1000
+        ms = timeMs % 1000
-        time_ms /= 1000
+        timeMs //= 1000
-        m = str(int(time_ms / 60)).zfill(2)
+        m = str(int(timeMs // 60)).zfill(2)
-        time_ms %= 60
+        s = str(int(timeMs % 60)).zfill(2)
-        s = str(int(time_ms)).zfill(2)
+        return f'[{m}:{s}.{str(ms).zfill(3)}]'
        return f"[{m}:{s}.{str(ms).zfill(3)}]"
-    def parse_lyric(self, lrc):
+    def parseLyric(self, lrc):
-        lrc = lrc.strip()
+        lrc = lrc.strip().replace('\r', '')
        lrc = lrc.replace('\r', '')
        if not lrc:
            return {'lyric': '', 'lxlyric': ''}
-        lines = lrc.split('\n')
+        # print(lrc)
-        lxlrc_lines = []
+        lines = lrc.split('\n')
-        lyric_lines = []
+        lxlrcLines = []
        lrcLines = []
        for line in lines:
            line = line.strip()
            result = self.rxps['lineTime'].match(line)
            if not result:
                if line.startswith('[offset'):
-                    lxlrc_lines.append(line)
+                    lxlrcLines.append(line)
-                    lyric_lines.append(line)
+                    lrcLines.append(line)
-                if self.rxps['lineTime2'].search(line):
+                if self.rxps['lineTime2'].match(line):
-                    lyric_lines.append(line)
+                    lrcLines.append(line)
                continue
-            start_ms_time = int(result.group(1))
+            startMsTime = int(result.group(1))
-            start_time_str = self.ms_format(start_ms_time)
+            startTimeStr = self.msFormat(startMsTime)
-            if not start_time_str:
+            if not startTimeStr:
                continue
            words = re.sub(self.rxps['lineTime'], '', line)
-            lyric_lines.append(f"{start_time_str}{re.sub(self.rxps['wordTimeAll'], '', words)}")
+            lrcLines.append(f'{startTimeStr}{re.sub(self.rxps["wordTimeAll"], "", words)}')
            times = re.findall(self.rxps['wordTimeAll'], words)
            if not times:
                continue
-            times = [
+            _rxp = r"\((\d+),(\d+)\)"
-                f"<{max(int(match.group(1)) - start_ms_time, 0)},{match.group(2)}>"
+            times = [f'''<{max(int(re.search(_rxp, time).group(1)) - startMsTime, 0)},{re.search(_rxp, time).group(2)}>''' for time in times]
-                for match in re.finditer(r'\((\d+),(\d+)\)', words)
+            wordArr = re.split(self.rxps['wordTime'], words)
-            ]
+            newWords = ''.join([f'{time}{wordArr[index]}' for index, time in enumerate(times)])
-            word_arr = re.split(self.rxps['wordTime'], words)
+            lxlrcLines.append(f'{startTimeStr}{newWords}')
            new_words = ''.join([f"{time}{word}" for time, word in zip(times, word_arr)])
            lxlrc_lines.append(f"{start_time_str}{new_words}")
        return {
-            'lyric': '\n'.join(lyric_lines),
+            'lyric': '\n'.join(lrcLines),
-            'lxlyric': '\n'.join(lxlrc_lines),
+            'lxlyric': '\n'.join(lxlrcLines),
        }
-    def parse_rlyric(self, lrc):
+    def parseRlyric(self, lrc):
-        lrc = lrc.strip()
+        lrc = lrc.strip().replace('\r', '')
        lrc = lrc.replace('\r', '')
        if not lrc:
            return {'lyric': '', 'lxlyric': ''}
        lines = lrc.split('\n')
-        lyric_lines = []
+        lines = lrc.split('\n')
        lrcLines = []
        for line in lines:
            line = line.strip()
@@ -97,91 +93,104 @@ class ParseTools:
            if not result:
                continue
-            start_ms_time = int(result.group(1))
+            startMsTime = int(result.group(1))
-            start_time_str = self.ms_format(start_ms_time)
+            startTimeStr = self.msFormat(startMsTime)
-            if not start_time_str:
+            if not startTimeStr:
                continue
            words = re.sub(self.rxps['lineTime'], '', line)
            lrcLines.append(f'{startTimeStr}{re.sub(self.rxps["wordTimeAll"], "", words)}')
-            lyric_lines.append(f"{start_time_str}{re.sub(self.rxps['wordTimeAll'], '', words)}")
+        return '\n'.join(lrcLines)
-        return '\n'.join(lyric_lines)
+    def removeTag(self, string):
    def remove_tag(self, string):
        return re.sub(r'^[\S\s]*?LyricContent="', '', string).replace('"\/>[\S\s]*?$', '')
-    def get_intv(self, interval):
+    def getIntv(self, interval):
        if not interval:
            return 0
        if '.' not in interval:
            interval += '.0'
-        arr = re.split(':|\.', interval.ljust(8, '0'))[:3]
+        arr = re.split(r':|\.', interval)
-        m, s, ms = map(int, arr)
+        while len(arr) < 3:
-        return m * 3600000 + s * 1000 + ms
+            arr.insert(0, '0')
        m, s, ms = arr
        return int(m) * 3600000 + int(s) * 1000 + int(ms)
-    def fix_rlrc_time_tag(self, rlrc, lrc):
+    def fixRlrcTimeTag(self, rlrc, lrc):
-        rlrc_lines = rlrc.split('\n')
+        rlrcLines = rlrc.split('\n')
-        lrc_lines = lrc.split('\n')
+        lrcLines = lrc.split('\n')
-        new_lrc = []
+        newLrc = []
-        for line in rlrc_lines:
+
-            result = self.rxps['lineTime2'].search(line)
+        for line in rlrcLines:
            result = self.rxps['lineTime2'].match(line)
            if not result:
                continue
            words = re.sub(self.rxps['lineTime2'], '', line)
            if not words.strip():
                continue
-            t1 = self.get_intv(result.group(1))
+            t1 = self.getIntv(result.group(1))
-            while lrc_lines:
+
-                lrc_line = lrc_lines.pop(0)
+            while lrcLines:
-                lrc_line_result = self.rxps['lineTime2'].search(lrc_line)
+                lrcLine = lrcLines.pop(0)
-                if not lrc_line_result:
+                lrcLineResult = self.rxps['lineTime2'].match(lrcLine)
                if not lrcLineResult:
                    continue
-                t2 = self.get_intv(lrc_line_result.group(1))
+                t2 = self.getIntv(lrcLineResult.group(1))
                if abs(t1 - t2) < 100:
-                    new_lrc.append(re.sub(self.rxps['lineTime2'], lrc_line_result.group(0), line))
+                    newLrc.append(re.sub(self.rxps['lineTime2'], lrcLineResult.group(0), line))
                    break
        return '\n'.join(new_lrc)
-    def fix_tlrc_time_tag(self, tlrc, lrc):
+        return '\n'.join(newLrc)
        tlrc_lines = tlrc.split('\n')
        lrc_lines = lrc.split('\n')
        new_lrc = []
        time_tag_rxp = r'^\[[\d:.]+\]'
-        for line in tlrc_lines:
+    def fixTlrcTimeTag(self, tlrc, lrc):
-            result = re.match(time_tag_rxp, line)
+        tlrcLines = tlrc.split('\n')
        lrcLines = lrc.split('\n')
        newLrc = []
        for line in tlrcLines:
            result = self.rxps['lineTime2'].match(line)
            if not result:
                continue
-            words = re.sub(time_tag_rxp, '', line)
+            words = re.sub(self.rxps['lineTime2'], '', line)
            if not words.strip():
                continue
-            tag = re.sub(r'\[\d+:\d+\.\d+\]', '', result.group(0))
+            time = result.group(1)
            if '.' in time:
                time += '0' * (3 - len(time.split('.')[1]))
-            while lrc_lines:
+            t1 = self.getIntv(time)
-                lrc_line = lrc_lines.pop(0)
+
-                lrc_line_result = re.match(time_tag_rxp, lrc_line)
+            while lrcLines:
-                if not lrc_line_result:
+                lrcLine = lrcLines.pop(0)
                lrcLineResult = self.rxps['lineTime2'].match(lrcLine)
                if not lrcLineResult:
                    continue
-                if tag in lrc_line_result.group(0):
+                t2 = self.getIntv(lrcLineResult.group(1))
-                    new_lrc.append(re.sub(time_tag_rxp, lrc_line_result.group(0), line))
+                if abs(t1 - t2) < 100:
                    newLrc.append(re.sub(self.rxps['lineTime2'], lrcLineResult.group(0), line))
                    break
-        return '\n'.join(new_lrc)
+        return '\n'.join(newLrc)
-    def parse(self, lrc, tlrc, rlrc):
+    def parse(self, lrc, tlrc=None, rlrc=None):
        info = {
            'lyric': '',
            'tlyric': '',
            'rlyric': '',
            'lxlyric': '',
        }
        if lrc:
-            lyric_info = self.parse_lyric(self.remove_tag(lrc))
+            parsed_lrc = self.parseLyric(self.removeTag(lrc))
-            info['lyric'] = lyric_info['lyric']
+            info['lyric'] = parsed_lrc['lyric']
-            info['lxlyric'] = lyric_info['lxlyric']
+            info['lxlyric'] = parsed_lrc['lxlyric']
        if rlrc:
-            info['rlyric'] = self.fix_rlrc_time_tag(self.parse_rlyric(self.remove_tag(rlrc)), info['lyric'])
+            info['rlyric'] = self.fixRlrcTimeTag(self.parseRlyric(self.removeTag(rlrc)), info['lyric'])
        if tlrc:
-            info['tlyric'] = self.fix_tlrc_time_tag(tlrc, info['lyric'])
+            info['tlyric'] = self.fixTlrcTimeTag(tlrc, info['lyric'])
        return info