summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph Feck <cfeck@kde.org> 2016-07-22 17:57:25 (GMT)
committer Christoph Feck <cfeck@kde.org> 2016-07-22 17:57:25 (GMT)
commit 5415d07720286beb22962c30a01593c30a1cfce1 (patch)
tree 85be0e448280ef3ea53ceaeae2e36a501f6c9547
parent 9ba72a807a18da73c05e3e99f1c9799cf95f0c36 (diff)
Fix "See also" references
1. There are two types of "x" references in NamesList.txt: one with parentheses and names, and a second that only states the referenced code point. Support for the second type was added. 2. The regexp for the first type was incorrect for code points with more than 4 hex digits: the additional digits were eaten by the .* regexp preceding the capture. For example, the reference to U+1F4B2 in the details list for character U+0024 DOLLAR SIGN was recognized as F4B2. 3. Added a missing range check; the current data file format only supports 16-bit references.
-rwxr-xr-x kcharselect-generate-datafile.py 15
1 files changed, 11 insertions, 4 deletions
diff --git a/kcharselect-generate-datafile.py b/kcharselect-generate-datafile.py
index cf93c50..4774cb3 100755
--- a/kcharselect-generate-datafile.py
+++ b/kcharselect-generate-datafile.py
@@ -625,7 +625,8 @@ class Parser:
unicodeRegexp = re.compile(r'^([0-9A-F]+)')
aliasRegexp = re.compile(r'^\s+=\s+(.+)$') #equal
- seeAlsoRegexp = re.compile(r'^\s+x\s+.*([0-9A-F]{4,6})\)$') #ex
+ seeAlsoRegexp1 = re.compile(r'^\s+x\s+.*\s([0-9A-F]{4,6})\)$') #ex
+ seeAlsoRegexp2 = re.compile(r'^\s+x\s+([0-9A-F]{4,6})$') #ex
noteRegexp = re.compile(r'^\s+\*\s+(.+)$') #star
approxEquivalentRegexp = re.compile(r'^\s+#\s+(.+)$') #pound
equivalentRegexp = re.compile(r'^\s+:\s+(.+)$') #colon
@@ -640,7 +641,8 @@ class Parser:
m3 = noteRegexp.match(line)
m4 = approxEquivalentRegexp.match(line)
m5 = equivalentRegexp.match(line)
- m6 = seeAlsoRegexp.match(line)
+ m6 = seeAlsoRegexp1.match(line)
+ m7 = seeAlsoRegexp2.match(line)
if invalidRegexp.match(line):
continue
elif m1:
@@ -664,7 +666,12 @@ class Parser:
details.addEntry(currChar, "equiv", value)
elif m6:
value = int(m6.group(1), 16)
- details.addEntry(currChar, "seeAlso", value)
+ if value < 0x10000:
+ details.addEntry(currChar, "seeAlso", value)
+ elif m7:
+ value = int(m7.group(1), 16)
+ if value < 0x10000:
+ details.addEntry(currChar, "seeAlso", value)
def parseBlocks(self, inBlocks, sectionsBlocks):
regexp = re.compile(r'^([0-9A-F]+)\.\.([0-9A-F]+); (.+)$')
for line in inBlocks:
@@ -680,7 +687,7 @@ class Parser:
for line in inSections:
line = line[:-1]
if len(line) == 0:
- continue;
+ continue
temp = line.split(" ")
if temp[0] == "SECTION":
currSection = line[8:]