Almost right. Not bad for a first try.
First you need to make a text encoding converter.
// Creates a converter from plain 16-bit Unicode to canonically decomposed
// 16-bit Unicode (the form called "HFS" throughout this code), runs
// BOOL_MakeTables(plainToHFS) with it and disposes the converter afterwards.
// Returns True only if every step succeeded.
Bool BOOL_MakeTables()
{
    // Source encoding: Unicode 2.0, no subset restriction, 16-bit format.
    TextEncoding const plainTextEncoding=
        ::CreateTextEncoding(kTextEncodingUnicodeV2_0,
                             kUnicodeNoSubset,
                             kUnicode16BitFormat);
    if (!plainTextEncoding)
    {
        ReturnFalse("\pCreateTextEncoding fails");
    }
    // Target encoding: same base and format, canonical decomposition variant.
    TextEncoding const hfsTextEncoding=
        ::CreateTextEncoding(kTextEncodingUnicodeV2_0,
                             kUnicodeCanonicalDecompVariant,
                             kUnicode16BitFormat);
    if (!hfsTextEncoding)
    {
        ReturnFalse("\pCreateTextEncoding fails");
    }
    TECObjectRef plainToHFS;
    OSStatus status=::TECCreateConverter(&plainToHFS,
                                         plainTextEncoding,
                                         hfsTextEncoding);
    if (status)
    {
        ReturnFalse_status("\pTECCreateConverter fails",status);
    }
    // Remember the result and dispose first, so the converter is released
    // whether or not the table generation succeeded.
    const Bool ok=BOOL_MakeTables(plainToHFS);
    status=::TECDisposeConverter(plainToHFS);
    if (status)
    {
        // Report the status code, as is done for TECCreateConverter above.
        ReturnFalse_status("\pTECDisposeConverter fails",status);
    }
    if (!ok)
    {
        ReturnFalse("\pMakeTables fails");
    }
    return True;
}
Then you need to make 4 tables.
#include "Make4Tables.h"
#include "PStr.cp.h"
//----------------------------------------
// Implementation details.
//----------------------------------------
// Opens the ":Output:Data<length>.cp" output file and forwards to the
// three-argument overload.
Bool BOOL_MakeTable(TECObjectRef plainToHFS,
SInt32 length);
// Opens the ":Output:Data<length>.ss" output file and forwards to the
// four-argument overload.
Bool BOOL_MakeTable(TECObjectRef plainToHFS,
SInt32 length,
BigTextFileWriter &cpp);
// Converts every 16-bit code and writes those whose converted form has
// exactly `length` units to both writers.
Bool BOOL_MakeTable(TECObjectRef plainToHFS,
SInt32 length,
BigTextFileWriter &cpp,
BigTextFileWriter &scheme);
// Writes one table entry as "{plain,u0,u1,...}," followed by a new line.
Bool BOOL_ShowCpp(UniChar plain,
const HFSUniStr255 &u,
BigTextFileWriter &cpp);
// Writes one table entry as "(plain u0 u1 ...)" followed by a new line.
Bool BOOL_ShowScheme(UniChar plain,
const HFSUniStr255 &u,
BigTextFileWriter &scheme);
//----------------------------------------
// Generates the tables for converted lengths 1 through 4, one after the
// other, stopping at the first failure.
Bool BOOL_Make4Tables(TECObjectRef const plainToHFS)
{
    SInt32 tableLength=1;
    while (tableLength<=4)
    {
        const Bool made=BOOL_MakeTable(plainToHFS,
                                       tableLength);
        if (!made)
        {
            ReturnFalse("\pMakeTable fails");
        }
        tableLength++;
    }
    return True;
}
//----------------------------------------
// Implementation details.
//----------------------------------------
// Opens (creating it if necessary) the text file ":Output:Data<length>.cp",
// lets the three-argument overload fill it, and closes it again.
// Returns True only if opening, generating and closing all succeeded.
Bool BOOL_MakeTable(TECObjectRef const plainToHFS,
                    const SInt32 length)
{
    // Build the relative path ":Output:Data<length>.cp".
    PStr<31> path;
    SpellReadable(path,"\p:Output:Data");
    SpellNumber(path,length);
    SpellReadable(path,"\p.cp");
    FSSpec spec;
    OSErr err=::FSMakeFSSpec(0,
                             0,
                             path,
                             &spec);
    if (err)
    {
        // fnfErr only means the file does not exist yet; anything else is a
        // genuine FSMakeFSSpec failure and is reported under that name.
        if (err!=fnfErr)
        {
            ReturnFalse_err("\pFSMakeFSSpec fails",err);
        }
        err=::FSpCreate(&spec,
                        'R*ch',
                        'TEXT',
                        smSystemScript);
        if (err)
        {
            ReturnFalse_err("\pFSpCreate fails",err);
        }
    }
    BigMacFileWriter cpp;
    if (!cpp.BOOL_Open(spec))
    {
        ReturnFalse("\pOpen fails");
    }
    // Close the file before acting on `ok`, so it is closed on both paths.
    const Bool ok=BOOL_MakeTable(plainToHFS,
                                 length,
                                 cpp);
    if (!cpp.BOOL_Close())
    {
        ReturnFalse("\pClose fails");
    }
    if (!ok)
    {
        ReturnFalse("\pMakeTable fails");
    }
    return True;
}
// Opens (creating it if necessary) the text file ":Output:Data<length>.ss",
// lets the four-argument overload fill both `cpp` and that file, and closes
// the file again. `cpp` is owned and closed by the caller.
// Returns True only if opening, generating and closing all succeeded.
Bool BOOL_MakeTable(TECObjectRef const plainToHFS,
                    const SInt32 length,
                    BigTextFileWriter &cpp)
{
    // Build the relative path ":Output:Data<length>.ss".
    PStr<31> path;
    SpellReadable(path,"\p:Output:Data");
    SpellNumber(path,length);
    SpellReadable(path,"\p.ss");
    FSSpec spec;
    OSErr err=::FSMakeFSSpec(0,
                             0,
                             path,
                             &spec);
    if (err)
    {
        // fnfErr only means the file does not exist yet; anything else is a
        // genuine FSMakeFSSpec failure and is reported under that name.
        if (err!=fnfErr)
        {
            ReturnFalse_err("\pFSMakeFSSpec fails",err);
        }
        err=::FSpCreate(&spec,
                        'R*ch',
                        'TEXT',
                        smSystemScript);
        if (err)
        {
            ReturnFalse_err("\pFSpCreate fails",err);
        }
    }
    BigMacFileWriter scheme;
    if (!scheme.BOOL_Open(spec))
    {
        ReturnFalse("\pOpen fails");
    }
    // Close the file before acting on `ok`, so it is closed on both paths.
    const Bool ok=BOOL_MakeTable(plainToHFS,
                                 length,
                                 cpp,
                                 scheme);
    if (!scheme.BOOL_Close())
    {
        ReturnFalse("\pClose fails");
    }
    if (!ok)
    {
        ReturnFalse("\pMakeTable fails");
    }
    return True;
}
// Runs every 16-bit code 0..65535 through the converter and writes an entry
// to both `cpp` and `scheme` for each code whose converted form is exactly
// `length` units long. Codes that convert to themselves are left out.
// Prints the number of entries written to cout when done.
Bool BOOL_MakeTable(TECObjectRef const plainToHFS,
                    const SInt32 length,
                    BigTextFileWriter &cpp,
                    BigTextFileWriter &scheme)
{
    SInt32 emitted=0;
    for (SInt32 code=0;code<65536;code++)
    {
        const UniChar plain=static_cast<UniChar>(code);
        HFSUniStr255 hfs;
        ByteCount bytesRead;
        ByteCount bytesWritten;
        // Input is one 2-byte unit; the output buffer holds 510 bytes.
        const OSStatus status=::TECConvertText(plainToHFS,
                                               reinterpret_cast<ConstTextPtr>(&plain),
                                               2,
                                               &bytesRead,
                                               reinterpret_cast<TextPtr>(hfs.unicode),
                                               510,
                                               &bytesWritten);
        if (status)
        {
            ReturnFalse_status("\pTECConvertText fails",status);
        }
        // Two bytes per unit.
        hfs.length=static_cast<UInt16>(bytesWritten/2);
        if (hfs.length!=length)
        {
            continue;
        }
        // Skip codes that convert to themselves.
        if ((length==1) && (hfs.unicode[0]==plain))
        {
            continue;
        }
        emitted++;
        if (!BOOL_ShowCpp(plain,
                          hfs,
                          cpp))
        {
            ReturnFalse("\pShowCpp fails");
        }
        if (!BOOL_ShowScheme(plain,
                             hfs,
                             scheme))
        {
            ReturnFalse("\pShowScheme fails");
        }
    }
    cout << "Count for length " << length << " is " << emitted << "." << endl;
    return True;
}
// Writes one entry to `cpp` in the form "{plain,u0,u1,...}," followed by a
// new line, where u0.. are the units of `hfs`. Always returns True.
Bool BOOL_ShowCpp(const UniChar plain,
                  const HFSUniStr255 &hfs,
                  BigTextFileWriter &cpp)
{
    SpellReadable(cpp,"\p{");
    SpellNumber(cpp,plain);
    const UniChar *unit=hfs.unicode;
    const UniChar *const stop=hfs.unicode+hfs.length;
    while (unit!=stop)
    {
        SpellReadable(cpp,"\p,");
        SpellNumber(cpp,*unit);
        ++unit;
    }
    SpellReadable(cpp,"\p},");
    StartANewLine(cpp);
    return True;
}
// Writes one entry to `scheme` in the form "(plain u0 u1 ...)" followed by a
// new line, where u0.. are the units of `hfs`. Always returns True.
Bool BOOL_ShowScheme(const UniChar plain,
                     const HFSUniStr255 &hfs,
                     BigTextFileWriter &scheme)
{
    SpellReadable(scheme,"\p(");
    SpellNumber(scheme,plain);
    const UniChar *unit=hfs.unicode;
    const UniChar *const stop=hfs.unicode+hfs.length;
    while (unit!=stop)
    {
        SpellReadable(scheme,"\p ");
        SpellNumber(scheme,*unit);
        ++unit;
    }
    SpellReadable(scheme,"\p)");
    StartANewLine(scheme);
    return True;
}
You can use this to write your own Unicode to HFS conversion.
Then you can use these tables to build a tree. Thanks to the Scheme language we can do this in a few words.
(require-library "compat.ss")
; One node of the lookup tree that maps a sequence of HFS codes back to a
; plain Unicode code.
;   tree    - association list ((hfs-code child-node) ...) of child nodes
;   unicode - the plain code stored at this node, or #f if none was stored
(define conversion
  (class object% ()
    (private
      [tree '()]
      [unicode #f])
    (public
      ; Stores `plain` at the node reached by following the codes in the list
      ; `hfs`, creating child nodes where needed. Adding the same `hfs` twice
      ; overwrites the value stored earlier.
      [add (lambda (plain hfs)
             (if (null? hfs)
                 (set! unicode plain)
                 ; let*, not letrec: the init of `found` uses `first`, and
                 ; letrec inits must not refer to sibling bindings.
                 (let* ((first (car hfs))
                        (rest (cdr hfs))
                        (found (assv first tree)))
                   (if found
                       (send (cadr found) add plain rest)
                       (let ((new (make-object conversion)))
                         (begin
                           (set! tree (cons (list first new) tree))
                           (send new add plain rest)))))))]
      ; Returns the plain code stored for the code list `hfs`, or #f when no
      ; such path exists or nothing is stored at its end.
      [search (lambda (hfs)
                (if (null? hfs)
                    unicode
                    ; assv matches the lookup used by `add`.
                    (let* ((first (car hfs))
                           (rest (cdr hfs))
                           (found (assv first tree)))
                      (if found
                          (send (cadr found) search rest)
                          #f))))]
      ; Returns the number of nodes, in this subtree, that have children.
      [count (lambda ()
               (define (sum tree)
                 (if (null? tree)
                     0
                     (let ((first (car tree))
                           (rest (cdr tree)))
                       (+ (send (cadr first) count)
                          (sum rest)))))
               (if (null? tree)
                   0
                   (+ 1 (sum tree))))]
      ; Sorts the children of every node in this subtree by ascending HFS
      ; code. Returns 'ok for a node without children.
      [merge-sort (lambda ()
                    (define (merge-sort-tree tree)
                      (define (merge-sort-sub x)
                        (send (cadr x) merge-sort))
                      (define (lt? x y)
                        (< (car x) (car y)))
                      (begin
                        (for-each merge-sort-sub tree)
                        (sort lt? tree)))
                    (if (null? tree)
                        'ok
                        (set! tree (merge-sort-tree tree))))]
      ; Returns the subtree as plain data: a node without children becomes
      ; the stored code as a string; otherwise a list of (hfs-code subtree)
      ; pairs, preceded by the stored code as a string when there is one.
      [get-tree (lambda ()
                  (define (translate pair)
                    (list (car pair) (send (cadr pair) get-tree)))
                  (if (null? tree)
                      (number->string unicode)
                      (let ((translated (map translate tree)))
                        (if unicode
                            (cons (number->string unicode) translated)
                            translated))))]
      ; Displays, one per line, the number of children of every node in this
      ; subtree that has children (parents before their children).
      [display-tree-lengths (lambda ()
                              (define (translate pair)
                                (send (cadr pair) display-tree-lengths))
                              (if (not (null? tree))
                                  (begin
                                    (display (length tree))
                                    (newline)
                                    (for-each translate tree))))])
    (sequence
      (super-init))))
; The single shared tree, and plain procedures that forward to its methods.
(define c (make-object conversion))
(define add
  (lambda (plain hfs) (send c add plain hfs)))
(define search
  (lambda (hfs) (send c search hfs)))
(define count
  (lambda () (send c count)))
(define merge-sort
  (lambda () (send c merge-sort)))
(define get-tree
  (lambda () (send c get-tree)))
(define display-tree-lengths
  (lambda () (send c display-tree-lengths)))
; Adds every record of `data` to the shared tree. Each record is a list whose
; first element is the plain code and whose remaining elements are the HFS
; codes.
(define (add-data data)
  (for-each (lambda (record)
              (add (car record) (cdr record)))
            data))
; Writes the tree data `b` (as returned by get-tree) to the file `vpad` in a
; compact byte format, replacing any existing file.
;   - numbers are written as two bytes, high byte first
;   - every conversion is its HFS code, then a flag byte: 0 followed by the
;     plain code for a leaf, or 1 followed by a table
;   - every table starts with a flag byte: 0 followed by the plain code when
;     the node holds a value of its own, 1 otherwise; then the element count
;     and the elements
(define (write-binary vpad b)
  (define out
    (open-output-file vpad 'truncate/replace))
  ; One byte.
  (define (put-byte n)
    (write-char (integer->char n) out))
  ; Two bytes, high byte first.
  (define (put-word n)
    (put-byte (quotient n 256))
    (put-byte (remainder n 256)))
  ; Element count followed by the elements themselves.
  (define (put-elements t)
    (put-word (length t))
    (for-each put-conversion t))
  (define (put-table t)
    (let ((head (car t))
          (tail (cdr t)))
      (if (string? head)
          (begin
            (put-byte 0)
            (put-word (string->number head))
            (put-elements tail))
          (begin
            (put-byte 1)
            (put-elements t)))))
  (define (put-conversion c)
    (let ((code (car c))
          (payload (cadr c)))
      (put-word code)
      (if (string? payload)
          (begin
            (put-byte 0)
            (put-word (string->number payload)))
          (begin
            (put-byte 1)
            (put-table payload)))))
  (for-each put-conversion b)
  (close-output-port out))
; Writes the datum `x` in `write` notation to the file `vpad`, replacing any
; existing file.
(define (write-scheme vpad x)
  (let ((out (open-output-file vpad 'truncate/replace)))
    (write x out)
    (close-output-port out)))
; Load the four generated data files; they are expected to define data1
; through data4, which are used below.
(load "Data1.ss")
(load "Data2.ss")
(load "Data3.ss")
(load "Data4.ss")
; Put every record into the shared tree.
(add-data data1)
(add-data data2)
(add-data data3)
(add-data data4)
; Spot checks, one for each HFS length 1 to 4.
(search '(180)) ; => 8189
(search '(105 769)) ; => 146
(search '(4362 4468 4532)) ; => 50457
(search '(919 837 788 769)) ; => 8093
(count) ; => 595
; Sort the children of every node before exporting the tree.
(merge-sort)
(define b (get-tree))
; Export the tree both in binary form and as readable Scheme data.
(write-binary "Tree.dat" b)
(write-scheme "Tree.ss" b)
(display-tree-lengths)
; =>
; root has length 76
; 51 times length 1
; 24 times length 2
; 42 times length 3
; 15 times length 4
; 6 times length 5
; 4 times length 7
; 4 times length 9
; 19 times length 21
; 399 times length 27
The result is saved to a binary file that we copy into 'HFS+' resource 129.
Then you also make a table that tells you whether a character translates into itself:
#include "MakeIdentityTable.h"
//----------------------------------------
// Implementation details.
//----------------------------------------
// Writes, for every 16-bit code 0..65535, whether the code converts to
// itself.
Bool BOOL_MakeIdentityTable(TECObjectRef plainToHFS,
BigBinaryFileWriter &output);
//----------------------------------------
// Opens (creating it if necessary) the binary file ":Output:Identity.dat",
// lets the two-argument overload fill it, and closes it again.
// Returns True only if opening, generating and closing all succeeded.
Bool BOOL_MakeIdentityTable(TECObjectRef const plainToHFS)
{
    FSSpec spec;
    OSErr err=::FSMakeFSSpec(0,
                             0,
                             "\p:Output:Identity.dat",
                             &spec);
    if (err)
    {
        // fnfErr only means the file does not exist yet; anything else is a
        // genuine FSMakeFSSpec failure and is reported under that name.
        if (err!=fnfErr)
        {
            ReturnFalse_err("\pFSMakeFSSpec fails",err);
        }
        err=::FSpCreate(&spec,
                        'hDmp',
                        'BINA',
                        smSystemScript);
        if (err)
        {
            ReturnFalse_err("\pFSpCreate fails",err);
        }
    }
    BigBinaryFileWriter output;
    if (!output.BOOL_Open(spec))
    {
        ReturnFalse("\pOpen fails");
    }
    // Close the file before acting on `ok`, so it is closed on both paths.
    const Bool ok=BOOL_MakeIdentityTable(plainToHFS,
                                         output);
    if (!output.BOOL_Close())
    {
        ReturnFalse("\pClose fails");
    }
    if (!ok)
    {
        ReturnFalse("\pMakeIdentityTable fails");
    }
    return True;
}
//----------------------------------------
// Implementation details.
//----------------------------------------
// Runs every 16-bit code 0..65535 through the converter and writes one bool
// per code to `output`: true when the converted form is that same single
// code, false otherwise.
Bool BOOL_MakeIdentityTable(TECObjectRef const plainToHFS,
                            BigBinaryFileWriter &output)
{
    for (SInt32 code=0;code<65536;code++)
    {
        const UniChar plain=static_cast<UniChar>(code);
        HFSUniStr255 hfs;
        ByteCount bytesRead;
        ByteCount bytesWritten;
        // Input is one 2-byte unit; the output buffer holds 510 bytes.
        const OSStatus status=::TECConvertText(plainToHFS,
                                               reinterpret_cast<ConstTextPtr>(&plain),
                                               2,
                                               &bytesRead,
                                               reinterpret_cast<TextPtr>(hfs.unicode),
                                               510,
                                               &bytesWritten);
        if (status)
        {
            ReturnFalse_status("\pTECConvertText fails",status);
        }
        // Two bytes per unit.
        hfs.length=static_cast<UInt16>(bytesWritten/2);
        const bool identity=(hfs.length==1) && (hfs.unicode[0]==plain);
        WriteBinary(output,identity);
    }
    return True;
}
The result is saved to a binary file that we copy into 'HFS+' resource 128.
'HFS+' resource 128 is used by the conversions in either direction.
'HFS+' resource 129 is used only by the HFS to Unicode conversion.
The 4 tables are used only by the Unicode to HFS conversion.
Then we notice that there are a few characters which are converted to the same character. If you convert them back then you may not get the original. For example 63, 65534 and 65535 are converted to 63.
Then you may end up with these conversions:
59 => 59 => 894
63 => 63 => 65535
96 => 96 => 8175
180 => 180 => 8189
183 => 183 => 903
198 => 198 => 1236
230 => 230 => 1237
399 => 399 => 1240
415 => 415 => 1256
439 => 439 => 1248
601 => 601 => 1241
629 => 629 => 1257
658 => 658 => 1249
697 => 697 => 884
768 => 768 => 832
769 => 769 => 833
787 => 787 => 835
953 => 953 => 8126
65534 => 63 => 65535
We avoid this by storing only the smallest number if there's more than one possibility. Then you get these conversions:
832 => 768 => 768
833 => 769 => 769
835 => 787 => 787
884 => 697 => 697
894 => 59 => 59
903 => 183 => 183
1236 => 198 => 198
1237 => 230 => 230
1240 => 399 => 399
1241 => 601 => 601
1248 => 439 => 439
1249 => 658 => 658
1256 => 415 => 415
1257 => 629 => 629
8126 => 953 => 953
8175 => 96 => 96
8189 => 180 => 180
65534 => 63 => 63
65535 => 63 => 63
This was found with this program:
// Round-trip test: converts every 16-bit code to HFS form and back, and
// prints each code that has no reverse conversion or that does not come back
// as itself. Returns False if a reverse conversion leaves HFS units unused.
Bool BOOL_Test_Unicode_HFS()
{
    for (z4 i=0;i<65536;i++)
    {
        c_n2 unicode1=static_cast<n2>(i);
        z4 hfs_length;
        n2 hfs[4];
        UnicodeToHFS::Convert(unicode1,
                              hfs_length,
                              hfs);
        z4 hfs_used;
        n2 unicode2;
        if (!HFSToUnicode::CanConvert(hfs_length,
                                      hfs,
                                      hfs_used,
                                      unicode2))
        {
            SpellReadable(cout,"\pThere's no conversion for ");
            SpellNumber(cout,unicode1);
            StartANewLine(cout);
            // Nothing visible here sets hfs_used or unicode2 when the
            // conversion fails, so do not compare them; go on to the next
            // code.
            continue;
        }
        if (hfs_used!=hfs_length)
        {
            ReturnFalse("\pUnused characters");
        }
        if (unicode2!=unicode1)
        {
            // Report as "unicode1 => hfs... => unicode2".
            SpellNumber(cout,unicode1);
            SpellReadable(cout,"\p => ");
            for (z4 j=0;j<hfs_length;j++)
            {
                SpellNumber(cout,hfs[j]);
                SpellReadable(cout,"\p ");
            }
            SpellReadable(cout,"\p=> ");
            SpellNumber(cout,unicode2);
            StartANewLine(cout);
        }
    }
    return True;
}
My converter reads these resources and recreates the tables and the tree, then disposes the resources.
The memory use is fair:
- around 93 K shared.
- around 640 K for Unicode to HFS only.
- around 527 K for HFS to Unicode only.
Now the bad news: 18134 Unicode characters are not accepted by FSRenameUnicode. That's 28%.
// Renames the existing item "Test:0" to the HFS form of every 16-bit code in
// turn and prints each code for which FSRenameUnicode returns an error.
// Returns False only if the item cannot be located to begin with.
Bool BOOL_TryEveryHFSFilenameWithLength1()
{
    FSSpec spec;
    OSErr err=::FSMakeFSSpec(0,
                             0,
                             "\pTest:0",
                             &spec);
    if (err)
    {
        ReturnFalse_err("\pFSMakeFSSpec fails",err);
    }
    FSRef ref;
    err=::FSpMakeFSRef(&spec,
                       &ref);
    if (err)
    {
        ReturnFalse_err("\pFSpMakeFSRef fails",err);
    }
    for (SInt32 i=0;i<65536;i++)
    {
        UniChar unicode[1];
        *unicode=static_cast<UniChar>(i);
        HFSUniStr255 hfs;
        UnicodeToHFS::Convert(1,
                              unicode,
                              hfs);
        // The reference of an item may change when it is renamed, so ask for
        // the new one instead of reusing a possibly stale reference.
        FSRef renamed;
        err=::FSRenameUnicode(&ref,
                              hfs.length,
                              hfs.unicode,
                              kTextEncodingDefaultFormat,
                              &renamed);
        if (err)
        {
            cout << "Rename as ";
            cout << i;
            cout << " returns ";
            cout << err;
            cout << '.';
            cout << endl;
        }
        else
        {
            // Continue with the reference of the renamed item.
            ref=renamed;
        }
    }
    return True;
}
In UnicodeTable.sitx you can see which characters you can use.
That's how far I've got.
See the demo program in the attachment.