Changes On Branch bookmark-dupes

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch bookmark-dupes Excluding Merge-Ins

This is equivalent to a diff from 3ab8aec172 to 0abdcf0828

2015-01-13
19:31
Added a new project, mailjanitor, which is supposed to read one or more IMAP folders, look for messages matching certain criteria, and perform actions on those messages (or otherwise). Will use the Synapse components for communication with the IMAP server, so added the entire Synapse distribution. Leaf check-in: ca2aa94ea0 user: tinus tags: mail-janitor
2015-01-12
08:04
Prepare TDuplicateSet for re-use in MergeFolders. Leaf check-in: 0abdcf0828 user: tinus tags: bookmark-dupes
07:30
Updated readbookmarks.pas so it also works properly with bookmarks.html exported from Google Chrome and Mozilla Firefox (initially it was only made for HTML export from Xmarks). check-in: 5fa0ba3897 user: tinus tags: trunk
00:45
Don't save intermediate files. check-in: e88e248c9b user: tinus tags: bookmark-dupes
2015-01-11
20:39
Give preference to bookmarks in subfolders of the toolbar as well. check-in: 6081fa977b user: tinus tags: bookmark-dupes
19:18
Merge of bookmark deduplicator. It works. check-in: 3ab8aec172 user: tinus tags: trunk
19:16
URL matching is now case-insensitive across the board. Fixed freeing the same objects multiple times. Leaf check-in: 98032daaf2 user: tinus tags: bookmark-dupes
2014-12-24
12:49
FreePascal-script that parses the HTML-export of XMarks's bookmarks (http://xmarks.com), and possibly also Netscape/Firefox's bookmarks.html (untested). At this point, does nothing but parse them, and display them on the output. Plans are deduplicating bookmarks, merging folders, removing empty folders, and merging consecutive dividers. check-in: 15a85711b3 user: tinus tags: trunk

Changes to readbookmarks.pas.

1
2
3
4

5






























6
7
8
9
10
11
12
1
2
3

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42



-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







#!/usr/bin/env instantfpc
{$mode objfpc}{$H+}{$ASSERTIONS ON}
uses
  Classes, SysUtils, StrUtils, DateUtils,
  Classes, SysUtils, StrUtils,
  fgl, Regexpr;

type
  TStringArray = array of string;

{ Join concatenates all elements of Strings into a single string, placing
  Separator between consecutive elements (no leading or trailing separator).

  Strings   - the elements to concatenate; may be empty.
  Separator - text inserted between elements; defaults to the platform line break.

  Returns the concatenated text, or an empty string when Strings is empty. }
function Join(const Strings: TStringArray; const Separator: string = sLineBreak): string;
var
  i: integer;
begin
  // A managed string Result is not cleared on entry, so it has to be assigned
  // before the early exit; otherwise an empty input yields an undefined value.
  Result := '';
  if Length(Strings) = 0 then
    Exit;
  Result := Strings[Low(Strings)];
  for i := Low(Strings) + 1 to High(Strings) do begin
    Result := Result + Separator + Strings[i];
  end;
end {Join};

{ IndexOfText looks up S in Strings, comparing with SameText (i.e. ignoring
  case), and returns the index of the first matching element, or -1 when no
  element matches. }
function IndexOfText(const S: string; const Strings: TStringArray): integer;
var
  Candidate: integer;
begin
  Candidate := Low(Strings);
  while Candidate <= High(Strings) do begin
    // First hit wins; later duplicates are never inspected.
    if SameText(Strings[Candidate], S) then
      Exit(Candidate);
    Inc(Candidate);
  end;
  Result := -1;
end {IndexOfText};



type
  TFolder = class;

  { TEntry }

  TEntry = class
42
43
44
45
46
47
48

49
50
51
52



53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

79
80
81
82
83
84

85
86
87

88
89
90
91



92
93
94
95
96




97
98
99
100
101
102
103
104
105

106
107

108




109
110






































111
112
113
114


115
116
117
118


119
120
121
122
123
124
125
126
127
128
129


130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

147
148
149
150
151
152
153
154
155
156
157
158


159
160
161
162

163
164
165
166
167
168
169
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130
131
132



133
134
135
136
137
138
139
140
141
142
143
144

145
146

147
148
149
150
151
152


153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198


199
200











201
202

















203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220

221
222
223
224
225
226
227
228







+




+
+
+






+



















-
+






+


-
+




+
+
+


-
-
-
+
+
+
+








-
+

-
+

+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




+
+


-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+












+
+



-
+








  { An entry that carries a name, a description, add/modify timestamps and a
    list of additional HTML attributes. Base class of TFolder and TBookmark. }
  TContentEntry = class(TEntry)
  private
    FAdded: TDateTime;          // set from ADD_DATE; 0 when absent or not an integer
    FDescription: string;
    FModified: TDateTime;       // set from LAST_MODIFIED; 0 when absent or not an integer
    FName: string;
    FAttributes: TStringList;   // name=value pairs; created in Create, freed in Destroy
  protected
    // Called by ReadProperties for every parsed attribute. An implementation
    // that consumes Key sets HasBeenSet to True.
    procedure DoSetProperty(const Key, Value: string; var HasBeenSet: boolean); virtual;
    // Returns the entry's attributes as HTML attribute text, each one
    // preceded by a space.
    function  GetHTMLAttributes: string; virtual;
  public
    constructor Create(const AParent: TFolder; const LineNumber: cardinal); override;
    destructor  Destroy; override;

    // Parses KEY="value" pairs out of HtmlAttributes and hands each pair to
    // DoSetProperty.
    procedure ReadProperties(HtmlAttributes: string);

    property Name: string             read FName        write FName;
    property Added: TDateTime         read FAdded       write FAdded;
    property LastModified: TDateTime  read FModified    write FModified;
    property Description: string      read FDescription write FDescription;
    property Attributes: TStringList  read FAttributes;
  end;

  { TFolder }

  TFolder = class(TContentEntry)
  private
    FEntries: TEntries;
    FToolbar: boolean;
  protected
    procedure DoSetProperty(const Key, Value: string; var HasBeenSet: boolean); override;
    function  GetHTMLAttributes: string; override;
  public
    constructor Create(const AParent: TFolder; const LineNumber: cardinal); override;
    destructor  Destroy; override;

    function ToString: AnsiString; override;
    procedure SaveToStrings(const Strings: TStrings); override;

    property Entries: TEntries        read FEntries;
    property IsToolbarFolder: boolean read FToolbar;
    property IsToolbarFolder: boolean read FToolbar write FToolbar;
  end;

  { TBookmark }

  TBookmark = class(TContentEntry)
  private
    FURL: string;
    FFeedURL: string;
    FKeyword: string;
    FURL: string;
    FTags: TStringList;
  protected
    procedure DoSetProperty(const Key, Value: string; var HasBeenSet: boolean); override;
    function  GetHTMLAttributes: string; override;
  public
    constructor Create(const AParent: TFolder; const LineNumber: cardinal); override;
    destructor  Destroy; override;

    procedure SaveToStrings(const Strings: TStrings); override;

    property URL: string      read FURL     write FURL;
    property FeedURL: string  read FFeedURL write FFeedURL;
    property Keyword: string  read FKeyword write FKeyword;
    property URL: string        read FURL     write FURL;
    property FeedURL: string    read FFeedURL write FFeedURL;
    property Keyword: string    read FKeyword write FKeyword;
    property Tags: TStringList  read FTags;
  end;


const // for use both in HTMLDecode and HTMLEncode
  cEntities: array[0..9] of string = ('amp', '&', 'lt', '<', 'gt', '>', 'quot', '"', 'apos', '''');

function HTMLDecode(const HTML: string): string;
var
  i, ei: integer;
  i: integer;
  c: char;
  InEntity, Found: boolean;
  InEntity: boolean;
  Entity: string;
  { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
  procedure ProcessEntity(const FoundSemicolon: boolean);
  var
    ei: integer;
  CharCode: integer;
begin
    CharCode: integer;
    Found: boolean;
  begin
    Found := False;
    if Length(Entity) > 1 then begin
      if Entity[1] = '#' then begin
        CharCode := 0;
        if Entity[2] = 'x' then begin
          Found := TryStrToInt('$' + Copy(Entity, 3, Length(Entity)), CharCode);
        end else begin
          Found := TryStrToInt(Copy(Entity, 2, Length(Entity)), CharCode);
        end;
        if Found then
          Entity := Char(CharCode);
      end else begin
        for ei := Low(cEntities) to High(cEntities) do begin
          if (ei mod 2) = 0 then begin
            if Entity = cEntities[ei] then begin
              Entity := cEntities[ei + 1];
              Found := True;
              Break;
            end;
          end;
        end {for};
      end;
    end;
    if Found then
      Result := Result + Entity
    else begin
      Result := Result + '&' + Entity;
      if FoundSemicolon then
        Result := Result + ';';
    end;
    InEntity := False;
    Entity := '';
  end {ProcessEntity};
  { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
begin
  Result := '';
  Entity := '';
  InEntity := False;
  for i := 1 to Length(HTML) do begin
    if InEntity and (Length(Entity) > 6) then
      ProcessEntity(False);
    c := HTML[i];
    if InEntity then begin
      if c = ';' then begin
        Found := False;
      if c = '&' then begin
        ProcessEntity(False);
        if Length(Entity) > 1 then begin
          if Entity[1] = '#' then begin
            CharCode := 0;
            if Entity[2] = 'x' then begin
              Found := TryStrToInt('$' + Copy(Entity, 3, Length(Entity)), CharCode);
            end else begin
              Found := TryStrToInt(Copy(Entity, 2, Length(Entity)), CharCode);
            end;
            if Found then
              Entity := Char(CharCode);
          end else begin
        InEntity := True;
      end else if c = ';' then begin
            for ei := Low(cEntities) to High(cEntities) do begin
              if (ei mod 2) = 0 then begin
                if Entity = cEntities[ei] then begin
                  Entity := cEntities[ei + 1];
                  Found := True;
                  Break;
                end;
              end;
            end {for};
          end;
        end;
        if Found then
          Result := Result + Entity
        else
          Result := Result + '&' + Entity + ';';
        InEntity := False;
        Entity := '';
        ProcessEntity(True);
      end else begin
        Entity := Entity + c;
      end;
    end else begin
      if c = '&' then begin
        InEntity := True;
        Entity := '';
      end else begin
        Result := Result + c;
      end;
    end;
  end;
  if InEntity then
    ProcessEntity(False);
end {HTMLDecode};

type
  THTMLEncodingContext = (hecText, hecAttribute);
  THTMLEncodingContext = (hecText, hecAttribute, hecURI);

function HTMLEncode(const Text: string; const Context: THTMLEncodingContext = hecText): string;
var
  i: integer;
  { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
  procedure ReplaceEntity;
  var
180
181
182
183
184
185
186
187

188



189
190

191
192
193
194
195
196
197
239
240
241
242
243
244
245

246
247
248
249
250
251

252
253
254
255
256
257
258
259







-
+

+
+
+

-
+







    Result[i] := '&';
  end {ReplaceEntity};
  { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
begin
  Result := Text;
  for i := Length(Result) downto 1 do begin
    case Result[i] of
      '&', '<', '>', #0:
      '<', '>', #0:
        ReplaceEntity;
      '&':
        if Context <> hecURI then
          ReplaceEntity;
      '"':
        if Context = hecAttribute then
        if Context in [hecAttribute, hecURI] then
          ReplaceEntity;
    end {case};
  end;
end {HTMLEncode};


{ TDivider }
245
246
247
248
249
250
251
















252
253
254
255
256
257
258
259
260
261
262
263
264
265



266
267
268
269
270

271
272

273
274

275
276
277


278
279
280
281
282
283
284
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353

354
355

356
357
358
359
360
361
362
363
364
365
366
367
368







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














+
+
+





+

-
+

-
+



+
+







  // Then, check if Self is not already in the new parent; if not, add Self
  if Assigned(FParent) and (FParent.Entries.IndexOf(Self) = -1) then
    FParent.Entries.Add(Self);
end {TEntry.SetParent};


{ TBookmark }

{ Creates the bookmark through the inherited constructor and sets up its tag
  list: comma-delimited (strictly, so spaces do not split tags), kept sorted,
  with duplicate tags silently ignored. }
constructor TBookmark.Create(const AParent: TFolder; const LineNumber: cardinal);
const
  cTagDelimiter = ',';
begin
  inherited Create(AParent, LineNumber);
  FTags := TStringList.Create;
  // How the list is (de)serialised through DelimitedText
  FTags.StrictDelimiter := True;
  FTags.Delimiter := cTagDelimiter;
  // How the list treats its contents
  FTags.Duplicates := dupIgnore;
  FTags.Sorted := True;
end {TBookmark.Create};

{ Releases the tag list owned by this bookmark, then runs the inherited
  destructor. }
destructor TBookmark.Destroy;
begin
  FreeAndNil(FTags);
  inherited;
end {TBookmark.Destroy};

{ Handles the bookmark-specific attributes after giving the inherited
  implementation its turn. Keys are matched case-insensitively:
    HREF        -> URL
    FEEDURL     -> FeedURL
    SHORTCUTURL -> Keyword
    TAGS        -> Tags (parsed as delimited text)
  HasBeenSet is set to True when one of these keys was consumed; otherwise it
  is left as the inherited call left it. }
procedure TBookmark.DoSetProperty(const Key, Value: string; var HasBeenSet: boolean);
var
  Recognised: boolean;
begin
  inherited DoSetProperty(Key, Value, HasBeenSet);

  Recognised := True;
  if SameText(Key, 'HREF') then
    FURL := Value
  else if SameText(Key, 'FEEDURL') then
    FFeedURL := Value
  else if SameText(Key, 'SHORTCUTURL') then
    FKeyword := Value
  else if SameText(Key, 'TAGS') then
    FTags.DelimitedText := Value
  else
    Recognised := False;

  if Recognised then
    HasBeenSet := True;
end {TBookmark.DoSetProperty};

function TBookmark.GetHTMLAttributes: string;
begin
  Result := '';
  if FURL <> '' then
    Result := ' HREF="' + HTMLEncode(FURL, hecAttribute) + '"';
    Result := Result + ' HREF="' + HTMLEncode(FURL, hecURI) + '"';
  if FFeedURL <> '' then
    Result := Result + ' FEEDURL="' + HTMLEncode(FFeedURL, hecAttribute) + '"';
    Result := Result + ' FEEDURL="' + HTMLEncode(FFeedURL, hecURI) + '"';
  if FKeyword <> '' then
    Result := Result + ' SHORTCUTURL="' + HTMLEncode(FKeyword, hecAttribute) + '"';
  Result := Result + inherited GetHTMLAttributes;
  if FTags.Count > 0 then
    Result := Result + ' TAGS="' + HTMLEncode(FTags.DelimitedText, hecAttribute) + '"';
end {TBookmark.GetHTMLAttributes};

procedure TBookmark.SaveToStrings(const Strings: TStrings);
begin
  Strings.Add(StringOfChar(' ', 4 * Level) + '<DT><A' + GetHTMLAttributes + '>'
            + HTMLEncode(Name)
            + '</A>');
318
319
320
321
322
323
324

325
326

327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347













348


349
350
351
352
353
354

355
356
357
358
359
360
361

362
363
364
365
366





367
368
369
370
371



372

373
374

375
376








377
378
379
380
381
382
383
384
385

386
387
388
389
390
391

392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416

417
418
419
420
421
422
423


424
425
426
427
428



429
430
431
432
433
434


435
436
437

438
439
440
441
442
443
444
445
446







447
448
449







450

451


452
453


454
455

456




457
458
459
460








461






462

463
464
465
466

467
468
469
470

471
472
473
474
475
476
477
402
403
404
405
406
407
408
409
410

411

412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460

461
462
463
464


465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480

481
482

483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498

499
500
501
502
503
504

505
506
507
508
509
510
511















512
513
514

515
516
517
518
519
520
521
522
523
524
525
526
527


528
529
530
531
532
533
534
535
536
537
538
539
540

541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567

568
569
570
571
572
573
574
575
576

577
578
579
580
581
582




583
584
585
586
587
588
589
590
591
592
593
594
595
596
597

598
599
600
601

602
603
604
605

606
607
608
609
610
611
612
613







+

-
+
-




















+
+
+
+
+
+
+
+
+
+
+
+
+

+
+






+






-
+



-
-
+
+
+
+
+





+
+
+

+

-
+

-
+
+
+
+
+
+
+
+








-
+





-
+






-
-
-
-
-
-
-
-
-
-
-
-
-
-
-



-
+







+
+



-
-
+
+
+






+
+


-
+









+
+
+
+
+
+
+



+
+
+
+
+
+
+
-
+

+
+


+
+

-
+

+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+

+
+
+
+
+
+
-
+



-
+



-
+







    FToolbar := SameText(Value, 'TRUE');
    HasBeenSet := True;
  end;
end {TFolder.DoSetProperty};

function TFolder.GetHTMLAttributes: string;
begin
  Result := inherited GetHTMLAttributes;
  if FToolbar then
    Result := ' PERSONAL_TOOLBAR_FOLDER="TRUE"';
    Result := Result + ' PERSONAL_TOOLBAR_FOLDER="TRUE"';
  Result := inherited GetHTMLAttributes + Result;
end {TFolder.GetHTMLAttributes};

{ Appends this folder and everything in it to Strings as bookmark HTML.
  Every line is indented by four spaces per level. A folder at a level above
  0 first gets a <DT><H3> heading line with its attributes and encoded name;
  then the <DL><p> list is opened, each entry writes itself, and the list is
  closed again. }
procedure TFolder.SaveToStrings(const Strings: TStrings);
var
  Padding: string;
  EntryIdx: integer;
begin
  Padding := StringOfChar(' ', 4 * Level);

  // Only folders below level 0 get a heading line.
  if Level > 0 then
    Strings.Add(Padding + '<DT><H3' + GetHTMLAttributes + '>' + HTMLEncode(Name) + '</H3>');

  Strings.Add(Padding + '<DL><p>');
  EntryIdx := 0;
  while EntryIdx < Entries.Count do begin
    Entries[EntryIdx].SaveToStrings(Strings);
    Inc(EntryIdx);
  end;
  Strings.Add(Padding + '</DL><p>');
end {TFolder.SaveToStrings};


{ TContentEntry }

{ Creates the entry through the inherited constructor and allocates the
  attribute list, which stores its items as name=value pairs. }
constructor TContentEntry.Create(const AParent: TFolder; const LineNumber: cardinal);
const
  cNameValueSeparator = '=';
begin
  inherited Create(AParent, LineNumber);
  FAttributes := TStringList.Create;
  FAttributes.NameValueSeparator := cNameValueSeparator;
end {TContentEntry.Create};

{ Releases the attribute list owned by this entry, then runs the inherited
  destructor. }
destructor TContentEntry.Destroy;
begin
  FreeAndNil(FAttributes);
  inherited;
end;

procedure TContentEntry.ReadProperties(HtmlAttributes: string);
const
  cKnownKeys: array[0..4] of string = ('ICON', 'ICON_URI', 'WEB_PANEL', 'UNFILED_BOOKMARKS_FOLDER', 'LAST_CHARSET');
var
  rxAttr: TRegExpr;
  Matches: boolean;
  Key, Value: string;
  HasBeenSet: boolean;
begin
  FAttributes.Clear;
  HtmlAttributes := TrimLeft(HtmlAttributes);
  rxAttr := TRegExpr.Create;
  rxAttr.Expression := '(\S+?)\s*=\s*"([^"]*)"';
  rxAttr.Compile;
  Matches := rxAttr.Exec(HtmlAttributes);
  while Matches do begin
    Key := HTMLDecode(LowerCase(rxAttr.Match[1]));
    Key := HTMLDecode(rxAttr.Match[1]);
    Value := HTMLDecode(rxAttr.Match[2]);
    HasBeenSet := False;
    DoSetProperty(Key, Value, HasBeenSet);
    if not HasBeenSet then
      WriteLn(ErrOutput, '!!! Unprocessed property "', Key, '" in line ', Self.Line,' !!!');
    if not HasBeenSet then begin
      if IndexOfText(Key, cKnownKeys) = -1 then
        WriteLn(ErrOutput, '!!! Unprocessed property "', Key, '" in line ', Self.Line,' !!!');
      FAttributes.Add(Key + FAttributes.NameValueSeparator + Value);
    end;
    Matches := rxAttr.ExecNext;
  end;
end {TContentEntry.ReadProperties};

function TContentEntry.GetHTMLAttributes: string;
var
  i: integer;
  hec: THTMLEncodingContext;
begin
  Result := '';
  if FAdded > 0 then
    Result := ' ADD_DATE="' + IntToStr(SecondsBetween(FAdded, EncodeDate(1970, 1, 1))) + '"';
    Result := Result + ' ADD_DATE="' + IntToStr(Round((FAdded - EncodeDate(1970, 1, 1)) * SecsPerDay)) + '"';
  if FModified > 0 then
    Result := ' LAST_MODIFIED="' + IntToStr(SecondsBetween(FModified, EncodeDate(1970, 1, 1))) + '"';
    Result := Result + ' LAST_MODIFIED="' + IntToStr(Round((FModified - EncodeDate(1970, 1, 1)) * SecsPerDay)) + '"';
  for i := 0 to FAttributes.Count - 1 do begin
    if AnsiContainsText(FAttributes.Names[i], 'URL') or AnsiContainsText(FAttributes.Names[i], 'HREF') or AnsiContainsText(FAttributes.Names[i], 'SRC') then
      hec := hecURI
    else
      hec := hecAttribute;
    Result := Result + ' ' + HTMLEncode(FAttributes.Names[i]) + '="' + HTMLEncode(FAttributes.ValueFromIndex[i], hec) + '"';
  end;
end {TContentEntry.GetHTMLAttributes};

procedure TContentEntry.DoSetProperty(const Key, Value: string; var HasBeenSet: boolean);
var
  Seconds: Integer;
begin
  if SameText(Key, 'ADD_DATE') then begin
    if TryStrToInt(Value, Seconds) then
      FAdded := EncodeDate(1970, 1, 1) + (Seconds / 86400)
      FAdded := EncodeDate(1970, 1, 1) + (Seconds / SecsPerDay)
    else
      FAdded := 0;
    HasBeenSet := True;
  end else if SameText(Key, 'LAST_MODIFIED') then begin
    if TryStrToInt(Value, Seconds) then
      FModified := EncodeDate(1970, 1, 1) + (Seconds / 86400)
      FModified := EncodeDate(1970, 1, 1) + (Seconds / SecsPerDay)
    else
      FModified := 0;
    HasBeenSet := True;
  end;
end {TContentEntry.DoSetProperty};

type
  TStringArray = array of string;

{ Join concatenates all elements of Strings into a single string, placing
  Separator between consecutive elements (no leading or trailing separator).

  Strings   - the elements to concatenate; may be empty.
  Separator - text inserted between elements; defaults to the platform line break.

  Returns the concatenated text, or an empty string when Strings is empty. }
function Join(const Strings: TStringArray; const Separator: string = sLineBreak): string;
var
  i: integer;
begin
  // A managed string Result is not cleared on entry, so it has to be assigned
  // before the early exit; otherwise an empty input yields an undefined value.
  Result := '';
  if Length(Strings) = 0 then
    Exit;
  Result := Strings[Low(Strings)];
  for i := Low(Strings) + 1 to High(Strings) do begin
    Result := Result + Separator + Strings[i];
  end;
end {Join};

type
  { TDuplicateSet }

  TDuplicateSet = class(specialize TFPGObjectList<TBookmark>)
  TDuplicateSet = class(specialize TFPGObjectList<TContentEntry>)
  private
    FNames: TStringArray;
    FAdded: TDateTime;
    FLastModified: TDateTime;
    FMinLevel: cardinal;
    FMaxLevel: cardinal;
    FDescriptions: TStringArray;
    FAttributes: TStringList;
    FTags: TStringList;

    function AddStringIfUnique(const NewString: string; var Strings: TStringArray): boolean;
  public
    constructor Create(const Initial: TBookmark);
    function Add(const Bookmark: TBookmark): integer;
    constructor Create(const Initial: TContentEntry);
    destructor  Destroy; override;
    function Add(const Entry: TContentEntry): integer;
    property Names: TStringArray        read FNames;
    property Added: TDateTime           read FAdded;
    property LastModified: TDateTime    read FLastModified;
    property Descriptions: TStringArray read FDescriptions;
    property MinLevel: cardinal         read FMinLevel;
    property MaxLevel: cardinal         read FMaxLevel;
    property Attributes: TStringList    read FAttributes;
    property Tags: TStringList          read FTags;
  end;

constructor TDuplicateSet.Create(const Initial: TBookmark);
constructor TDuplicateSet.Create(const Initial: TContentEntry);
begin
  inherited Create(False);
  FAdded := 0;
  FLastModified := 0;
  SetLength(FNames, 1);
  FNames[0] := Initial.Name;
  SetLength(FDescriptions, 0);
  FMinLevel := High(FMinLevel);
  FMaxLevel := Low(FMaxLevel);
  FAttributes := TStringList.Create;
  FAttributes.NameValueSeparator := '=';
  FTags := TStringList.Create;
  FTags.Delimiter := ',';
  FTags.StrictDelimiter := True;
  FTags.Sorted := True;
  FTags.Duplicates := dupIgnore;
  Add(Initial);
end {TDuplicateSet.Create};

{ Releases the two string lists owned by the set (merged attributes and
  merged tags), then runs the inherited destructor. }
destructor TDuplicateSet.Destroy;
begin
  FreeAndNil(FAttributes);
  FreeAndNil(FTags);
  inherited;
end {TDuplicateSet.Destroy};

function TDuplicateSet.Add(const Bookmark: TBookmark): integer;
function TDuplicateSet.Add(const Entry: TContentEntry): integer;
var
  Bookmark: TBookmark;
  AddName, AddDescription: boolean;
  D: TDateTime;
  L: integer;
  IsNewer: boolean;
  i: integer;
begin
  Result := inherited Add(Bookmark);
  Result := inherited Add(Entry);

  AddName := True;
  AddDescription := (Length(Entry.Description) > 0) and (Pos(Entry.Description, Entry.Name) = 0);
  if Entry is TBookmark then begin
    Bookmark := TBookmark(Entry);
  if Pos(Bookmark.Name, Bookmark.URL) = 0 then
    AddStringIfUnique(Bookmark.Name, FNames);
  if (Length(Bookmark.Description) > 0) and (Pos(Bookmark.Description, Bookmark.Name) = 0) and (Pos(Bookmark.Description, Bookmark.URL) <> 1) then
    AddStringIfUnique(Bookmark.Description, FDescriptions);
    AddName := Pos(Bookmark.Name, Bookmark.URL) = 0;
    AddDescription := AddDescription and (Pos(Bookmark.Description, Bookmark.URL) <> 1);
    FTags.AddStrings(Bookmark.Tags);
  end;
  if AddName then
    AddStringIfUnique(Entry.Name, FNames);
  if AddDescription then
    AddStringIfUnique(Entry.Description, FDescriptions);

  IsNewer := (Entry.LastModified > 0) and ((FLastModified = 0) or (Entry.LastModified > FLastModified));
  for i := 0 to Entry.Attributes.Count - 1 do begin
    if IsNewer or (FAttributes.IndexOfName(Entry.Attributes.Names[i]) = -1) then
      FAttributes.Values[Entry.Attributes.Names[i]] := Entry.Attributes.ValueFromIndex[i];
  end;

  D := Bookmark.Added;
  D := Entry.Added;
  if (D > 0) and ((FAdded = 0) or (D < FAdded)) then
    FAdded := D;

  D := Bookmark.LastModified;
  D := Entry.LastModified;
  if (D > 0) and (D > FLastModified) then
    FLastModified := D;

  L := Bookmark.Level;
  L := Entry.Level;
  if L < FMinLevel then
    FMinLevel := L;
  if L > FMaxLevel then
    FMaxLevel := L;
end {TDuplicateSet.Add};

function TDuplicateSet.AddStringIfUnique(const NewString: string; var Strings: TStringArray): boolean;
619
620
621
622
623
624
625

626
627

628
629
630






631
632

633
634
635
636
637
638
639
755
756
757
758
759
760
761
762
763

764
765
766
767
768
769
770
771
772
773
774

775
776
777
778
779
780
781
782







+

-
+



+
+
+
+
+
+

-
+







          if Trim(Copy(Line, 1, P - 1)) <> '' then begin
            Assert(Entry is TContentEntry);
            TContentEntry(Entry).Description := TContentEntry(Entry).Description + sLineBreak + HTMLDecode(Copy(Line, 1, P - 1));
            //WriteLn(StringOfChar(#9, Entry.Level - 1), ' "', TContentEntry(Entry).Description, '"');
          end;
          ReadingDescription := False;
        end;

        Line := Copy(Line, P + 1, Length(Line));
        if AnsiStartsText('HR>', Line) then begin
        while AnsiStartsText('HR>', Line) do begin
          Entry := TDivider.Create(Folder, i);
          Bookmarks.Add(Entry);
          //WriteLn(StringOfChar(#9, Entry.Level - 1), '---- ---- ---- ---- ---- ');
          Line := Copy(Line, 4, Length(Line));
          P := Pos('<', Line);
          if P = 0 then
            Continue;
          Line := Copy(Line, P + 1, Length(Line));
        end;

        end else if AnsiStartsText('DT><A', Line) then begin
        if AnsiStartsText('DT><A', Line) then begin
          Entry := TBookmark.Create(Folder, i);
          Bookmarks.Add(Entry);

          P := PosEx('>', Line, 6);
          TContentEntry(Entry).ReadProperties(Copy(Line, 6, P - 6));
          Pe := PosEx('</A>', Line, P);
          TContentEntry(Entry).Name := HTMLDecode(Copy(Line, P + 1, Pe - P - 1));
731
732
733
734
735
736
737
738

739
740
741
742
743
744
745
874
875
876
877
878
879
880

881
882
883
884
885
886
887
888







-
+







function TBookmarkManager.FindDupesByURL(const AllEntries: TEntries; const StartIndex: integer; const DupeSet: TDuplicateSet): cardinal;
var
  URL: string;
  i: integer;
  Bookmark: TBookmark;
begin
  Result := 0;
  URL := DupeSet.Items[0].URL;
  URL := TBookmark(DupeSet.Items[0]).URL;
  for i := StartIndex to AllEntries.Count - 1 do begin
    if (AllEntries[i] is TBookmark) then begin
      Bookmark := TBookmark(AllEntries[i]);
      if SameText(Bookmark.URL, URL) then begin
        DupeSet.Add(Bookmark);
        Inc(Result);
      end;
775
776
777
778
779
780
781













782
783
784
785

786

787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805


806
807
808


809
810
811
812
813
814
815
816
817
818










819
820
821


822


823
824
825
826
827
828
829
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940

941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960


961
962
963


964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986


987
988
989
990
991
992
993
994
995
996
997
998







+
+
+
+
+
+
+
+
+
+
+
+
+



-
+

+

















-
-
+
+

-
-
+
+










+
+
+
+
+
+
+
+
+
+

-
-
+
+

+
+







          Dupeset.Free;
      end;
    end {if TBookmark};
  end {for i};
end {TBookmarkManager.FindDupes};

function TBookmarkManager.RemoveDuplicates(const AllEntries: TEntries): TEntries;
  function IsInToolbar(const Bookmark: TContentEntry): boolean;
  var
    Folder: TFolder;
  begin
    Result := False;
    Folder := Bookmark.Parent;
    while Assigned(Folder) do begin
      Result := Folder.IsToolbarFolder;
      if Result then
        Exit;
      Folder := Folder.Parent;
    end;
  end {IsInToolbar};
var
  NumDupes: integer;
  Dupes: TDuplicateSet;
  di, bi: integer;
  di, bi, ni: integer;
  Bookmark: TBookmark;
  Names: TStringArray;
begin
  // Walk through all bookmarks; for each bookmark, walk through the entire list again, and gather all bookmarks with the same URL;
  //  keeping track of the earliest add date, and the latest modify date.
  //  Keep the names and descriptions in a list each, only adding them if they're not already in there.
  //  Also keep track of the highest level.
  NumDupes := FindDupesets(AllEntries);
  WriteLn(NumDupes, ' duplicates found. Marking for deletion...');

  Result := TEntries.Create(False);
  for di := FDupeSets.Count - 1 downto 0 do begin
    Bookmark := nil;
    Dupes := FDupeSets.Data[di];

    // Identify which bookmark should be kept. By default, if there's one on the toolbar, we keep
    //  that one. Otherwise, we keep the one with the deepest level, on the assumption that nested
    //  deeper means better organised.
    for bi := 0 to Dupes.Count - 1 do begin
      if Assigned(Dupes[bi].Parent) and Dupes[bi].Parent.IsToolbarFolder then begin
        Bookmark := Dupes[bi];
      if IsInToolbar(Dupes[bi]) then begin
        Bookmark := TBookmark(Dupes[bi]);
        Break;
      end else if not Assigned(Bookmark) and (Dupes[bi].Level = Dupes.MaxLevel) then
        Bookmark := Dupes[bi];
      end else if (not Assigned(Bookmark)) and (Dupes[bi].Level = Dupes.MaxLevel) then
        Bookmark := TBookmark(Dupes[bi]);
    end {for bi};
    Assert(Assigned(Bookmark));

    //   apply the earliest add date and the latest modify date,
    //   apply the combined names and descriptions,
    Bookmark.Added := Dupes.Added;
    Bookmark.LastModified := Dupes.LastModified;
    if Length(Dupes.Descriptions) > 0 then
      Bookmark.Description := Join(Dupes.Descriptions);
    if Length(Dupes.Names) > 1 then begin
      SetLength(Names, Length(Dupes.Names));
      bi := 0; // offset due to deleted name(s)
      for ni := Low(Names) to High(Names) do begin
        if SameText(Bookmark.Name, Dupes.Names[ni]) then
          Inc(bi)
        else
          Names[ni - bi] := Dupes.Names[ni];
      end;
      if bi > 0 then
        SetLength(Names, Length(Names) - bi);
      Bookmark.Description := IfThen(Bookmark.Description <> '', Bookmark.Description + sLineBreak, '')
                            + 'Alternative names:' + sLineBreak
                            + Join(Dupes.Names);
                            + 'Alternative name' + IfThen(Length(Names) = 1, ': ', 's:' + sLineBreak)
                            + Join(Names);
    end;
    Bookmark.Attributes.Assign(Dupes.Attributes);
    Bookmark.Tags.AddStrings(Dupes.Tags);

    // Instead of deleting them right now, move them to the result set of entries. The calling party can then free them.
    for bi := Dupes.Count - 1 downto 0 do begin
      if Dupes[bi] <> Bookmark then begin
        Result.Add(Dupes[bi]);
      end;
    end;
857
858
859
860
861
862
863

864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881













882
883

884
885
886
887
888
889
890
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065

1066
1067
1068
1069
1070
1071
1072
1073







+


















+
+
+
+
+
+
+
+
+
+
+
+
+

-
+








function TBookmarkManager.MergeFolders(const RootEntries: TEntries): TEntries;
var
  fi, i, Index: integer;
  Folder, Preceding: TFolder;
  Entry: TEntry;
  Merged: TEntries;
  FolderIsMoreRecent: boolean;
begin
  Result := TEntries.Create(False);
  for fi := RootEntries.Count - 1 downto 0 do begin
    Entry := RootEntries[fi];
    //WriteLn(StringOfChar('.', Entry.Level), Entry.ToString);
    if Entry is TFolder then begin
      Folder := TFolder(Entry);

      // Look for a preceding folder with the same name
      Preceding := nil;
      for i := 0 to fi - 1 do begin
        if (RootEntries[i] is TFolder) and SameText(Folder.Name, TFolder(RootEntries[i]).Name) then begin
          Preceding := TFolder(RootEntries[i]);
          Break;
        end;
      end;

      if Assigned(Preceding) then begin
        // merge the attributes of both folders
        if Folder.IsToolbarFolder then
          Preceding.IsToolbarFolder := True;
        if (Folder.Added > 0) and ((Preceding.Added = 0) or (Folder.Added < Preceding.Added)) then
          Preceding.Added := Folder.Added;
        FolderIsMoreRecent := (Folder.LastModified > 0) and (Folder.LastModified > Preceding.LastModified);
        if FolderIsMoreRecent then
          Preceding.LastModified := Folder.LastModified;
        for i := 0 to Folder.Attributes.Count - 1 do begin
          if FolderIsMoreRecent or (Preceding.Attributes.IndexOfName(Folder.Attributes.Names[i]) = -1) then
            Preceding.Attributes.Values[Folder.Attributes.Names[i]] := Folder.Attributes.ValueFromIndex[i];
        end;

        //WriteLn(#9, 'Moving ', Folder.Entries.Count ,' entries to preceding ', Preceding.ToString);
        // move all its RootEntries to the Preceding folder
        // move all its entries to the Preceding folder
        Index := Preceding.Entries.Count;
        for i := Folder.Entries.Count - 1 downto 0 do begin
          Entry := Folder.Entries[i];
          Preceding.Entries.Insert(Index, Entry);
          Entry.Parent := Preceding;
        end;
      end else begin
998
999
1000
1001
1002
1003
1004
1005

1006
1007
1008
1009
1010
1011
1012

1013
1014
1015
1016
1017
1018
1019

1020
1021
1022
1023
1024
1025
1026

1027
1028

1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1181
1182
1183
1184
1185
1186
1187

1188
1189
1190
1191
1192
1193
1194

1195
1196
1197
1198
1199
1200
1201

1202
1203
1204
1205
1206
1207
1208

1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222







-
+






-
+






-
+






-
+


+










begin
  try
    Mgr := TBookmarkManager.Create;
    try

      Mgr.LoadFromFile(ParamStr(1));
      WriteLn(Mgr.Bookmarks.Count, ' entries loaded.');
      Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-1' + ExtractFileExt(ParamStr(1))));
      //Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-1' + ExtractFileExt(ParamStr(1))));
      WriteLn;

      // REMOVE DUPLICATES
      WriteLn('Looking for duplicate bookmarks...');
      Mgr.RemoveDuplicates;
      WriteLn(Mgr.Bookmarks.Count, ' entries left.');
      Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-2' + ExtractFileExt(ParamStr(1))));
      //Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-2' + ExtractFileExt(ParamStr(1))));
      WriteLn;

      // MERGE DUPLICATE FOLDERS AND PRUNE EMPTY ONES
      WriteLn('Merging duplicate folders and pruning empty ones...');
      Mgr.MergeFolders;
      WriteLn(Mgr.Bookmarks.Count, ' entries left.');
      Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-3' + ExtractFileExt(ParamStr(1))));
      //Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-3' + ExtractFileExt(ParamStr(1))));
      WriteLn;

      // MERGE CONSECUTIVE SEPARATORS
      WriteLn('Sanitizing dividers...');
      Mgr.SanitizeDividers;
      WriteLn(Mgr.Bookmarks.Count, ' entries left.');
      Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-4' + ExtractFileExt(ParamStr(1))));
      //Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-out-4' + ExtractFileExt(ParamStr(1))));
      WriteLn;

      Mgr.SaveToFile(ChangeFileExt(ParamStr(1), '-new' + ExtractFileExt(ParamStr(1))));
    finally
      Mgr.Free;
    end;
  except
    Beep;
    ShowException(ExceptObject, ExceptAddr);
    //Readln;
  end;
end.