318 self unEscape:'/Home/a%C3%A4%C3%B6%C3%BCa' |
320 self unEscape:'/Home/a%C3%A4%C3%B6%C3%BCa' |
319 " |
321 " |
320 |
322 |
321 "Modified: / 09-01-2011 / 10:44:50 / cg" |
323 "Modified: / 09-01-2011 / 10:44:50 / cg" |
322 "Modified (comment): / 06-05-2015 / 15:40:04 / sr" |
324 "Modified (comment): / 06-05-2015 / 15:40:04 / sr" |
|
325 "Modified (comment): / 03-02-2017 / 17:06:32 / stefan" |
323 ! |
326 ! |
324 |
327 |
325 unescapeCharacterEntities:aString |
328 unescapeCharacterEntities:aString |
326 "helper to unescape character entities in a string. |
329 "helper to unescape character entities in a string. |
327 Normally, this is done by the HTMLParser when it scans text, |
330 Normally, this is done by the HTMLParser when it scans text, |
419 "Created: / 06-05-2015 / 16:56:14 / sr" |
422 "Created: / 06-05-2015 / 16:56:14 / sr" |
420 "Modified: / 18-05-2015 / 12:13:35 / sr" |
423 "Modified: / 18-05-2015 / 12:13:35 / sr" |
421 ! |
424 ! |
422 |
425 |
423 urlEncode2:aStringOrStream on:ws |
426 urlEncode2:aStringOrStream on:ws |
|
427 <resource: #obsolete> |
424 "helper to escape invalid/dangerous characters in an urls arguments. |
428 "helper to escape invalid/dangerous characters in an urls arguments. |
425 Similar to urlEncode, but treats '*','~' and spaces differently. |
429 Similar to urlEncode, but treats '*','~' and spaces differently. |
426 (some clients, such as bitTorrent seem to require this - time will tell...) |
430 (some clients, such as bitTorrent seem to require this - time will tell...) |
427 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_', is encoded using |
431 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_', is encoded using |
428 the '%nn' format, where nn is the hexadecimal value of the byte. |
432 the '%nn' format, where nn is the hexadecimal value of the byte. |
454 "Modified: / 06-05-2015 / 15:43:39 / sr" |
458 "Modified: / 06-05-2015 / 15:43:39 / sr" |
455 ! |
459 ! |
456 |
460 |
457 urlEncode:aStringOrStream on:ws |
461 urlEncode:aStringOrStream on:ws |
458 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
462 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
459 Similar to urlEncode2, but treats '*','~' and spaces differently. |
463 |
460 (some clients, such as bitTorrent seem to require urlEncode2 - time will tell...) |
464 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using |
461 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '*', is encoded using |
|
462 the '%nn' format, where nn is the hexadecimal value of the byte. |
465 the '%nn' format, where nn is the hexadecimal value of the byte. |
|
466 Characters outside the ASCII range are encoded into utf8 first. |
463 Spaces are encoded as '+'. |
467 Spaces are encoded as '+'. |
464 see: application/x-www-form-urlencoded |
468 see: application/x-www-form-urlencoded |
465 see: RFC1738" |
469 see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)" |
466 |
470 |
467 |rs c cp space| |
471 |rs c| |
468 |
472 |
469 space := Character space. |
|
470 rs := aStringOrStream readStream. |
473 rs := aStringOrStream readStream. |
471 |
474 |
472 [rs atEnd] whileFalse: [ |
475 [(c := rs nextOrNil) notNil] whileTrue: [ |
473 c := rs next. |
476 |cp| |
474 |
477 |
475 (c isLetterOrDigit or:[ '-_.*' includes:c ]) ifTrue:[ |
478 (c isLetterOrDigit or:['-_.~' includes:c]) ifTrue:[ |
476 ws nextPut:c. |
479 ws nextPut:c. |
477 ] ifFalse:[ |
480 ] ifFalse:[ |
478 c == space ifTrue:[ |
481 c == Character space ifTrue:[ |
479 ws nextPut:$+. |
482 ws nextPut:$+. |
480 ] ifFalse:[ |
483 ] ifFalse:[ |
481 ws nextPut: $%. |
484 cp := c codePoint. |
482 (cp := c codePoint) > 16rFF ifTrue:[ |
485 cp > 16r7F ifTrue:[ |
483 ws nextPut: $u. |
486 c utf8Encoded do:[:eachUtf8Char| |
484 cp printOn:ws base:16 size:4 fill:$0. |
487 ws nextPut: $%. |
|
488 eachUtf8Char codePoint printOn:ws base:16 size:2 fill:$0. |
|
489 ]. |
485 ] ifFalse:[ |
490 ] ifFalse:[ |
|
491 ws nextPut: $%. |
486 cp printOn:ws base:16 size:2 fill:$0. |
492 cp printOn:ws base:16 size:2 fill:$0. |
487 ]. |
493 ]. |
488 ]. |
494 ]. |
489 ]. |
495 ]. |
490 ]. |
496 ]. |
491 |
497 |
|
498 " |
|
499 self urlEncoded:'hokus pokus fidibus*-/~' |
|
500 self urlEncoded:'Ützel Brötzel*-/~' |
|
501 self urlEncoded:'χαιρε' |
|
502 " |
|
503 |
492 "Modified: / 09-01-2011 / 10:43:30 / cg" |
504 "Modified: / 09-01-2011 / 10:43:30 / cg" |
493 "Modified: / 06-05-2015 / 16:06:52 / sr" |
505 "Modified: / 06-05-2015 / 16:06:52 / sr" |
|
506 "Modified (comment): / 07-02-2017 / 14:51:42 / stefan" |
494 ! |
507 ! |
495 |
508 |
496 urlEncoded2: aString |
509 urlEncoded2: aString |
|
510 <resource: #obsolete> |
497 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
511 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
498 Similar to urlEncoded, but treats '*','~' and spaces differently. |
512 Similar to urlEncoded, but treats '*','~' and spaces differently. |
499 (some clients, such as bitTorrent seem to require this - time will tell...) |
513 (some clients, such as bitTorrent seem to require this - time will tell...) |
500 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using |
514 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using |
501 the '%nn' format, where nn is the hexadecimal value of the byte. |
515 the '%nn' format, where nn is the hexadecimal value of the byte. |
521 "Created: / 09-01-2011 / 10:34:50 / cg" |
535 "Created: / 09-01-2011 / 10:34:50 / cg" |
522 ! |
536 ! |
523 |
537 |
524 urlEncoded: aString |
538 urlEncoded: aString |
525 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
539 "helper to escape invalid/dangerous characters in an urls arguments or post-fields. |
526 Similar to urlEncoded2, but treats '*','~' and spaces differently. |
540 |
527 (some clients, such as bitTorrent seem to require urlEncoded2 - time will tell...) |
541 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using |
528 Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '*', is encoded using |
|
529 the '%nn' format, where nn is the hexadecimal value of the byte. |
542 the '%nn' format, where nn is the hexadecimal value of the byte. |
|
543 Characters outside the ASCII range are encoded into utf8 first. |
530 Spaces are encoded as '+'. |
544 Spaces are encoded as '+'. |
531 see: application/x-www-form-urlencoded |
545 see: application/x-www-form-urlencoded |
532 see: RFC1738" |
546 see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)" |
533 |
547 |
534 |ws| |
548 |ws| |
535 |
549 |
536 ws := String writeStreamWithInitialSize:aString size. |
550 ws := WriteStream on:(String new:aString size + 20). |
537 self urlEncode:aString on:ws. |
551 self urlEncode:aString on:ws. |
538 ^ ws contents |
552 ^ ws contents |
539 |
553 |
540 |
554 |
541 " |
555 " |